Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ tests/evals/js/eval-bun/test-data.txt
__pycache__

bt-sync
*.env
15 changes: 15 additions & 0 deletions scripts/eval-runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,21 @@ def load_evaluators(files: list[str]) -> tuple[list[EvaluatorInstance], dict[str
cwd = os.getcwd()
if cwd not in sys.path:
sys.path.insert(0, cwd)

# Add the bundle root inferred from each input file to sys.path so that
# sibling-package imports work when files live outside CWD (e.g.
# sandbox bundles extracted to a temp directory). Walk up from each
# file's directory until a directory containing register.py (the bundle
# marker) is found and prepend it; if the filesystem root is reached
# first, nothing is added for that file.
for f in files:
d = os.path.dirname(os.path.abspath(f))
while d and d != os.path.dirname(d):
if os.path.isfile(os.path.join(d, "register.py")):
if d not in sys.path:
sys.path.insert(0, d)
break
d = os.path.dirname(d)

unique_files: set[str] = set()
for file_path in files:
for candidate in collect_files(file_path):
Expand Down
258 changes: 5 additions & 253 deletions scripts/eval-runner.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { createRequire } from "node:module";
import path from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url";
import {
isObject,
loadParameterSerializationHelpers,
serializeEvaluatorParameters,
} from "./evaluator-params";

type EvaluatorEntry = {
evaluator: {
Expand Down Expand Up @@ -142,17 +147,8 @@ type EvalRunner = {
getState: (() => unknown) | null;
};

/** Function that turns a parameters container into its serialized form. */
type ParameterContainerSerializer = (parameters: unknown) => unknown;
/** Function that converts a prompt definition value into another representation. */
type PromptDefinitionSerializer = (prompt: unknown) => unknown;
/** Function that converts a schema value into a plain JSON-schema-like object. */
type ZodSchemaSerializer = (schema: unknown) => Record<string, unknown>;
/** Function that maps a parent value to a string, or `undefined` when it yields none. */
type ParseParentFunction = (parent: unknown) => string | undefined;

/**
 * Optional helpers used while serializing evaluator parameters.
 * Each field is `null` when the corresponding helper could not be obtained.
 */
type ParameterSerializationHelpers = {
  // Serializer looked up on the loaded braintrust module, if present.
  sdkSerializeParameters: ParameterContainerSerializer | null;
  // Prompt-definition converter looked up on the loaded braintrust module, if present.
  promptDefinitionToPromptData: PromptDefinitionSerializer | null;
  // Schema-to-JSON-schema converter, if one could be loaded.
  zodToJsonSchema: ZodSchemaSerializer | null;
};

declare global {
// eslint-disable-next-line no-var
var _evals: GlobalEvals | undefined;
Expand All @@ -162,10 +158,6 @@ declare global {
var __inherited_braintrust_state: unknown;
}

/**
 * Type guard for non-null values whose `typeof` is `"object"`.
 *
 * Arrays satisfy the check; functions and primitives do not.
 */
function isObject(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}

/**
 * Type guard that accepts any object exposing an `Eval` or a `login` key
 * (own or inherited, as reported by the `in` operator).
 */
function isBraintrustModule(value: unknown): value is BraintrustModule {
  if (!isObject(value)) {
    return false;
  }
  return "Eval" in value || "login" in value;
}
Expand Down Expand Up @@ -825,163 +817,6 @@ async function loadBraintrust() {
return normalizeBraintrustModule(mod);
}

/**
 * Looks up the `serializeRemoteEvalParametersContainer` function on a loaded
 * module.
 *
 * The module object itself is checked first; if it does not carry the
 * function, the module's `default` export is checked next.
 *
 * @param mod - The loaded module value (anything non-object yields `null`).
 * @returns The serializer function, or `null` when none is found.
 */
function extractParameterSerializer(
  mod: unknown,
): ParameterContainerSerializer | null {
  const exportName = "serializeRemoteEvalParametersContainer";
  if (isObject(mod)) {
    const direct = Reflect.get(mod, exportName);
    if (typeof direct === "function") {
      return direct as ParameterContainerSerializer;
    }
    const fallbackHost = Reflect.get(mod, "default");
    const nested = isObject(fallbackHost)
      ? Reflect.get(fallbackHost, exportName)
      : undefined;
    if (typeof nested === "function") {
      return nested as ParameterContainerSerializer;
    }
  }
  return null;
}

/**
 * Looks up the `promptDefinitionToPromptData` function on a loaded module.
 *
 * The module object itself is checked first; if it does not carry the
 * function, the module's `default` export is checked next.
 *
 * @param mod - The loaded module value (anything non-object yields `null`).
 * @returns The converter function, or `null` when none is found.
 */
function extractPromptDefinitionSerializer(
  mod: unknown,
): PromptDefinitionSerializer | null {
  const exportName = "promptDefinitionToPromptData";
  if (isObject(mod)) {
    const direct = Reflect.get(mod, exportName);
    if (typeof direct === "function") {
      return direct as PromptDefinitionSerializer;
    }
    const fallbackHost = Reflect.get(mod, "default");
    const nested = isObject(fallbackHost)
      ? Reflect.get(fallbackHost, exportName)
      : undefined;
    if (typeof nested === "function") {
      return nested as PromptDefinitionSerializer;
    }
  }
  return null;
}

/**
 * Reports whether a schema value carries a defined `_zod` property, which
 * this file treats as the marker of a Zod v4 schema.
 */
function isZodV4Schema(schema: unknown): boolean {
  if (!isObject(schema)) {
    return false;
  }
  if (!("_zod" in schema)) {
    return false;
  }
  return Reflect.get(schema, "_zod") !== undefined;
}

/**
 * Coerces a converter result to a plain record: object values are passed
 * through unchanged, anything else becomes a fresh empty object.
 */
function normalizeJsonSchema(value: unknown): Record<string, unknown> {
  return isObject(value) ? value : {};
}

/**
 * Builds a schema-to-JSON-schema converter from whichever converter packages
 * can be `require`d relative to the resolved braintrust module path.
 *
 * Two sources are probed, in this order:
 *  1. `zod-to-json-schema` — accepted as a function export, a named
 *     `zodToJsonSchema` export, a function `default` export, or a
 *     `default.zodToJsonSchema` export.
 *  2. `zod/v4` — its `toJSONSchema` export.
 *
 * @param braintrustResolvedPath - Filesystem path used as the resolution base.
 * @returns A converter that never throws (failures yield `{}`), or `null`
 *          when neither package provides a usable function.
 */
function loadZodSchemaSerializer(
  braintrustResolvedPath: string,
): ZodSchemaSerializer | null {
  type V3Converter = (schema: unknown) => unknown;
  type V4Converter = (
    schema: unknown,
    options?: { target?: string },
  ) => unknown;

  const requireFromBraintrust = createRequire(
    pathToFileURL(braintrustResolvedPath).href,
  );

  // Probe `zod-to-json-schema`, tolerating the different export layouts.
  const resolveV3Converter = (): V3Converter | null => {
    try {
      const loaded: unknown = requireFromBraintrust("zod-to-json-schema");
      if (typeof loaded === "function") {
        return loaded as V3Converter;
      }
      if (!isObject(loaded)) {
        return null;
      }
      const named = Reflect.get(loaded, "zodToJsonSchema");
      if (typeof named === "function") {
        return named as V3Converter;
      }
      const defaultExport = Reflect.get(loaded, "default");
      if (typeof defaultExport === "function") {
        return defaultExport as V3Converter;
      }
      if (!isObject(defaultExport)) {
        return null;
      }
      const nestedNamed = Reflect.get(defaultExport, "zodToJsonSchema");
      return typeof nestedNamed === "function"
        ? (nestedNamed as V3Converter)
        : null;
    } catch {
      // Package missing or failed to load: treat as unavailable.
      return null;
    }
  };

  // Probe `zod/v4` for its `toJSONSchema` export.
  const resolveV4Converter = (): V4Converter | null => {
    try {
      const loaded: unknown = requireFromBraintrust("zod/v4");
      const candidate = isObject(loaded)
        ? Reflect.get(loaded, "toJSONSchema")
        : undefined;
      return typeof candidate === "function"
        ? (candidate as V4Converter)
        : null;
    } catch {
      // Package missing or failed to load: treat as unavailable.
      return null;
    }
  };

  const convertV3 = resolveV3Converter();
  const convertV4 = resolveV4Converter();
  if (convertV3 === null && convertV4 === null) {
    return null;
  }

  return (schema: unknown): Record<string, unknown> => {
    try {
      // Schemas carrying the v4 marker go to the v4 converter when available.
      const schemaIsV4 = isZodV4Schema(schema);
      if (schemaIsV4 && convertV4) {
        return normalizeJsonSchema(convertV4(schema, { target: "draft-7" }));
      }
      // Otherwise prefer the v3 converter, then fall back to v4.
      if (convertV3) {
        return normalizeJsonSchema(convertV3(schema));
      }
      return convertV4
        ? normalizeJsonSchema(convertV4(schema, { target: "draft-7" }))
        : {};
    } catch {
      // Conversion errors are swallowed; callers get an empty schema.
      return {};
    }
  };
}

/**
 * Gathers the optional helpers used for parameter serialization.
 *
 * The schema converter is loaded first. The braintrust module is then
 * imported dynamically and searched for the two SDK helpers; if that import
 * or lookup throws, both SDK helpers are reported as `null` while the schema
 * converter is still returned.
 */
async function loadParameterSerializationHelpers(): Promise<ParameterSerializationHelpers> {
  const resolvedPath = resolveBraintrustPath();
  const withoutSdkHelpers: ParameterSerializationHelpers = {
    sdkSerializeParameters: null,
    promptDefinitionToPromptData: null,
    zodToJsonSchema: loadZodSchemaSerializer(resolvedPath),
  };

  try {
    const sdkModule: unknown = await import(pathToFileURL(resolvedPath).href);
    const sdkSerializeParameters = extractParameterSerializer(sdkModule);
    const promptDefinitionToPromptData =
      extractPromptDefinitionSerializer(sdkModule);
    return {
      ...withoutSdkHelpers,
      sdkSerializeParameters,
      promptDefinitionToPromptData,
    };
  } catch {
    return withoutSdkHelpers;
  }
}

function extractParseParent(mod: unknown): ParseParentFunction | null {
if (!isObject(mod)) {
return null;
Expand Down Expand Up @@ -1294,89 +1129,6 @@ function extractScoreName(score: unknown, idx: number): string {
return `scorer_${idx}`;
}

/**
 * Converts an evaluator's `parameters` value into a serializable description.
 *
 * Resolution order:
 *  1. `null`/`undefined`, or a value that is not an object once resolved,
 *     yields `undefined`.
 *  2. If `helpers.sdkSerializeParameters` is available it is tried first; any
 *     failure (synchronous throw or rejected promise) falls through to the
 *     legacy paths below.
 *  3. Objects flagged with `__braintrust_parameters_marker === true` become a
 *     `"braintrust.parameters"` record carrying their `schema` and source ids.
 *  4. Any other object is treated as a map of parameter name to definition
 *     and becomes a `"braintrust.staticParameters"` record.
 *
 * @param raw - The evaluator's parameters value; may be a `Promise`.
 * @param helpers - Optional SDK/schema helpers; each one may be `null`.
 * @returns The serialized description, or `undefined` when there is nothing
 *          to serialize.
 */
async function serializeEvaluatorParameters(
  raw: unknown,
  helpers?: ParameterSerializationHelpers,
): Promise<unknown | undefined> {
  if (raw === undefined || raw === null) {
    return undefined;
  }

  const resolved = raw instanceof Promise ? await raw : raw;
  if (!isObject(resolved)) {
    return undefined;
  }

  if (helpers?.sdkSerializeParameters) {
    try {
      // `await` is required here: without it a rejected promise returned by
      // the SDK serializer would bypass this try/catch and the legacy
      // fallback below would never run.
      return await helpers.sdkSerializeParameters(resolved);
    } catch {
      // Fallback to legacy serialization below when SDK internals are unavailable.
    }
  }

  const marker = Reflect.get(resolved, "__braintrust_parameters_marker");
  if (marker === true) {
    const schema = Reflect.get(resolved, "schema");
    const source = {
      parametersId: Reflect.get(resolved, "id"),
      slug: Reflect.get(resolved, "slug"),
      name: Reflect.get(resolved, "name"),
      projectId: Reflect.get(resolved, "projectId"),
      version: Reflect.get(resolved, "version"),
    };
    return {
      type: "braintrust.parameters",
      schema,
      source,
    };
  }

  const schema: Record<string, unknown> = {};
  for (const [name, value] of Object.entries(resolved)) {
    if (isObject(value) && value.type === "prompt") {
      // Prompt parameter: optionally convert its default via the SDK helper.
      let promptDefault = value.default;
      if (
        promptDefault !== undefined &&
        helpers?.promptDefinitionToPromptData
      ) {
        try {
          promptDefault = helpers.promptDefinitionToPromptData(promptDefault);
        } catch {
          // Keep raw prompt default when conversion utility is unavailable.
        }
      }
      schema[name] = {
        type: "prompt",
        ...(promptDefault !== undefined ? { default: promptDefault } : {}),
        ...(typeof value.description === "string"
          ? { description: value.description }
          : {}),
      };
    } else {
      // Data parameter: describe it with a JSON schema when a converter is
      // available, and surface the schema's own default/description.
      const jsonSchema = helpers?.zodToJsonSchema
        ? helpers.zodToJsonSchema(value)
        : {};
      schema[name] = {
        type: "data",
        schema: jsonSchema,
        ...(Object.prototype.hasOwnProperty.call(jsonSchema, "default")
          ? { default: Reflect.get(jsonSchema, "default") }
          : {}),
        ...(typeof Reflect.get(jsonSchema, "description") === "string"
          ? { description: Reflect.get(jsonSchema, "description") as string }
          : {}),
      };
    }
  }

  return {
    type: "braintrust.staticParameters",
    schema,
    source: null,
  };
}

async function buildEvaluatorDefinitions(evaluators: EvaluatorEntry[]) {
const serializationHelpers = await loadParameterSerializationHelpers();
const result: Record<
Expand Down
Loading
Loading