From 29834ad23b369809c95772f2252dedfda9a192e5 Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Thu, 19 Mar 2026 15:25:49 -0700 Subject: [PATCH 1/8] Update sdk --- js/src/cli/index.ts | 146 ++++++++++++++++++++++++++++ js/src/exports.ts | 1 + js/src/logger.ts | 232 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 370 insertions(+), 9 deletions(-) diff --git a/js/src/cli/index.ts b/js/src/cli/index.ts index 48a38be3f..d67532b2f 100755 --- a/js/src/cli/index.ts +++ b/js/src/cli/index.ts @@ -14,11 +14,13 @@ import pluralize from "pluralize"; import { login, init as _initExperiment, + initDataset, Experiment, BaseMetadata, Dataset, type ParametersRef, RemoteEvalParameters, + _internalGetGlobalState, } from "../logger"; import type { ProgressReporter } from "../reporters/types"; import { @@ -1032,6 +1034,85 @@ function addCompileArgs(parser: ArgumentParser) { }); } +interface DatasetCommandArgs { + api_key?: string; + org_name?: string; + app_url?: string; + project: string; + dataset: string; + debug_logging?: "error" | "warn" | "info" | "debug"; +} + +async function openDataset(args: DatasetCommandArgs) { + await login({ + apiKey: args.api_key, + orgName: args.org_name, + appUrl: args.app_url, + debugLogLevel: args.debug_logging, + }); + return initDataset({ + project: args.project, + dataset: args.dataset, + }); +} + +async function datasetSnapshotCommand( + args: DatasetCommandArgs & { name: string; description?: string }, +) { + const dataset = await openDataset(args); + const snapshot = await dataset.createSnapshot({ + name: args.name, + description: args.description, + }); + console.log( + JSON.stringify( + { + id: snapshot.id, + name: snapshot.name, + xact_id: snapshot.xact_id, + created_at: snapshot.created_at, + }, + null, + 2, + ), + ); +} + +async function datasetSnapshotsCommand(args: DatasetCommandArgs) { + const dataset = await openDataset(args); + const snapshots = await dataset.listSnapshots(); + if (snapshots.length === 0) { + console.log("No snapshots."); + return; + } + for (const snap of snapshots) { + console.log( + `${snap.name}\txact_id=${snap.xact_id}\tid=${snap.id}\tcreated=${snap.created_at}`, + ); + } +} + +async function datasetTagEnvCommand( + args: DatasetCommandArgs & { env: string; version?: string }, +) { + const dataset = await openDataset(args); + const datasetId = await dataset.id; + const objectVersion = + args.version ?? (await dataset.version()); + if (!objectVersion) { + console.error("Dataset has no records — nothing to tag."); + process.exit(1); + } + const state = _internalGetGlobalState(); + await state.apiConn().put_json( + `environment-object/dataset/${datasetId}/${encodeURIComponent(args.env)}`, + { object_version: objectVersion }, + ); + console.log( + `Tagged version ${objectVersion} with environment "${args.env}".`, + ); +} + async function main() { const parser = new ArgumentParser({ description: "Braintrust CLI", @@ -1157,6 +1238,71 @@ async function main() { }); parser_pull.set_defaults({ func: pullCommand }); + // -- dataset-snapshot: create a named snapshot -- + const parser_dataset_snapshot = subparser.add_parser("dataset-snapshot", { + help: "Create a named snapshot of a dataset's current version.", + parents: [parentParser], + }); + addAuthArgs(parser_dataset_snapshot); + addDebugLoggingArg(parser_dataset_snapshot); + parser_dataset_snapshot.add_argument("--project", { + help: "The project containing the dataset.", + required: true, + }); + parser_dataset_snapshot.add_argument("--dataset", { + help: "The name of the dataset.", + required: true, + }); + parser_dataset_snapshot.add_argument("--name", { + help: "A name for the snapshot.", + required: true, + }); + parser_dataset_snapshot.add_argument("--description", { + help: "An optional description for the snapshot.", + }); + parser_dataset_snapshot.set_defaults({ func: datasetSnapshotCommand }); + + // -- dataset-snapshots: list snapshots -- + const parser_dataset_snapshots = subparser.add_parser("dataset-snapshots", { + help: "List all named snapshots for a dataset.", + parents: [parentParser], + }); + addAuthArgs(parser_dataset_snapshots); + addDebugLoggingArg(parser_dataset_snapshots); + parser_dataset_snapshots.add_argument("--project", { + help: "The project containing the dataset.", + required: true, + }); + parser_dataset_snapshots.add_argument("--dataset", { + help: "The name of the dataset.", + required: true, + }); + parser_dataset_snapshots.set_defaults({ func: datasetSnapshotsCommand }); + + // -- dataset-tag-env: tag a version with an environment -- + const parser_dataset_tag_env = subparser.add_parser("dataset-tag-env", { + help: "Tag a dataset version with an environment (e.g. staging, production).", + parents: [parentParser], + }); + addAuthArgs(parser_dataset_tag_env); + addDebugLoggingArg(parser_dataset_tag_env); + parser_dataset_tag_env.add_argument("--project", { + help: "The project containing the dataset.", + required: true, + }); + parser_dataset_tag_env.add_argument("--dataset", { + help: "The name of the dataset.", + required: true, + }); + parser_dataset_tag_env.add_argument("--env", { + help: "The environment slug to tag (e.g. staging, production).", + required: true, + }); + parser_dataset_tag_env.add_argument("--version", { + help: "The version (xact_id) to tag. If omitted, uses the dataset's current version.", + }); + parser_dataset_tag_env.set_defaults({ func: datasetTagEnvCommand }); + const parsed = normalizeDebugLoggingArgs(parser.parse_args()); try { diff --git a/js/src/exports.ts b/js/src/exports.ts index 6eb2b5c1f..bfede3e19 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -8,6 +8,7 @@ export type { CompiledPromptParams, CompletionPrompt, ContextParentSpanIds, + DatasetSnapshot, DataSummary, DatasetSummary, DefaultMetadataType, diff --git a/js/src/logger.ts b/js/src/logger.ts index 0560c52e7..c475da0ed 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -87,6 +87,16 @@ const BRAINTRUST_PARAMS = Object.keys(braintrustModelParamsSchema.shape); // 6 MB for the AWS lambda gateway (from our own testing). export const DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024; +const datasetSnapshotResponseSchema = z.object({ + id: z.string().uuid(), + dataset_id: z.string().uuid(), + name: z.string(), + description: z.string().nullish(), + xact_id: z.string(), + created_at: z.string(), +}); +export type DatasetSnapshot = z.infer; + const parametersRowSchema = z.object({ id: z.string().uuid(), _xact_id: z.string(), @@ -1254,6 +1264,50 @@ class HTTPConnection { return await resp.json(); } + async put_json( + path: string, + args: Record | undefined = undefined, + ) { + const this_fetch = this.fetch; + const this_base_url = this.base_url; + const this_headers = this.headers; + const resp = await checkResponse( + await this_fetch(_urljoin(this_base_url, path), { + method: "PUT", + headers: { + Accept: "application/json", + "Content-Type": "application/json", + ...this_headers, + }, + body: args ? JSON.stringify(args) : undefined, + keepalive: true, + }), + ); + return await resp.json(); + } + + async delete_json( + path: string, + args: Record | undefined = undefined, + ) { + const this_fetch = this.fetch; + const this_base_url = this.base_url; + const this_headers = this.headers; + const resp = await checkResponse( + await this_fetch(_urljoin(this_base_url, path), { + method: "DELETE", + headers: { + Accept: "application/json", + "Content-Type": "application/json", + ...this_headers, + }, + body: args ? JSON.stringify(args) : undefined, + keepalive: true, + }), + ); + return await resp.json(); + } + // Custom inspect for Node.js console.log [Symbol.for("nodejs.util.inspect.custom")](): string { return `HTTPConnection { @@ -3682,7 +3736,7 @@ export function init( * Alias for init(options). */ export function initExperiment( - options: Readonly>, + options: Readonly>, ): InitializedExperiment; /** @@ -3699,7 +3753,7 @@ export function initExperiment( * `initExperiment(project, options)`. */ export function initExperiment( - projectOrOptions: string | Readonly>, + projectOrOptions: string | Readonly>, optionalOptions?: Readonly>, ): InitializedExperiment { const options = ((): Readonly> => { @@ -3759,6 +3813,7 @@ export type InitDatasetOptions = dataset?: string; description?: string; version?: string; + environment?: string; projectId?: string; metadata?: Record; state?: BraintrustState; @@ -3769,6 +3824,64 @@ export type FullInitDatasetOptions = { project?: string; } & InitDatasetOptions; +async function resolveDatasetVersion({ + state, + lazyMetadata, + version, +}: { + state: BraintrustState; + lazyMetadata: LazyValue; + version: string; +}): Promise { + // If it looks like a numeric xact_id, use it directly. + if (/^\d+$/.test(version)) { + return version; + } + // Otherwise, treat it as a snapshot name and resolve via API. + const metadata = await lazyMetadata.get(); + const datasetId = metadata.dataset.id; + try { + const snapshots = await state.appConn().get_json("api/dataset_snapshot/list", { + dataset_id: datasetId, + }); + const match = (snapshots as { name: string; xact_id: string }[]).find( + (s) => s.name === version, + ); + if (match) { + return match.xact_id; + } + } catch { + // Fall through to HEAD on error. + } + return undefined; +} + +async function resolveDatasetEnvironment({ + state, + lazyMetadata, + environment, +}: { + state: BraintrustState; + lazyMetadata: LazyValue; + environment: string; +}): Promise { + const metadata = await lazyMetadata.get(); + const datasetId = metadata.dataset.id; + try { + const envObject = await state + .apiConn() + .get_json(`environment-object/dataset/${datasetId}/${environment}`); + const objectVersion = (envObject as { object_version?: string }) + .object_version; + if (objectVersion) { + return objectVersion; + } + } catch { + // Fall through to HEAD if no association found. + } + return undefined; +} + /** * Create a new dataset in a specified project. If the project does not exist, it will be created. * @@ -3776,6 +3889,8 @@ export type FullInitDatasetOptions = { * @param options.project The name of the project to create the dataset in. Must specify at least one of `project` or `projectId`. * @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically. * @param options.description An optional description of the dataset. + * @param options.version Pin the dataset to a specific version. Can be a numeric xact_id or a snapshot name. If a snapshot name is provided and no matching snapshot is found, falls back to the latest version. + * @param options.environment Pin the dataset to the version tagged with this environment slug. If no version is tagged with the environment, falls back to the latest version. * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev. * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API key is specified, will prompt the user to login. * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple. @@ -3832,6 +3947,7 @@ export function initDataset< dataset, description, version, + environment, appUrl, apiKey, orgName, @@ -3844,6 +3960,12 @@ export function initDataset< _internal_btql, } = options; + if (version !== undefined && environment !== undefined) { + throw new Error( + "Cannot specify both `version` and `environment`. Use one or the other.", + ); + } + const state = stateArg ?? _globalState; const lazyMetadata: LazyValue = new LazyValue( @@ -3883,10 +4005,29 @@ export function initDataset< }, ); + const resolvedVersion: LazyValue = new LazyValue( + async () => { + if (version !== undefined) { + return await resolveDatasetVersion({ + state, + lazyMetadata, + version, + }); + } else if (environment !== undefined) { + return await resolveDatasetEnvironment({ + state, + lazyMetadata, + environment, + }); + } + return undefined; + }, + ); + return new Dataset( stateArg ?? _globalState, lazyMetadata, - version, + resolvedVersion, legacy, _internal_btql, ); @@ -5589,9 +5730,7 @@ function validateAndSanitizeExperimentLogFullArgs( throw new Error("scores must be specified"); } - if (hasDataset && event.datasetRecordId === undefined) { - throw new Error("datasetRecordId must be specified when using a dataset"); - } else if (!hasDataset && event.datasetRecordId !== undefined) { + if (!hasDataset && event.datasetRecordId !== undefined) { throw new Error( "datasetRecordId cannot be specified when not using a dataset", ); @@ -5618,7 +5757,7 @@ export class ObjectFetcher implements AsyncIterable< | "experiment" | "project_logs" | "playground_logs", - private pinnedVersion: string | undefined, + protected pinnedVersion: string | undefined, // eslint-disable-next-line @typescript-eslint/no-explicit-any private mutateRecord?: (r: any) => WithTransactionId, private _internal_btql?: Record, @@ -5752,6 +5891,10 @@ export class ObjectFetcher implements AsyncIterable< } public async version(options?: { batchSize?: number }) { + // Resolve any lazy pinned version (e.g. from environment lookup) before + // checking the field — subclasses like Dataset populate pinnedVersion + // inside getState(). + await this.getState(); if (this.pinnedVersion !== undefined) { return this.pinnedVersion; } else { @@ -6764,13 +6907,14 @@ export class Dataset< IsLegacyDataset extends boolean = typeof DEFAULT_IS_LEGACY_DATASET, > extends ObjectFetcher> { private readonly lazyMetadata: LazyValue; + private readonly lazyPinnedVersion: LazyValue | undefined; private readonly __braintrust_dataset_marker = true; private newRecords = 0; constructor( private state: BraintrustState, lazyMetadata: LazyValue, - pinnedVersion?: string, + pinnedVersion?: string | LazyValue, legacy?: IsLegacyDataset, _internal_btql?: Record, ) { @@ -6784,9 +6928,11 @@ export class Dataset< `Records will be fetched from this dataset in the legacy format, with the "expected" field renamed to "output". Please update your code to use "expected", and use \`braintrust.initDataset()\` with \`{ useOutput: false }\`, which will become the default in a future version of Braintrust.`, ); } + const staticVersion = + pinnedVersion instanceof LazyValue ? undefined : pinnedVersion; super( "dataset", - pinnedVersion, + staticVersion, (r: AnyDatasetRecord) => // eslint-disable-next-line @typescript-eslint/consistent-type-assertions ensureDatasetRecord( @@ -6796,6 +6942,8 @@ export class Dataset< _internal_btql, ); this.lazyMetadata = lazyMetadata; + this.lazyPinnedVersion = + pinnedVersion instanceof LazyValue ? pinnedVersion : undefined; } public get id(): Promise { @@ -6823,6 +6971,10 @@ export class Dataset< protected async getState(): Promise { // Ensure the login state is populated by awaiting lazyMetadata. await this.lazyMetadata.get(); + // Resolve lazy pinned version (e.g. from environment or snapshot name lookup). + if (this.lazyPinnedVersion !== undefined && this.pinnedVersion === undefined) { + this.pinnedVersion = await this.lazyPinnedVersion.get(); + } return this.state; } @@ -7002,6 +7154,68 @@ export class Dataset< return id; } + /** + * Create a named snapshot of the dataset at the current version. Flushes any pending writes first. + * + * @param options.name A human-readable name for the snapshot. Must be unique within the dataset. + * @param options.description An optional description of the snapshot. + * @returns The created snapshot object. + */ + public async createSnapshot({ + name, + description, + }: { + readonly name: string; + readonly description?: string; + }): Promise { + await this.flush(); + const state = await this.getState(); + const datasetId = await this.id; + const currentVersion = await this.version(); + if (currentVersion === undefined) { + throw new Error("Cannot create snapshot: dataset has no records"); + } + const response = await state + .appConn() + .post_json("api/dataset_snapshot/create", { + dataset_id: datasetId, + name, + description, + xact_id: currentVersion, + }); + return datasetSnapshotResponseSchema.parse(response); + } + + /** + * List all named snapshots for this dataset. + * + * @returns An array of snapshot objects, ordered by creation time (newest first). + */ + public async listSnapshots(): Promise { + const state = await this.getState(); + const datasetId = await this.id; + const response = await state.appConn().get_json("api/dataset_snapshot/list", { + dataset_id: datasetId, + }); + return datasetSnapshotResponseSchema.array().parse(response); + } + + /** + * Delete a named snapshot from this dataset. + * + * @param snapshotId The unique identifier of the snapshot to delete. + * @returns The deleted snapshot object. + */ + public async deleteSnapshot(snapshotId: string): Promise { + const state = await this.getState(); + const response = await state + .appConn() + .post_json("api/dataset_snapshot/delete_id", { + id: snapshotId, + }); + return datasetSnapshotResponseSchema.parse(response); + } + /** * Summarize the dataset, including high level metrics about its size and other metadata. * @param summarizeData Whether to summarize the data. If false, only the metadata will be returned. From 2f93fca57ad316ac1554026e83c07e64c66444f4 Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Mon, 30 Mar 2026 16:55:14 -0700 Subject: [PATCH 2/8] sdk updates --- js/src/generated_types.ts | 4 + js/src/logger.test.ts | 126 +++++++++++++++++++++++ js/src/logger.ts | 210 ++++++++++++++++++++++++++++---------- 3 files changed, 286 insertions(+), 54 deletions(-) diff --git a/js/src/generated_types.ts b/js/src/generated_types.ts index 6d0414645..dfc78157e 100644 --- a/js/src/generated_types.ts +++ b/js/src/generated_types.ts @@ -1825,6 +1825,8 @@ export const RunEval = z.object({ data: z.union([ z.object({ dataset_id: z.string(), + dataset_version: z.union([z.string(), z.null()]).optional(), + dataset_environment: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), @@ -1832,6 +1834,8 @@ export const RunEval = z.object({ z.object({ project_name: z.string(), dataset_name: z.string(), + dataset_version: z.union([z.string(), z.null()]).optional(), + dataset_environment: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index fc4f14f7b..07ea928ce 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -4,6 +4,7 @@ import { vi, expect, test, describe, beforeEach, afterEach } from "vitest"; import { _exportsForTestingOnly, init, + initDataset, initLogger, Prompt, BraintrustState, @@ -453,6 +454,93 @@ test("init accepts dataset with id and version", () => { expect(datasetWithVersion.version).toBe("v2"); }); +test("init accepts dataset with id and environment", () => { + const datasetWithEnvironment = { + id: "dataset-id-123", + environment: "production", + }; + + expect(datasetWithEnvironment.id).toBe("dataset-id-123"); + expect(datasetWithEnvironment.environment).toBe("production"); +}); + +test("dataset.toEvalData preserves dataset_environment", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + environment: "production", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_environment: "production", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init resolves dataset environment before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const getJson = vi.spyOn(state.apiConn(), "get_json").mockResolvedValue({ + object_version: "123", + }); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(getJson).toHaveBeenCalledWith( + "environment-object/dataset/00000000-0000-0000-0000-000000000002/production", + ); + expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + describe("loader version precedence", () => { let state: BraintrustState; let getJson: ReturnType; @@ -614,6 +702,44 @@ describe("loader version precedence", () => { version: "v1", }); }); + + test("initDataset resolves env to version before fetching dataset rows", async () => { + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + getJson.mockResolvedValueOnce({ + object_version: "123", + }); + const post = vi.spyOn(state.apiConn(), "post").mockResolvedValue({ + json: vi.fn().mockResolvedValue({ data: [], cursor: undefined }), + } as Response); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + environment: "production", + state, + }); + + await dataset.fetchedData(); + + expect(getJson).toHaveBeenCalledWith( + "environment-object/dataset/00000000-0000-0000-0000-000000000002/production", + ); + const requestBody = post.mock.calls[0]?.[1] as Record; + expect(requestBody).toMatchObject({ + version: "123", + query_source: "js_sdk_object_fetcher_dataset", + }); + expect(requestBody).not.toHaveProperty("env"); + }); }); describe("prompt.build structured output templating", () => { diff --git a/js/src/logger.ts b/js/src/logger.ts index add7aa4b5..fb1c04644 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -3426,6 +3426,7 @@ type InitOpenOption = { export interface DatasetRef { id: string; version?: string; + environment?: string; } export interface ParametersRef { @@ -3652,20 +3653,13 @@ export function init( } if (dataset !== undefined) { - if ( - "id" in dataset && - typeof dataset.id === "string" && - !("__braintrust_dataset_marker" in dataset) - ) { - // Simple {id: ..., version?: ...} object - args["dataset_id"] = dataset.id; - if ("version" in dataset && dataset.version !== undefined) { - args["dataset_version"] = dataset.version; - } - } else { - // Full Dataset object - args["dataset_id"] = await (dataset as AnyDataset).id; - args["dataset_version"] = await (dataset as AnyDataset).version(); + const datasetSelection = await serializeDatasetForExperiment({ + dataset, + state, + }); + args["dataset_id"] = datasetSelection.datasetId; + if (datasetSelection.datasetVersion !== undefined) { + args["dataset_version"] = datasetSelection.datasetVersion; } } @@ -3854,16 +3848,19 @@ async function resolveDatasetVersion({ const metadata = await lazyMetadata.get(); const datasetId = metadata.dataset.id; try { - const snapshots = await state.appConn().get_json("api/dataset_snapshot/list", { - dataset_id: datasetId, - }); + const snapshots = await state + .appConn() + .get_json("api/dataset_snapshot/get", { + dataset_id: datasetId, + }); const match = (snapshots as { name: string; xact_id: string }[]).find( (s) => s.name === version, ); if (match) { return match.xact_id; } - } catch { + } catch (e) { + console.warn(e); // Fall through to HEAD on error. } return undefined; @@ -3871,28 +3868,91 @@ async function resolveDatasetVersion({ async function resolveDatasetEnvironment({ state, - lazyMetadata, + datasetId, environment, }: { state: BraintrustState; - lazyMetadata: LazyValue; + datasetId: string; environment: string; }): Promise { - const metadata = await lazyMetadata.get(); - const datasetId = metadata.dataset.id; try { - const envObject = await state + const response = await state .apiConn() - .get_json(`environment-object/dataset/${datasetId}/${environment}`); - const objectVersion = (envObject as { object_version?: string }) + .get_json( + `environment-object/dataset/${datasetId}/${encodeURIComponent(environment)}`, + ); + return z.object({ object_version: z.string() }).parse(response) .object_version; - if (objectVersion) { - return objectVersion; + } catch (e) { + console.warn(e); + return undefined; + } +} + +async function resolveDatasetEnvironmentForMetadata({ + state, + lazyMetadata, + environment, +}: { + state: BraintrustState; + lazyMetadata: LazyValue; + environment: string; +}): Promise { + const metadata = await lazyMetadata.get(); + return await resolveDatasetEnvironment({ + state, + datasetId: metadata.dataset.id, + environment, + }); +} + +async function serializeDatasetForExperiment({ + dataset, + state, +}: { + dataset: AnyDataset | DatasetRef; + state: BraintrustState; +}): Promise<{ datasetId: string; datasetVersion?: string }> { + if (!Dataset.isDataset(dataset)) { + if (dataset.version !== undefined && dataset.environment !== undefined) { + throw new Error( + "Cannot specify both dataset.version and dataset.environment. Use one or the other.", + ); } - } catch { - // Fall through to HEAD if no association found. + + if (dataset.environment !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: await resolveDatasetEnvironment({ + state, + datasetId: dataset.id, + environment: dataset.environment, + }), + }; + } + + return { + datasetId: dataset.id, + datasetVersion: dataset.version, + }; } - return undefined; + + const evalData = await dataset.toEvalData(); + if (evalData.dataset_environment !== undefined) { + return { + datasetId: evalData.dataset_id, + datasetVersion: await resolveDatasetEnvironment({ + state, + datasetId: evalData.dataset_id, + environment: evalData.dataset_environment, + }), + }; + } + + return { + datasetId: evalData.dataset_id, + datasetVersion: evalData.dataset_version, + }; } /** @@ -4018,29 +4078,30 @@ export function initDataset< }, ); - const resolvedVersion: LazyValue = new LazyValue( - async () => { - if (version !== undefined) { - return await resolveDatasetVersion({ - state, - lazyMetadata, - version, - }); - } else if (environment !== undefined) { - return await resolveDatasetEnvironment({ - state, - lazyMetadata, - environment, - }); - } - return undefined; - }, - ); + const resolvedVersion = + version !== undefined + ? new LazyValue(async () => { + return await resolveDatasetVersion({ + state, + lazyMetadata, + version, + }); + }) + : environment !== undefined + ? new LazyValue(async () => { + return await resolveDatasetEnvironmentForMetadata({ + state, + lazyMetadata, + environment, + }); + }) + : undefined; return new Dataset( stateArg ?? _globalState, lazyMetadata, resolvedVersion, + environment, legacy, _internal_btql, ); @@ -5773,7 +5834,8 @@ export class ObjectFetcher implements AsyncIterable< protected pinnedVersion: string | undefined, // eslint-disable-next-line @typescript-eslint/no-explicit-any private mutateRecord?: (r: any) => WithTransactionId, - private _internal_btql?: Record, + protected _internal_btql?: Record, + protected pinnedEnvironment?: string, ) {} public get id(): Promise { @@ -5841,6 +5903,12 @@ export class ObjectFetcher implements AsyncIterable< version: this.pinnedVersion, } : {}), + ...(this.pinnedVersion === undefined && + this.pinnedEnvironment !== undefined + ? { + env: this.pinnedEnvironment, + } + : {}), }, { headers: { "Accept-Encoding": "gzip" } }, ); @@ -6928,6 +6996,7 @@ export class Dataset< private state: BraintrustState, lazyMetadata: LazyValue, pinnedVersion?: string | LazyValue, + pinnedEnvironment?: string, legacy?: IsLegacyDataset, _internal_btql?: Record, ) { @@ -6953,6 +7022,7 @@ export class Dataset< isLegacyDataset, ) as WithTransactionId>, _internal_btql, + pinnedEnvironment, ); this.lazyMetadata = lazyMetadata; this.lazyPinnedVersion = @@ -6981,11 +7051,41 @@ export class Dataset< return this.state; } + public async toEvalData(): Promise<{ + dataset_id: string; + dataset_version?: string; + dataset_environment?: string; + _internal_btql?: Record; + }> { + const metadata = await this.lazyMetadata.get(); + + return { + dataset_id: metadata.dataset.id, + ...(this.pinnedEnvironment !== undefined + ? { + dataset_environment: this.pinnedEnvironment, + } + : {}), + ...(this.pinnedEnvironment === undefined && + this.pinnedVersion !== undefined + ? { + dataset_version: this.pinnedVersion, + } + : {}), + ...(this._internal_btql !== undefined + ? { _internal_btql: this._internal_btql } + : {}), + }; + } + protected async getState(): Promise { // Ensure the login state is populated by awaiting lazyMetadata. await this.lazyMetadata.get(); // Resolve lazy pinned version (e.g. from environment or snapshot name lookup). - if (this.lazyPinnedVersion !== undefined && this.pinnedVersion === undefined) { + if ( + this.lazyPinnedVersion !== undefined && + this.pinnedVersion === undefined + ) { this.pinnedVersion = await this.lazyPinnedVersion.get(); } return this.state; @@ -7190,7 +7290,7 @@ export class Dataset< } const response = await state .appConn() - .post_json("api/dataset_snapshot/create", { + .post_json("api/dataset_snapshot/register", { dataset_id: datasetId, name, description, @@ -7207,9 +7307,11 @@ export class Dataset< public async listSnapshots(): Promise { const state = await this.getState(); const datasetId = await this.id; - const response = await state.appConn().get_json("api/dataset_snapshot/list", { - dataset_id: datasetId, - }); + const response = await state + .appConn() + .get_json("api/dataset_snapshot/get", { + dataset_id: datasetId, + }); return datasetSnapshotResponseSchema.array().parse(response); } From f21ce0da52a94a013d41c519151638b14deb2dc0 Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Tue, 31 Mar 2026 13:43:01 -0700 Subject: [PATCH 3/8] fix col names --- js/dev/server.ts | 13 ++-- js/src/cli/index.ts | 17 ++--- js/src/logger.test.ts | 154 ++++++++++++++++++++++++++++++++++++++++++ js/src/logger.ts | 20 +++--- 4 files changed, 184 insertions(+), 20 deletions(-) diff --git a/js/dev/server.ts b/js/dev/server.ts index aee357ad5..e35d165ae 100644 --- a/js/dev/server.ts +++ b/js/dev/server.ts @@ -314,9 +314,12 @@ async function getDataset( state, project: data.project_name, dataset: data.dataset_name, + version: data.dataset_version ?? undefined, + environment: data.dataset_environment ?? undefined, _internal_btql: data._internal_btql ?? undefined, }); - } else if ("dataset_id" in data) { + } + if ("dataset_id" in data) { const datasetInfo = await getDatasetById({ state, datasetId: data.dataset_id, @@ -325,12 +328,14 @@ async function getDataset( state, projectId: datasetInfo.projectId, dataset: datasetInfo.dataset, + version: data.dataset_version ?? undefined, + environment: data.dataset_environment ?? undefined, _internal_btql: data._internal_btql ?? undefined, }); - } else { - // eslint-disable-next-line @typescript-eslint/consistent-type-assertions - return data.data as EvalCase[]; } + + // eslint-disable-next-line @typescript-eslint/consistent-type-assertions + return data.data as EvalCase[]; } const datasetFetchSchema = z.object({ diff --git a/js/src/cli/index.ts b/js/src/cli/index.ts index d67532b2f..ef5d3d580 100755 --- a/js/src/cli/index.ts +++ b/js/src/cli/index.ts @@ -1070,7 +1070,7 @@ async function datasetSnapshotCommand( id: snapshot.id, name: snapshot.name, xact_id: snapshot.xact_id, - created_at: snapshot.created_at, + created: snapshot.created, }, null, 2, @@ -1087,7 +1087,7 @@ async function datasetSnapshotsCommand(args: DatasetCommandArgs) { } for (const snap of snapshots) { console.log( - `${snap.name}\txact_id=${snap.xact_id}\tid=${snap.id}\tcreated=${snap.created_at}`, + `${snap.name}\txact_id=${snap.xact_id}\tid=${snap.id}\tcreated=${snap.created}`, ); } } @@ -1097,17 +1097,18 @@ async function datasetTagEnvCommand( ) { const dataset = await openDataset(args); const datasetId = await dataset.id; - const objectVersion = - args.version ?? (await dataset.version()); + const objectVersion = args.version ?? (await dataset.version()); if (!objectVersion) { console.error("Dataset has no records — nothing to tag."); process.exit(1); } const state = _internalGetGlobalState(); - await state.apiConn().put_json( - `environment-object/dataset/${datasetId}/${encodeURIComponent(args.env)}`, - { object_version: objectVersion }, - ); + await state + .apiConn() + .put_json( + `environment-object/dataset/${datasetId}/${encodeURIComponent(args.env)}`, + { object_version: objectVersion }, + ); console.log( `Tagged version ${objectVersion} with environment "${args.env}".`, ); diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index 07ea928ce..2bb0ea556 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -740,6 +740,160 @@ describe("loader version precedence", () => { }); expect(requestBody).not.toHaveProperty("env"); }); + + test("createSnapshot returns the created snapshot from the register response", async () => { + const postJson = vi.spyOn(state.appConn(), "post_json"); + postJson + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + dataset_snapshot: { + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: "Pinned snapshot", + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + found_existing: false, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + state, + }); + + await expect( + dataset.createSnapshot({ + name: "production", + description: "Pinned snapshot", + }), + ).resolves.toEqual({ + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: "Pinned snapshot", + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }); + + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/dataset_snapshot/register", + { + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: "Pinned snapshot", + xact_id: "123", + }, + ); + }); + + test("listSnapshots returns dataset snapshots", async () => { + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + const appGetJson = vi + .spyOn(state.appConn(), "get_json") + .mockResolvedValueOnce([ + { + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + state, + }); + + await expect(dataset.listSnapshots()).resolves.toEqual([ + { + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + expect(appGetJson).toHaveBeenCalledWith("api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + }); + }); + + test("deleteSnapshot returns the deleted snapshot", async () => { + const postJson = vi.spyOn(state.appConn(), "post_json"); + postJson + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + state, + }); + + await expect( + dataset.deleteSnapshot("00000000-0000-0000-0000-000000000003"), + ).resolves.toEqual({ + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }); + + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/dataset_snapshot/delete_id", + { + id: "00000000-0000-0000-0000-000000000003", + }, + ); + }); }); describe("prompt.build structured output templating", () => { diff --git a/js/src/logger.ts b/js/src/logger.ts index fb1c04644..92b51a2f9 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -96,10 +96,15 @@ const datasetSnapshotResponseSchema = z.object({ name: z.string(), description: z.string().nullish(), xact_id: z.string(), - created_at: z.string(), + created: z.string(), }); export type DatasetSnapshot = z.infer; +const datasetSnapshotRegisterResponseSchema = z.object({ + dataset_snapshot: datasetSnapshotResponseSchema, + found_existing: z.boolean().optional(), +}); + const parametersRowSchema = z.object({ id: z.string().uuid(), _xact_id: z.string(), @@ -3848,14 +3853,12 @@ async function resolveDatasetVersion({ const metadata = await lazyMetadata.get(); const datasetId = metadata.dataset.id; try { - const snapshots = await state - .appConn() - .get_json("api/dataset_snapshot/get", { + const snapshots = datasetSnapshotResponseSchema.array().parse( + await state.appConn().get_json("api/dataset_snapshot/get", { dataset_id: datasetId, - }); - const match = (snapshots as { name: string; xact_id: string }[]).find( - (s) => s.name === version, + }), ); + const match = snapshots.find((s) => s.name === version); if (match) { return match.xact_id; } @@ -7296,7 +7299,8 @@ export class Dataset< description, xact_id: currentVersion, }); - return datasetSnapshotResponseSchema.parse(response); + return datasetSnapshotRegisterResponseSchema.parse(response) + .dataset_snapshot; } /** From f9648a87b37da3afdc6dfd69de4dd78801e4067d Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Wed, 1 Apr 2026 11:30:15 -0700 Subject: [PATCH 4/8] fix env resolution --- js/src/logger.test.ts | 199 ++++++++++++++++++++++++++++++++++++++++++ js/src/logger.ts | 114 +++++++++++++++--------- 2 files changed, 274 insertions(+), 39 deletions(-) diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index 2bb0ea556..0aaf0145b 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -467,6 +467,9 @@ test("init accepts dataset with id and environment", () => { test("dataset.toEvalData preserves dataset_environment", async () => { const state = await _exportsForTestingOnly.simulateLoginForTests(); vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.apiConn(), "get_json").mockResolvedValue({ + object_version: "123", + }); vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ project: { id: "00000000-0000-0000-0000-000000000001", @@ -541,6 +544,86 @@ test("init resolves dataset environment before experiment registration", async ( vi.restoreAllMocks(); }); +test("init resolves dataset snapshot names before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const appGetJson = vi.spyOn(state.appConn(), "get_json").mockResolvedValue([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + version: "production", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(appGetJson).toHaveBeenCalledWith("api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + }); + expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init surfaces dataset environment lookup errors instead of falling back to latest", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.apiConn(), "get_json").mockRejectedValue( + new Error("environment lookup failed"), + ); + const postJson = vi.spyOn(state.appConn(), "post_json"); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }, + setCurrent: false, + state, + }); + + await expect(experiment.id).rejects.toThrow("environment lookup failed"); + expect(postJson).not.toHaveBeenCalled(); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + describe("loader version precedence", () => { let state: BraintrustState; let getJson: ReturnType; @@ -741,6 +824,122 @@ describe("loader version precedence", () => { expect(requestBody).not.toHaveProperty("env"); }); + test("initDataset resolves snapshot names before experiment registration when passed as a Dataset object", async () => { + const postJson = vi.spyOn(state.appConn(), "post_json"); + const appGetJson = vi.spyOn(state.appConn(), "get_json"); + postJson + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + appGetJson.mockResolvedValueOnce([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "production", + state, + }); + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(appGetJson).toHaveBeenCalledWith("api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + }); + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + }); + + test("initDataset surfaces snapshot lookup errors instead of falling back to latest", async () => { + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + vi.spyOn(state.appConn(), "get_json").mockRejectedValueOnce( + new Error("snapshot lookup failed"), + ); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "production", + state, + }); + + await expect(dataset.version()).rejects.toThrow("snapshot lookup failed"); + }); + + test("initDataset requires a matching snapshot name when a snapshot alias is requested", async () => { + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + vi.spyOn(state.appConn(), "get_json").mockResolvedValueOnce([]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "production", + state, + }); + + await expect(dataset.version()).rejects.toThrow( + 'Dataset snapshot "production" not found for 00000000-0000-0000-0000-000000000002', + ); + }); + test("createSnapshot returns the created snapshot from the register response", async () => { const postJson = vi.spyOn(state.appConn(), "post_json"); postJson diff --git a/js/src/logger.ts b/js/src/logger.ts index 92b51a2f9..fc8a38516 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -3836,37 +3836,57 @@ export type FullInitDatasetOptions = { project?: string; } & InitDatasetOptions; +async function getDatasetSnapshots({ + state, + datasetId, +}: { + state: BraintrustState; + datasetId: string; +}): Promise { + return datasetSnapshotResponseSchema.array().parse( + await state.appConn().get_json("api/dataset_snapshot/get", { + dataset_id: datasetId, + }), + ); +} + async function resolveDatasetVersion({ state, - lazyMetadata, + datasetId, version, }: { state: BraintrustState; - lazyMetadata: LazyValue; + datasetId: string; version: string; -}): Promise { +}): Promise { // If it looks like a numeric xact_id, use it directly. if (/^\d+$/.test(version)) { return version; } - // Otherwise, treat it as a snapshot name and resolve via API. - const metadata = await lazyMetadata.get(); - const datasetId = metadata.dataset.id; - try { - const snapshots = datasetSnapshotResponseSchema.array().parse( - await state.appConn().get_json("api/dataset_snapshot/get", { - dataset_id: datasetId, - }), - ); - const match = snapshots.find((s) => s.name === version); - if (match) { - return match.xact_id; - } - } catch (e) { - console.warn(e); - // Fall through to HEAD on error. + + const snapshots = await getDatasetSnapshots({ state, datasetId }); + const match = snapshots.find((s) => s.name === version); + if (!match) { + throw new Error(`Dataset snapshot "${version}" not found for ${datasetId}`); } - return undefined; + return match.xact_id; +} + +async function resolveDatasetVersionForMetadata({ + state, + lazyMetadata, + version, +}: { + state: BraintrustState; + lazyMetadata: LazyValue; + version: string; +}): Promise { + const metadata = await lazyMetadata.get(); + return await resolveDatasetVersion({ + state, + datasetId: metadata.dataset.id, + version, + }); } async function resolveDatasetEnvironment({ @@ -3877,19 +3897,14 @@ async function resolveDatasetEnvironment({ state: BraintrustState; datasetId: string; environment: string; -}): Promise { - try { - const response = await state - .apiConn() - .get_json( - `environment-object/dataset/${datasetId}/${encodeURIComponent(environment)}`, - ); - return z.object({ object_version: z.string() }).parse(response) - .object_version; - } catch (e) { - console.warn(e); - return undefined; - } +}): Promise { + const response = await state + .apiConn() + .get_json( + `environment-object/dataset/${datasetId}/${encodeURIComponent(environment)}`, + ); + return z.object({ object_version: z.string() }).parse(response) + .object_version; } async function resolveDatasetEnvironmentForMetadata({ @@ -3900,7 +3915,7 @@ async function resolveDatasetEnvironmentForMetadata({ state: BraintrustState; lazyMetadata: LazyValue; environment: string; -}): Promise { +}): Promise { const metadata = await lazyMetadata.get(); return await resolveDatasetEnvironment({ state, @@ -3934,9 +3949,19 @@ async function serializeDatasetForExperiment({ }; } + if (dataset.version !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: await resolveDatasetVersion({ + state, + datasetId: dataset.id, + version: dataset.version, + }), + }; + } + return { datasetId: dataset.id, - datasetVersion: dataset.version, }; } @@ -3952,9 +3977,19 @@ async function serializeDatasetForExperiment({ }; } + if (evalData.dataset_version !== undefined) { + return { + datasetId: evalData.dataset_id, + datasetVersion: await resolveDatasetVersion({ + state, + datasetId: evalData.dataset_id, + version: evalData.dataset_version, + }), + }; + } + return { datasetId: evalData.dataset_id, - datasetVersion: evalData.dataset_version, }; } @@ -3965,8 +4000,8 @@ async function serializeDatasetForExperiment({ * @param options.project The name of the project to create the dataset in. Must specify at least one of `project` or `projectId`. * @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically. * @param options.description An optional description of the dataset. - * @param options.version Pin the dataset to a specific version. Can be a numeric xact_id or a snapshot name. If a snapshot name is provided and no matching snapshot is found, falls back to the latest version. - * @param options.environment Pin the dataset to the version tagged with this environment slug. If no version is tagged with the environment, falls back to the latest version. + * @param options.version Pin the dataset to a specific version. Can be a numeric xact_id or a snapshot name. Snapshot names are resolved to a concrete xact_id and throw if no matching snapshot exists. + * @param options.environment Pin the dataset to the version tagged with this environment slug. Throws if the environment lookup fails. * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev. * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API key is specified, will prompt the user to login. * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple. @@ -4084,7 +4119,7 @@ export function initDataset< const resolvedVersion = version !== undefined ? new LazyValue(async () => { - return await resolveDatasetVersion({ + return await resolveDatasetVersionForMetadata({ state, lazyMetadata, version, @@ -7060,6 +7095,7 @@ export class Dataset< dataset_environment?: string; _internal_btql?: Record; }> { + await this.getState(); const metadata = await this.lazyMetadata.get(); return { From 35986b464ca1028f9731e3aeb5891c71b8121552 Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Wed, 1 Apr 2026 13:10:26 -0700 Subject: [PATCH 5/8] cleanup --- js/src/logger.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/js/src/logger.ts b/js/src/logger.ts index fc8a38516..99ffa8bf7 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -3969,11 +3969,7 @@ async function serializeDatasetForExperiment({ if (evalData.dataset_environment !== undefined) { return { datasetId: evalData.dataset_id, - datasetVersion: await resolveDatasetEnvironment({ - state, - datasetId: evalData.dataset_id, - environment: evalData.dataset_environment, - }), + datasetVersion: await dataset.version(), }; } @@ -5842,7 +5838,9 @@ function validateAndSanitizeExperimentLogFullArgs( throw new Error("scores must be specified"); } - if (!hasDataset && event.datasetRecordId !== undefined) { + if (hasDataset && event.datasetRecordId === undefined) { + throw new Error("datasetRecordId must be specified when using a dataset"); + } else if (!hasDataset && event.datasetRecordId !== undefined) { throw new Error( "datasetRecordId cannot be specified when not using a dataset", ); From a16b04a1a2ce5cf57e727fe8ff807b087ed3799e Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Wed, 1 Apr 2026 17:16:08 -0700 Subject: [PATCH 6/8] match prompt fallback behavior --- js/dev/server.ts | 76 +++++++++++++++-- js/src/generated_types.ts | 2 + js/src/logger.test.ts | 174 +++++++++++++++++++++++++++++++++++--- js/src/logger.ts | 163 ++++++++++++++++++++++++----------- 4 files changed, 342 insertions(+), 73 deletions(-) diff --git a/js/dev/server.ts b/js/dev/server.ts index e35d165ae..511a21ad2 100644 --- a/js/dev/server.ts +++ b/js/dev/server.ts @@ -310,28 +310,88 @@ async function getDataset( data: RunEvalRequest["data"], ): Promise> { if ("project_name" in data) { - return initDataset({ + const selectorCount = [ + data.dataset_version, + data.dataset_snapshot_name, + data.dataset_environment, + ].filter((value) => value != null).length; + if (selectorCount > 1) { + throw new Error( + "Cannot specify more than one of dataset_version, dataset_snapshot_name, and dataset_environment.", + ); + } + + const commonArgs = { state, project: data.project_name, dataset: data.dataset_name, - version: data.dataset_version ?? undefined, - environment: data.dataset_environment ?? undefined, _internal_btql: data._internal_btql ?? undefined, - }); + }; + + if (data.dataset_version != null) { + return initDataset({ + ...commonArgs, + version: data.dataset_version, + }); + } + if (data.dataset_snapshot_name != null) { + return initDataset({ + ...commonArgs, + snapshotName: data.dataset_snapshot_name, + }); + } + if (data.dataset_environment != null) { + return initDataset({ + ...commonArgs, + environment: data.dataset_environment, + }); + } + + return initDataset(commonArgs); } if ("dataset_id" in data) { + const selectorCount = [ + data.dataset_version, + data.dataset_snapshot_name, + data.dataset_environment, + ].filter((value) => value != null).length; + if (selectorCount > 1) { + throw new Error( + "Cannot specify more than one of dataset_version, dataset_snapshot_name, and dataset_environment.", + ); + } + const datasetInfo = await getDatasetById({ state, datasetId: data.dataset_id, }); - return initDataset({ + const commonArgs = { state, projectId: datasetInfo.projectId, dataset: datasetInfo.dataset, - version: data.dataset_version ?? undefined, - environment: data.dataset_environment ?? undefined, _internal_btql: data._internal_btql ?? undefined, - }); + }; + + if (data.dataset_version != null) { + return initDataset({ + ...commonArgs, + version: data.dataset_version, + }); + } + if (data.dataset_snapshot_name != null) { + return initDataset({ + ...commonArgs, + snapshotName: data.dataset_snapshot_name, + }); + } + if (data.dataset_environment != null) { + return initDataset({ + ...commonArgs, + environment: data.dataset_environment, + }); + } + + return initDataset(commonArgs); } // eslint-disable-next-line @typescript-eslint/consistent-type-assertions diff --git a/js/src/generated_types.ts b/js/src/generated_types.ts index dfc78157e..f43e37634 100644 --- a/js/src/generated_types.ts +++ b/js/src/generated_types.ts @@ -1827,6 +1827,7 @@ export const RunEval = z.object({ dataset_id: z.string(), dataset_version: z.union([z.string(), z.null()]).optional(), dataset_environment: z.union([z.string(), z.null()]).optional(), + dataset_snapshot_name: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), @@ -1836,6 +1837,7 @@ export const RunEval = z.object({ dataset_name: z.string(), dataset_version: z.union([z.string(), z.null()]).optional(), dataset_environment: z.union([z.string(), z.null()]).optional(), + dataset_snapshot_name: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index 0aaf0145b..626501ad6 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -464,6 +464,16 @@ test("init accepts dataset with id and environment", () => { expect(datasetWithEnvironment.environment).toBe("production"); }); +test("init accepts dataset with id and snapshotName", () => { + const datasetWithSnapshot = { + id: "dataset-id-123", + snapshotName: "123", + }; + + expect(datasetWithSnapshot.id).toBe("dataset-id-123"); + expect(datasetWithSnapshot.snapshotName).toBe("123"); +}); + test("dataset.toEvalData preserves dataset_environment", async () => { const state = await _exportsForTestingOnly.simulateLoginForTests(); vi.spyOn(state, "login").mockResolvedValue(state); @@ -497,6 +507,82 @@ test("dataset.toEvalData preserves dataset_environment", async () => { vi.restoreAllMocks(); }); +test("dataset.toEvalData preserves dataset_snapshot_name", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "get_json").mockResolvedValue([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + snapshotName: "123", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_snapshot_name: "123", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init keeps plain dataset refs attached to the experiment", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(experiment.dataset).toMatchObject({ + id: "00000000-0000-0000-0000-000000000002", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + test("init resolves dataset environment before experiment registration", async () => { const state = await _exportsForTestingOnly.simulateLoginForTests(); vi.spyOn(state, "login").mockResolvedValue(state); @@ -532,6 +618,10 @@ test("init resolves dataset environment before experiment registration", async ( expect(getJson).toHaveBeenCalledWith( "environment-object/dataset/00000000-0000-0000-0000-000000000002/production", ); + expect(experiment.dataset).toMatchObject({ + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }); expect(postJson).toHaveBeenCalledWith( "api/experiment/register", expect.objectContaining({ @@ -544,16 +634,16 @@ test("init resolves dataset environment before experiment registration", async ( vi.restoreAllMocks(); }); -test("init resolves dataset snapshot names before experiment registration", async () => { +test("init resolves dataset snapshots before experiment registration", async () => { const state = await _exportsForTestingOnly.simulateLoginForTests(); vi.spyOn(state, "login").mockResolvedValue(state); const appGetJson = vi.spyOn(state.appConn(), "get_json").mockResolvedValue([ { id: "00000000-0000-0000-0000-000000000004", dataset_id: "00000000-0000-0000-0000-000000000002", - name: "production", + name: "123", description: null, - xact_id: "123", + xact_id: "456", created: "2026-03-31T00:00:00.000Z", }, ]); @@ -575,7 +665,7 @@ test("init resolves dataset snapshot names before experiment registration", asyn experiment: "test-experiment", dataset: { id: "00000000-0000-0000-0000-000000000002", - version: "production", + snapshotName: "123", }, setCurrent: false, state, @@ -590,7 +680,7 @@ test("init resolves dataset snapshot names before experiment registration", asyn "api/experiment/register", expect.objectContaining({ dataset_id: "00000000-0000-0000-0000-000000000002", - dataset_version: "123", + dataset_version: "456", }), ); @@ -824,7 +914,7 @@ describe("loader version precedence", () => { expect(requestBody).not.toHaveProperty("env"); }); - test("initDataset resolves snapshot names before experiment registration when passed as a Dataset object", async () => { + test("initDataset resolves snapshots before experiment registration when passed as a Dataset object", async () => { const postJson = vi.spyOn(state.appConn(), "post_json"); const appGetJson = vi.spyOn(state.appConn(), "get_json"); postJson @@ -854,9 +944,9 @@ describe("loader version precedence", () => { { id: "00000000-0000-0000-0000-000000000004", dataset_id: "00000000-0000-0000-0000-000000000002", - name: "production", + name: "123", description: null, - xact_id: "123", + xact_id: "456", created: "2026-03-31T00:00:00.000Z", }, ]); @@ -864,7 +954,7 @@ describe("loader version precedence", () => { const dataset = initDataset({ project: "test-project", dataset: "test-dataset", - version: "production", + snapshotName: "123", state, }); const experiment = init({ @@ -885,7 +975,7 @@ describe("loader version precedence", () => { "api/experiment/register", expect.objectContaining({ dataset_id: "00000000-0000-0000-0000-000000000002", - dataset_version: "123", + dataset_version: "456", }), ); }); @@ -908,14 +998,14 @@ describe("loader version precedence", () => { const dataset = initDataset({ project: "test-project", dataset: "test-dataset", - version: "production", + snapshotName: "123", state, }); await expect(dataset.version()).rejects.toThrow("snapshot lookup failed"); }); - test("initDataset requires a matching snapshot name when a snapshot alias is requested", async () => { + test("initDataset requires a matching snapshot name when a snapshot is requested", async () => { vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ project: { id: "00000000-0000-0000-0000-000000000001", @@ -931,12 +1021,68 @@ describe("loader version precedence", () => { const dataset = initDataset({ project: "test-project", dataset: "test-dataset", - version: "production", + snapshotName: "123", state, }); await expect(dataset.version()).rejects.toThrow( - 'Dataset snapshot "production" not found for 00000000-0000-0000-0000-000000000002', + 'Dataset snapshot "123" not found for 00000000-0000-0000-0000-000000000002', + ); + }); + + test("createSnapshot allows purely numeric snapshot names", async () => { + const postJson = vi.spyOn(state.appConn(), "post_json"); + postJson + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + dataset_snapshot: { + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }, + found_existing: false, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "456", + state, + }); + + await expect( + dataset.createSnapshot({ + name: "123", + }), + ).resolves.toEqual({ + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }); + + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/dataset_snapshot/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + xact_id: "456", + }), ); }); diff --git a/js/src/logger.ts b/js/src/logger.ts index 99ffa8bf7..9a2b7ca01 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -3425,14 +3425,34 @@ type InitOpenOption = { open?: IsOpen; }; +type DatasetSelection = + | { + version: string; + environment?: never; + snapshotName?: never; + } + | { + version?: never; + environment: string; + snapshotName?: never; + } + | { + version?: never; + environment?: never; + snapshotName: string; + } + | { + version?: never; + environment?: never; + snapshotName?: never; + }; + /** - * Reference to a dataset by ID and optional version. + * Reference to a dataset by ID and optional explicit selector. */ -export interface DatasetRef { +export type DatasetRef = { id: string; - version?: string; - environment?: string; -} +} & DatasetSelection; export interface ParametersRef { id: string; @@ -3734,9 +3754,7 @@ export function init( const ret = new Experiment( state, lazyMetadata, - dataset !== undefined && "version" in dataset - ? (dataset as AnyDataset) - : undefined, + dataset !== undefined ? (dataset as AnyDataset) : undefined, ); if (options.setCurrent ?? true) { state.currentExperiment = ret; @@ -3824,13 +3842,12 @@ export type InitDatasetOptions = FullLoginOptions & { dataset?: string; description?: string; - version?: string; - environment?: string; projectId?: string; metadata?: Record; state?: BraintrustState; _internal_btql?: Record; - } & UseOutputOption; + } & DatasetSelection & + UseOutputOption; export type FullInitDatasetOptions = { project?: string; @@ -3850,42 +3867,53 @@ async function getDatasetSnapshots({ ); } -async function resolveDatasetVersion({ +function countSpecifiedDatasetSelectors({ + version, + environment, + snapshotName, +}: { + version?: string; + environment?: string; + snapshotName?: string; +}): number { + return [version, environment, snapshotName].filter( + (value) => value !== undefined, + ).length; +} + +async function resolveDatasetSnapshotName({ state, datasetId, - version, + snapshotName, }: { state: BraintrustState; datasetId: string; - version: string; + snapshotName: string; }): Promise { - // If it looks like a numeric xact_id, use it directly. - if (/^\d+$/.test(version)) { - return version; - } - const snapshots = await getDatasetSnapshots({ state, datasetId }); - const match = snapshots.find((s) => s.name === version); + const match = snapshots.find((s) => s.name === snapshotName); if (!match) { - throw new Error(`Dataset snapshot "${version}" not found for ${datasetId}`); + throw new Error( + `Dataset snapshot "${snapshotName}" not found for ${datasetId}`, + ); } return match.xact_id; } -async function resolveDatasetVersionForMetadata({ +async function resolveDatasetSnapshotNameForMetadata({ state, lazyMetadata, - version, + snapshotName, }: { state: BraintrustState; lazyMetadata: LazyValue; - version: string; + snapshotName: string; }): Promise { const metadata = await lazyMetadata.get(); - return await resolveDatasetVersion({ + return await resolveDatasetSnapshotName({ state, datasetId: metadata.dataset.id, - version, + snapshotName, }); } @@ -3932,9 +3960,9 @@ async function serializeDatasetForExperiment({ state: BraintrustState; }): Promise<{ datasetId: string; datasetVersion?: string }> { if (!Dataset.isDataset(dataset)) { - if (dataset.version !== undefined && dataset.environment !== undefined) { + if (countSpecifiedDatasetSelectors(dataset) > 1) { throw new Error( - "Cannot specify both dataset.version and dataset.environment. Use one or the other.", + "Cannot specify more than one of dataset.version, dataset.environment, and dataset.snapshotName. Use exactly one selector.", ); } @@ -3949,17 +3977,24 @@ async function serializeDatasetForExperiment({ }; } - if (dataset.version !== undefined) { + if (dataset.snapshotName !== undefined) { return { datasetId: dataset.id, - datasetVersion: await resolveDatasetVersion({ + datasetVersion: await resolveDatasetSnapshotName({ state, datasetId: dataset.id, - version: dataset.version, + snapshotName: dataset.snapshotName, }), }; } + if (dataset.version !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: dataset.version, + }; + } + return { datasetId: dataset.id, }; @@ -3973,14 +4008,17 @@ async function serializeDatasetForExperiment({ }; } + if (evalData.dataset_snapshot_name !== undefined) { + return { + datasetId: evalData.dataset_id, + datasetVersion: await dataset.version(), + }; + } + if (evalData.dataset_version !== undefined) { return { datasetId: evalData.dataset_id, - datasetVersion: await resolveDatasetVersion({ - state, - datasetId: evalData.dataset_id, - version: evalData.dataset_version, - }), + datasetVersion: evalData.dataset_version, }; } @@ -3996,7 +4034,8 @@ async function serializeDatasetForExperiment({ * @param options.project The name of the project to create the dataset in. Must specify at least one of `project` or `projectId`. * @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically. * @param options.description An optional description of the dataset. - * @param options.version Pin the dataset to a specific version. Can be a numeric xact_id or a snapshot name. Snapshot names are resolved to a concrete xact_id and throw if no matching snapshot exists. + * @param options.version Pin the dataset to a specific version xact_id. + * @param options.snapshotName Pin the dataset to the version captured by this named snapshot. Snapshot names are resolved to a concrete xact_id and throw if no matching snapshot exists. * @param options.environment Pin the dataset to the version tagged with this environment slug. Throws if the environment lookup fails. * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev. * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API key is specified, will prompt the user to login. @@ -4054,6 +4093,7 @@ export function initDataset< dataset, description, version, + snapshotName, environment, appUrl, apiKey, @@ -4067,9 +4107,15 @@ export function initDataset< _internal_btql, } = options; - if (version !== undefined && environment !== undefined) { + if ( + countSpecifiedDatasetSelectors({ + version, + environment, + snapshotName, + }) > 1 + ) { throw new Error( - "Cannot specify both `version` and `environment`. Use one or the other.", + "Cannot specify more than one of `version`, `environment`, and `snapshotName`. Use exactly one selector.", ); } @@ -4114,28 +4160,31 @@ export function initDataset< const resolvedVersion = version !== undefined - ? new LazyValue(async () => { - return await resolveDatasetVersionForMetadata({ - state, - lazyMetadata, - version, - }); - }) - : environment !== undefined + ? version + : snapshotName !== undefined ? new LazyValue(async () => { - return await resolveDatasetEnvironmentForMetadata({ + return await resolveDatasetSnapshotNameForMetadata({ state, lazyMetadata, - environment, + snapshotName, }); }) - : undefined; + : environment !== undefined + ? new LazyValue(async () => { + return await resolveDatasetEnvironmentForMetadata({ + state, + lazyMetadata, + environment, + }); + }) + : undefined; return new Dataset( stateArg ?? _globalState, lazyMetadata, resolvedVersion, environment, + snapshotName, legacy, _internal_btql, ); @@ -7025,6 +7074,7 @@ export class Dataset< > extends ObjectFetcher> { private readonly lazyMetadata: LazyValue; private readonly lazyPinnedVersion: LazyValue | undefined; + private readonly pinnedSnapshotName: string | undefined; private readonly __braintrust_dataset_marker = true; private newRecords = 0; @@ -7033,6 +7083,7 @@ export class Dataset< lazyMetadata: LazyValue, pinnedVersion?: string | LazyValue, pinnedEnvironment?: string, + pinnedSnapshotName?: string, legacy?: IsLegacyDataset, _internal_btql?: Record, ) { @@ -7063,6 +7114,7 @@ export class Dataset< this.lazyMetadata = lazyMetadata; this.lazyPinnedVersion = pinnedVersion instanceof LazyValue ? pinnedVersion : undefined; + this.pinnedSnapshotName = pinnedSnapshotName; } public get id(): Promise { @@ -7091,6 +7143,7 @@ export class Dataset< dataset_id: string; dataset_version?: string; dataset_environment?: string; + dataset_snapshot_name?: string; _internal_btql?: Record; }> { await this.getState(); @@ -7104,6 +7157,13 @@ export class Dataset< } : {}), ...(this.pinnedEnvironment === undefined && + this.pinnedSnapshotName !== undefined + ? { + dataset_snapshot_name: this.pinnedSnapshotName, + } + : {}), + ...(this.pinnedEnvironment === undefined && + this.pinnedSnapshotName === undefined && this.pinnedVersion !== undefined ? { dataset_version: this.pinnedVersion, @@ -7118,7 +7178,8 @@ export class Dataset< protected async getState(): Promise { // Ensure the login state is populated by awaiting lazyMetadata. await this.lazyMetadata.get(); - // Resolve lazy pinned version (e.g. from environment or snapshot name lookup). + // Resolve lazy pinned version (e.g. from environment or snapshot-name + // lookup). if ( this.lazyPinnedVersion !== undefined && this.pinnedVersion === undefined From 5036bb706ed0efbc900bdcfe0bfd553b9a7beb78 Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Fri, 3 Apr 2026 10:38:26 -0700 Subject: [PATCH 7/8] refactor --- js/dev/server.test.ts | 64 +++++++++++++ js/dev/server.ts | 185 +++++++++++++++++++++---------------- js/util/generated_types.ts | 6 ++ 3 files changed, 174 insertions(+), 81 deletions(-) create mode 100644 js/dev/server.test.ts diff --git a/js/dev/server.test.ts b/js/dev/server.test.ts new file mode 100644 index 000000000..a853694e9 --- /dev/null +++ b/js/dev/server.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, test, vi } from "vitest"; +import { type BraintrustState } from "../src/logger"; +import { _exportsForTestingOnly } from "./server"; + +describe("run eval dataset selector helpers", () => { + const state = {} as BraintrustState; + + test("maps project dataset refs into initDataset args", async () => { + await expect( + _exportsForTestingOnly.buildRunEvalDatasetInitArgs(state, { + project_name: "test-project", + dataset_name: "test-dataset", + dataset_environment: "production", + _internal_btql: { limit: 10 }, + }), + ).resolves.toEqual({ + state, + project: "test-project", + dataset: "test-dataset", + environment: "production", + _internal_btql: { limit: 10 }, + }); + }); + + test("maps dataset id refs into initDataset args", async () => { + const lookupDatasetById = vi.fn().mockResolvedValue({ + projectId: "project-id-123", + dataset: "resolved-dataset", + }); + + await expect( + _exportsForTestingOnly.buildRunEvalDatasetInitArgs( + state, + { + dataset_id: "dataset-id-123", + dataset_snapshot_name: "release-candidate", + }, + lookupDatasetById, + ), + ).resolves.toEqual({ + state, + projectId: "project-id-123", + dataset: "resolved-dataset", + snapshotName: "release-candidate", + }); + expect(lookupDatasetById).toHaveBeenCalledWith({ + state, + datasetId: "dataset-id-123", + }); + }); + + test("rejects multiple dataset selectors", () => { + expect(() => + _exportsForTestingOnly.getRunEvalDatasetSelector({ + project_name: "test-project", + dataset_name: "test-dataset", + dataset_version: "123", + dataset_environment: "production", + }), + ).toThrow( + "Cannot specify more than one of dataset_version, dataset_snapshot_name, and dataset_environment.", + ); + }); +}); diff --git a/js/dev/server.ts b/js/dev/server.ts index 511a21ad2..1eda4eb07 100644 --- a/js/dev/server.ts +++ b/js/dev/server.ts @@ -305,97 +305,115 @@ const asyncHandler = Promise.resolve(fn(req, res, next)).catch(next); }; -async function getDataset( - state: BraintrustState, - data: RunEvalRequest["data"], -): Promise> { - if ("project_name" in data) { - const selectorCount = [ - data.dataset_version, - data.dataset_snapshot_name, - data.dataset_environment, - ].filter((value) => value != null).length; - if (selectorCount > 1) { - throw new Error( - "Cannot specify more than one of dataset_version, dataset_snapshot_name, and dataset_environment.", - ); - } - - const commonArgs = { - state, - project: data.project_name, - dataset: data.dataset_name, - _internal_btql: data._internal_btql ?? undefined, - }; - - if (data.dataset_version != null) { - return initDataset({ - ...commonArgs, - version: data.dataset_version, - }); +type RunEvalDatasetSelector = + | { + version: string; + environment?: never; + snapshotName?: never; } - if (data.dataset_snapshot_name != null) { - return initDataset({ - ...commonArgs, - snapshotName: data.dataset_snapshot_name, - }); + | { + version?: never; + environment: string; + snapshotName?: never; } - if (data.dataset_environment != null) { - return initDataset({ - ...commonArgs, - environment: data.dataset_environment, - }); + | { + version?: never; + environment?: never; + snapshotName: string; } + | { + version?: never; + environment?: never; + snapshotName?: never; + }; + +type RunEvalDatasetReference = + | Extract + | Extract; - return initDataset(commonArgs); +type RunEvalDatasetInitArgs = { + state: BraintrustState; + dataset: string; + _internal_btql?: Record; +} & ( + | { project: string; projectId?: never } + | { project?: never; projectId: string } +) & + RunEvalDatasetSelector; + +const RUN_EVAL_DATASET_SELECTOR_ERROR = + "Cannot specify more than one of dataset_version, dataset_snapshot_name, and dataset_environment."; + +function getRunEvalDatasetSelector( + data: RunEvalDatasetReference, +): RunEvalDatasetSelector { + const selectorCount = [ + data.dataset_version, + data.dataset_snapshot_name, + data.dataset_environment, + ].filter((value) => value != null).length; + + if (selectorCount > 1) { + throw new Error(RUN_EVAL_DATASET_SELECTOR_ERROR); } - if ("dataset_id" in data) { - const selectorCount = [ - data.dataset_version, - data.dataset_snapshot_name, - data.dataset_environment, - ].filter((value) => value != null).length; - if (selectorCount > 1) { - throw new Error( - "Cannot specify more than one of dataset_version, dataset_snapshot_name, and dataset_environment.", - ); - } - const datasetInfo = await getDatasetById({ - state, - datasetId: data.dataset_id, - }); - const commonArgs = { - state, - projectId: datasetInfo.projectId, - dataset: datasetInfo.dataset, - _internal_btql: data._internal_btql ?? undefined, - }; + if (data.dataset_version != null) { + return { version: data.dataset_version }; + } + if (data.dataset_snapshot_name != null) { + return { snapshotName: data.dataset_snapshot_name }; + } + if (data.dataset_environment != null) { + return { environment: data.dataset_environment }; + } - if (data.dataset_version != null) { - return initDataset({ - ...commonArgs, - version: data.dataset_version, - }); - } - if (data.dataset_snapshot_name != null) { - return initDataset({ - ...commonArgs, - snapshotName: data.dataset_snapshot_name, - }); - } - if (data.dataset_environment != null) { - return initDataset({ - ...commonArgs, - environment: data.dataset_environment, - }); - } + return {}; +} + +async function buildRunEvalDatasetInitArgs( + state: BraintrustState, + data: RunEvalDatasetReference, + lookupDatasetById: typeof getDatasetById = getDatasetById, +): Promise { + const commonArgs = { + state, + ...(data._internal_btql != null + ? { _internal_btql: data._internal_btql } + : {}), + ...getRunEvalDatasetSelector(data), + }; - return initDataset(commonArgs); + if ("project_name" in data) { + const args = { + ...commonArgs, + project: data.project_name, + dataset: data.dataset_name, + } satisfies RunEvalDatasetInitArgs; + return args; } - // eslint-disable-next-line @typescript-eslint/consistent-type-assertions - return data.data as EvalCase[]; + const datasetInfo = await lookupDatasetById({ + state, + datasetId: data.dataset_id, + }); + const args = { + ...commonArgs, + projectId: datasetInfo.projectId, + dataset: datasetInfo.dataset, + } satisfies RunEvalDatasetInitArgs; + return args; +} + +async function getDataset( + state: BraintrustState, + data: RunEvalRequest["data"], +): Promise> { + if ("data" in data) { + // eslint-disable-next-line @typescript-eslint/consistent-type-assertions + return data.data as EvalCase[]; + } + + return initDataset(await buildRunEvalDatasetInitArgs(state, data)); } const datasetFetchSchema = z.object({ @@ -419,6 +437,11 @@ async function getDatasetById({ return { projectId: parsed[0].project_id, dataset: parsed[0].name }; } +export const _exportsForTestingOnly = { + buildRunEvalDatasetInitArgs, + getRunEvalDatasetSelector, +}; + function makeScorer( state: BraintrustState, name: string, diff --git a/js/util/generated_types.ts b/js/util/generated_types.ts index e24a44951..b0659d95b 100644 --- a/js/util/generated_types.ts +++ b/js/util/generated_types.ts @@ -1295,6 +1295,9 @@ export const RunEval = z.object({ data: z.union([ z.object({ dataset_id: z.string(), + dataset_version: z.union([z.string(), z.null()]).optional(), + dataset_environment: z.union([z.string(), z.null()]).optional(), + dataset_snapshot_name: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), @@ -1302,6 +1305,9 @@ export const RunEval = z.object({ z.object({ project_name: z.string(), dataset_name: z.string(), + dataset_version: z.union([z.string(), z.null()]).optional(), + dataset_environment: z.union([z.string(), z.null()]).optional(), + dataset_snapshot_name: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), From 945c71d54c50c706f87530e641ca817445c55c65 Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Wed, 8 Apr 2026 10:39:08 -0700 Subject: [PATCH 8/8] make sure dataset selection types are backwards compatible --- js/dev/server.test.ts | 24 ++++++-- js/dev/server.ts | 13 ---- js/src/logger.test.ts | 75 +++++++++++++++++++++++ js/src/logger.ts | 136 +++++++++++++++++++----------------------- 4 files changed, 156 insertions(+), 92 deletions(-) diff --git a/js/dev/server.test.ts b/js/dev/server.test.ts index a853694e9..e7b4d8e3e 100644 --- a/js/dev/server.test.ts +++ b/js/dev/server.test.ts @@ -49,16 +49,30 @@ describe("run eval dataset selector helpers", () => { }); }); - test("rejects multiple dataset selectors", () => { - expect(() => + test("prefers dataset_version over other dataset selectors", () => { + expect( _exportsForTestingOnly.getRunEvalDatasetSelector({ project_name: "test-project", dataset_name: "test-dataset", dataset_version: "123", + dataset_snapshot_name: "release-candidate", dataset_environment: "production", }), - ).toThrow( - "Cannot specify more than one of dataset_version, dataset_snapshot_name, and dataset_environment.", - ); + ).toEqual({ + version: "123", + }); + }); + + test("prefers dataset_snapshot_name over dataset_environment", () => { + expect( + _exportsForTestingOnly.getRunEvalDatasetSelector({ + project_name: "test-project", + dataset_name: "test-dataset", + dataset_snapshot_name: "release-candidate", + dataset_environment: "production", + }), + ).toEqual({ + snapshotName: "release-candidate", + }); }); }); diff --git a/js/dev/server.ts b/js/dev/server.ts index 1eda4eb07..186985c54 100644 --- a/js/dev/server.ts +++ b/js/dev/server.ts @@ -341,22 +341,9 @@ type RunEvalDatasetInitArgs = { ) & RunEvalDatasetSelector; -const RUN_EVAL_DATASET_SELECTOR_ERROR = - "Cannot specify more than one of dataset_version, dataset_snapshot_name, and dataset_environment."; - function getRunEvalDatasetSelector( data: RunEvalDatasetReference, ): RunEvalDatasetSelector { - const selectorCount = [ - data.dataset_version, - data.dataset_snapshot_name, - data.dataset_environment, - ].filter((value) => value != null).length; - - if (selectorCount > 1) { - throw new Error(RUN_EVAL_DATASET_SELECTOR_ERROR); - } - if (data.dataset_version != null) { return { version: data.dataset_version }; } diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index 626501ad6..cf7cb9913 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -474,6 +474,37 @@ test("init accepts dataset with id and snapshotName", () => { expect(datasetWithSnapshot.snapshotName).toBe("123"); }); +test("initDataset prefers version over environment in eval data", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + environment: "production", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + test("dataset.toEvalData preserves dataset_environment", async () => { const state = await _exportsForTestingOnly.simulateLoginForTests(); vi.spyOn(state, "login").mockResolvedValue(state); @@ -634,6 +665,50 @@ test("init resolves dataset environment before experiment registration", async ( vi.restoreAllMocks(); }); +test("init prefers dataset version over environment before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const getJson = vi.spyOn(state.apiConn(), "get_json"); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + version: "123", + environment: "production", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(getJson).not.toHaveBeenCalled(); + expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + test("init resolves dataset snapshots before experiment registration", async () => { const state = await _exportsForTestingOnly.simulateLoginForTests(); vi.spyOn(state, "login").mockResolvedValue(state); diff --git a/js/src/logger.ts b/js/src/logger.ts index 9a2b7ca01..7cf027f57 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -3425,27 +3425,11 @@ type InitOpenOption = { open?: IsOpen; }; -type DatasetSelection = - | { - version: string; - environment?: never; - snapshotName?: never; - } - | { - version?: never; - environment: string; - snapshotName?: never; - } - | { - version?: never; - environment?: never; - snapshotName: string; - } - | { - version?: never; - environment?: never; - snapshotName?: never; - }; +type DatasetSelection = { + version?: string; + environment?: string; + snapshotName?: string; +}; /** * Reference to a dataset by ID and optional explicit selector. @@ -3867,7 +3851,7 @@ async function getDatasetSnapshots({ ); } -function countSpecifiedDatasetSelectors({ +function normalizeDatasetSelection({ version, environment, snapshotName, @@ -3875,10 +3859,20 @@ function countSpecifiedDatasetSelectors({ version?: string; environment?: string; snapshotName?: string; -}): number { - return [version, environment, snapshotName].filter( - (value) => value !== undefined, - ).length; +}): DatasetSelection { + if (version !== undefined) { + return { version }; + } + + if (snapshotName !== undefined) { + return { snapshotName }; + } + + if (environment !== undefined) { + return { environment }; + } + + return {}; } async function resolveDatasetSnapshotName({ @@ -3960,38 +3954,34 @@ async function serializeDatasetForExperiment({ state: BraintrustState; }): Promise<{ datasetId: string; datasetVersion?: string }> { if (!Dataset.isDataset(dataset)) { - if (countSpecifiedDatasetSelectors(dataset) > 1) { - throw new Error( - "Cannot specify more than one of dataset.version, dataset.environment, and dataset.snapshotName. Use exactly one selector.", - ); - } + const selection = normalizeDatasetSelection(dataset); - if (dataset.environment !== undefined) { + if (selection.version !== undefined) { return { datasetId: dataset.id, - datasetVersion: await resolveDatasetEnvironment({ - state, - datasetId: dataset.id, - environment: dataset.environment, - }), + datasetVersion: selection.version, }; } - if (dataset.snapshotName !== undefined) { + if (selection.snapshotName !== undefined) { return { datasetId: dataset.id, datasetVersion: await resolveDatasetSnapshotName({ state, datasetId: dataset.id, - snapshotName: dataset.snapshotName, + snapshotName: selection.snapshotName, }), }; } - if (dataset.version !== undefined) { + if (selection.environment !== undefined) { return { datasetId: dataset.id, - datasetVersion: dataset.version, + datasetVersion: await resolveDatasetEnvironment({ + state, + datasetId: dataset.id, + environment: selection.environment, + }), }; } @@ -4001,24 +3991,26 @@ async function serializeDatasetForExperiment({ } const evalData = await dataset.toEvalData(); - if (evalData.dataset_environment !== undefined) { - return { - datasetId: evalData.dataset_id, - datasetVersion: await dataset.version(), - }; - } + const selection = normalizeDatasetSelection({ + version: evalData.dataset_version, + environment: evalData.dataset_environment, + snapshotName: evalData.dataset_snapshot_name, + }); - if (evalData.dataset_snapshot_name !== undefined) { + if (selection.version !== undefined) { return { datasetId: evalData.dataset_id, - datasetVersion: await dataset.version(), + datasetVersion: selection.version, }; } - if (evalData.dataset_version !== undefined) { + if ( + selection.environment !== undefined || + selection.snapshotName !== undefined + ) { return { datasetId: evalData.dataset_id, - datasetVersion: evalData.dataset_version, + datasetVersion: await dataset.version(), }; } @@ -4034,9 +4026,9 @@ async function serializeDatasetForExperiment({ * @param options.project The name of the project to create the dataset in. Must specify at least one of `project` or `projectId`. * @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically. * @param options.description An optional description of the dataset. - * @param options.version Pin the dataset to a specific version xact_id. - * @param options.snapshotName Pin the dataset to the version captured by this named snapshot. Snapshot names are resolved to a concrete xact_id and throw if no matching snapshot exists. - * @param options.environment Pin the dataset to the version tagged with this environment slug. Throws if the environment lookup fails. + * @param options.version Pin the dataset to a specific version xact_id. If `snapshotName` or `environment` are also provided, `version` takes precedence. + * @param options.snapshotName Pin the dataset to the version captured by this named snapshot. Snapshot names are resolved to a concrete xact_id and throw if no matching snapshot exists. If `environment` is also provided, `snapshotName` takes precedence. + * @param options.environment Pin the dataset to the version tagged with this environment slug. Throws if the environment lookup fails when it is the selected dataset selector. * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev. * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API key is specified, will prompt the user to login. * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple. @@ -4106,18 +4098,14 @@ export function initDataset< state: stateArg, _internal_btql, } = options; - - if ( - countSpecifiedDatasetSelectors({ - version, - environment, - snapshotName, - }) > 1 - ) { - throw new Error( - "Cannot specify more than one of `version`, `environment`, and `snapshotName`. Use exactly one selector.", - ); - } + const selection = normalizeDatasetSelection({ + version, + environment, + snapshotName, + }); + const normalizedVersion = selection.version; + const normalizedEnvironment = selection.environment; + const normalizedSnapshotName = selection.snapshotName; const state = stateArg ?? _globalState; @@ -4159,22 +4147,22 @@ export function initDataset< ); const resolvedVersion = - version !== undefined - ? version - : snapshotName !== undefined + normalizedVersion !== undefined + ? normalizedVersion + : normalizedSnapshotName !== undefined ? new LazyValue(async () => { return await resolveDatasetSnapshotNameForMetadata({ state, lazyMetadata, - snapshotName, + snapshotName: normalizedSnapshotName, }); }) - : environment !== undefined + : normalizedEnvironment !== undefined ? new LazyValue(async () => { return await resolveDatasetEnvironmentForMetadata({ state, lazyMetadata, - environment, + environment: normalizedEnvironment, }); }) : undefined; @@ -4183,8 +4171,8 @@ export function initDataset< stateArg ?? _globalState, lazyMetadata, resolvedVersion, - environment, - snapshotName, + normalizedEnvironment, + normalizedSnapshotName, legacy, _internal_btql, );