Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions js/dev/server.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { describe, expect, test, vi } from "vitest";
import { type BraintrustState } from "../src/logger";
import { _exportsForTestingOnly } from "./server";

// Unit tests for the helpers that translate a run-eval dataset reference into
// `initDataset` arguments and pick the dataset selector.
describe("run eval dataset selector helpers", () => {
  const { buildRunEvalDatasetInitArgs, getRunEvalDatasetSelector } =
    _exportsForTestingOnly;

  // The helpers only forward `state`, so an empty stand-in is sufficient here.
  const state = {} as BraintrustState;

  test("maps project dataset refs into initDataset args", async () => {
    const initArgs = await buildRunEvalDatasetInitArgs(state, {
      project_name: "test-project",
      dataset_name: "test-dataset",
      dataset_environment: "production",
      _internal_btql: { limit: 10 },
    });

    expect(initArgs).toEqual({
      state,
      project: "test-project",
      dataset: "test-dataset",
      environment: "production",
      _internal_btql: { limit: 10 },
    });
  });

  test("maps dataset id refs into initDataset args", async () => {
    // Injected lookup so the id -> (projectId, dataset) resolution needs no network.
    const lookupDatasetById = vi.fn().mockResolvedValue({
      projectId: "project-id-123",
      dataset: "resolved-dataset",
    });

    const initArgs = await buildRunEvalDatasetInitArgs(
      state,
      {
        dataset_id: "dataset-id-123",
        dataset_snapshot_name: "release-candidate",
      },
      lookupDatasetById,
    );

    expect(initArgs).toEqual({
      state,
      projectId: "project-id-123",
      dataset: "resolved-dataset",
      snapshotName: "release-candidate",
    });
    expect(lookupDatasetById).toHaveBeenCalledWith({
      state,
      datasetId: "dataset-id-123",
    });
  });

  test("prefers dataset_version over other dataset selectors", () => {
    const selector = getRunEvalDatasetSelector({
      project_name: "test-project",
      dataset_name: "test-dataset",
      dataset_version: "123",
      dataset_snapshot_name: "release-candidate",
      dataset_environment: "production",
    });

    expect(selector).toEqual({
      version: "123",
    });
  });

  test("prefers dataset_snapshot_name over dataset_environment", () => {
    const selector = getRunEvalDatasetSelector({
      project_name: "test-project",
      dataset_name: "test-dataset",
      dataset_snapshot_name: "release-candidate",
      dataset_environment: "production",
    });

    expect(selector).toEqual({
      snapshotName: "release-candidate",
    });
  });
});
113 changes: 94 additions & 19 deletions js/dev/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,32 +305,102 @@ const asyncHandler =
Promise.resolve(fn(req, res, next)).catch(next);
};

async function getDataset(
/**
 * Mutually exclusive dataset selector fields that get spread into the
 * `initDataset` arguments.
 *
 * At most one of `version`, `environment`, or `snapshotName` may be present:
 * in every variant the other two are typed `?: never`, so combining them is a
 * compile-time error. The final variant (all three absent) represents "no
 * selector supplied".
 */
type RunEvalDatasetSelector =
| {
// Only `version` is set.
version: string;
environment?: never;
snapshotName?: never;
}
| {
// Only `environment` is set.
version?: never;
environment: string;
snapshotName?: never;
}
| {
// Only `snapshotName` is set.
version?: never;
environment?: never;
snapshotName: string;
}
| {
// No selector at all.
version?: never;
environment?: never;
snapshotName?: never;
};

/**
 * The members of `RunEvalRequest["data"]` that point at a dataset by reference:
 * either by `project_name` or by `dataset_id`. Any other member of the request
 * union is excluded by the two `Extract`s.
 */
type RunEvalDatasetReference =
| Extract<RunEvalRequest["data"], { project_name: string }>
| Extract<RunEvalRequest["data"], { dataset_id: string }>;

/**
 * Shape of the argument object produced by `buildRunEvalDatasetInitArgs` and
 * handed to `initDataset`.
 *
 * Always carries `state` and the `dataset` name, optionally `_internal_btql`,
 * exactly one of `project` / `projectId` (the other is `?: never`), and at most
 * one selector field from `RunEvalDatasetSelector`.
 */
type RunEvalDatasetInitArgs = {
state: BraintrustState;
dataset: string;
_internal_btql?: Record<string, unknown>;
} & (
| { project: string; projectId?: never }
| { project?: never; projectId: string }
) &
RunEvalDatasetSelector;

/**
 * Picks the single dataset selector to use for a run-eval dataset reference.
 *
 * Precedence when several fields are supplied: `dataset_version`, then
 * `dataset_snapshot_name`, then `dataset_environment`. A field counts as
 * supplied when it is neither `null` nor `undefined`.
 *
 * @param data - Dataset reference whose selector fields are inspected.
 * @returns An object with exactly one of `version`, `snapshotName`, or
 *   `environment`, or an empty object when none was supplied.
 */
function getRunEvalDatasetSelector(
  data: RunEvalDatasetReference,
): RunEvalDatasetSelector {
  const {
    dataset_version: pinnedVersion,
    dataset_snapshot_name: snapshot,
    dataset_environment: env,
  } = data;

  if (pinnedVersion == null) {
    if (snapshot == null) {
      // Lowest priority: fall back to the environment, or to no selector at all.
      return env == null ? {} : { environment: env };
    }
    return { snapshotName: snapshot };
  }
  return { version: pinnedVersion };
}

/**
 * Translates a run-eval dataset reference into the argument object that is
 * passed to `initDataset`.
 *
 * The result always contains `state`, the selector chosen by
 * `getRunEvalDatasetSelector`, and `_internal_btql` when the reference carries a
 * non-null one. A `project_name` reference maps straight to
 * `project` / `dataset`; a `dataset_id` reference is first resolved to a
 * project id and dataset name through `lookupDatasetById`.
 *
 * @param state - Forwarded unchanged into the result and into the lookup call.
 * @param data - Dataset reference, addressed by `project_name` or `dataset_id`.
 * @param lookupDatasetById - Resolver for `dataset_id` references. Defaults to
 *   `getDatasetById`; injectable so tests can avoid the real lookup.
 * @returns The arguments for `initDataset`.
 */
async function buildRunEvalDatasetInitArgs(
  state: BraintrustState,
  data: RunEvalDatasetReference,
  lookupDatasetById: typeof getDatasetById = getDatasetById,
): Promise<RunEvalDatasetInitArgs> {
  const commonArgs = {
    state,
    // Omit the key entirely (rather than passing null/undefined) when absent.
    ...(data._internal_btql != null
      ? { _internal_btql: data._internal_btql }
      : {}),
    ...getRunEvalDatasetSelector(data),
  };

  if ("project_name" in data) {
    const args = {
      ...commonArgs,
      project: data.project_name,
      dataset: data.dataset_name,
    } satisfies RunEvalDatasetInitArgs;
    return args;
  }

  // Only the dataset id is known: resolve its project id and name first.
  const datasetInfo = await lookupDatasetById({
    state,
    datasetId: data.dataset_id,
  });
  const args = {
    ...commonArgs,
    projectId: datasetInfo.projectId,
    dataset: datasetInfo.dataset,
  } satisfies RunEvalDatasetInitArgs;
  return args;
}

/**
 * Resolves the `data` field of a run-eval request into eval data.
 *
 * Requests that embed their rows under `data.data` get those rows back as-is
 * (after a type assertion). Every other request is treated as a dataset
 * reference and opened through `initDataset`, with arguments produced by
 * `buildRunEvalDatasetInitArgs`.
 *
 * @param state - Passed through to `buildRunEvalDatasetInitArgs`.
 * @param data - The `data` field of the run-eval request.
 * @returns The inline rows, or the dataset returned by `initDataset`.
 */
async function getDataset(
  state: BraintrustState,
  data: RunEvalRequest["data"],
): Promise<EvalData<unknown, unknown, BaseMetadata>> {
  if (!("data" in data)) {
    const initArgs = await buildRunEvalDatasetInitArgs(state, data);
    return initDataset(initArgs);
  }

  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
  return data.data as EvalCase<unknown, unknown, BaseMetadata>[];
}

const datasetFetchSchema = z.object({
Expand All @@ -354,6 +424,11 @@ async function getDatasetById({
return { projectId: parsed[0].project_id, dataset: parsed[0].name };
}

/**
 * Module-private helpers exposed solely so unit tests can call them directly.
 * Not intended for use by other modules.
 */
export const _exportsForTestingOnly = {
buildRunEvalDatasetInitArgs,
getRunEvalDatasetSelector,
};

function makeScorer(
state: BraintrustState,
name: string,
Expand Down
147 changes: 147 additions & 0 deletions js/src/cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
import {
login,
init as _initExperiment,
initDataset,
Experiment,
BaseMetadata,
Dataset,
type ParametersRef,
RemoteEvalParameters,
_internalGetGlobalState,
} from "../logger";
import type { ProgressReporter } from "../reporters/types";
import {
Expand Down Expand Up @@ -1032,13 +1034,93 @@
});
}

/**
 * Parsed CLI arguments shared by the dataset subcommands.
 *
 * `openDataset` forwards `api_key`, `org_name`, `app_url`, and `debug_logging`
 * to `login`, and `project` / `dataset` to `initDataset`.
 */
interface DatasetCommandArgs {
api_key?: string;
org_name?: string;
app_url?: string;
// Name of the project that contains the dataset.
project: string;
// Name of the dataset within that project.
dataset: string;
debug_logging?: "error" | "warn" | "info" | "debug";
}

/**
 * Logs in with the credentials from the CLI arguments, then opens the
 * requested dataset.
 *
 * @param args - Parsed CLI arguments (auth settings plus project and dataset names).
 * @returns Whatever `initDataset` returns for the given project and dataset.
 */
async function openDataset(args: DatasetCommandArgs) {
  const { api_key, org_name, app_url, debug_logging, project, dataset } = args;

  await login({
    apiKey: api_key,
    orgName: org_name,
    appUrl: app_url,
    debugLogLevel: debug_logging,
  });

  return initDataset({ project, dataset });
}

/**
 * CLI handler for `dataset-snapshot`: creates a named snapshot of the dataset
 * and prints its `id`, `name`, `xact_id`, and `created` fields as
 * pretty-printed JSON on stdout.
 *
 * @param args - Shared dataset arguments plus the snapshot `name` and an
 *   optional `description`.
 */
async function datasetSnapshotCommand(
  args: DatasetCommandArgs & { name: string; description?: string },
) {
  const dataset = await openDataset(args);
  const snapshot = await dataset.createSnapshot({
    name: args.name,
    description: args.description,
  });

  const summary = {
    id: snapshot.id,
    name: snapshot.name,
    xact_id: snapshot.xact_id,
    created: snapshot.created,
  };

  // This is command output, not SDK logging. Write to stdout directly because
  // the repo's lint configuration rejects `console.log`.
  process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
}

/**
 * CLI handler for `dataset-snapshots`: lists the dataset's snapshots on
 * stdout, one tab-separated line per snapshot, or prints "No snapshots." when
 * the list is empty.
 *
 * @param args - Shared dataset arguments identifying the project and dataset.
 */
async function datasetSnapshotsCommand(args: DatasetCommandArgs) {
  const dataset = await openDataset(args);
  const snapshots = await dataset.listSnapshots();

  // This is command output, not SDK logging. Write to stdout directly because
  // the repo's lint configuration rejects `console.log`.
  if (snapshots.length === 0) {
    process.stdout.write("No snapshots.\n");
    return;
  }

  for (const snap of snapshots) {
    process.stdout.write(
      `${snap.name}\txact_id=${snap.xact_id}\tid=${snap.id}\tcreated=${snap.created}\n`,
    );
  }
}

/**
 * CLI handler for `dataset-tag-env`: associates a dataset version with an
 * environment slug.
 *
 * Uses `args.version` when given, otherwise the value of `dataset.version()`.
 * If neither yields a version, prints an error to stderr and exits with
 * status 1. On success, issues a PUT to
 * `environment-object/dataset/<datasetId>/<env>` and prints a confirmation.
 *
 * @param args - Shared dataset arguments plus the `env` slug and an optional
 *   explicit `version`.
 */
async function datasetTagEnvCommand(
  args: DatasetCommandArgs & { env: string; version?: string },
) {
  const dataset = await openDataset(args);
  const datasetId = await dataset.id;
  const objectVersion = args.version ?? (await dataset.version());

  // User-facing messages below go to the process streams directly because the
  // repo's lint configuration rejects `console.log` / `console.error`.
  if (!objectVersion) {
    process.stderr.write("Dataset has no records — nothing to tag.\n");
    process.exit(1);
  }

  const state = _internalGetGlobalState();
  await state
    .apiConn()
    .put_json(
      // The env slug is user input, so it is escaped before going into the path.
      `environment-object/dataset/${datasetId}/${encodeURIComponent(args.env)}`,
      { object_version: objectVersion },
    );

  process.stdout.write(
    `Tagged version ${objectVersion} with environment "${args.env}".\n`,
  );
}

async function main() {
const parser = new ArgumentParser({
description: "Braintrust CLI",
});

parser.add_argument("-v", "--version", { action: "version", version });

Check failure on line 1123 in js/src/cli/index.ts

View workflow job for this annotation

GitHub Actions / lint

'console.error' is restricted from being used. Use debugLogger instead of console for SDK logging
const parentParser = new ArgumentParser({ add_help: false });
parentParser.add_argument("--verbose", {
action: "store_true",
Expand All @@ -1048,7 +1130,7 @@
const subparser = parser.add_subparsers({
required: true,
});

Check failure on line 1133 in js/src/cli/index.ts

View workflow job for this annotation

GitHub Actions / lint

'console.log' is restricted from being used. Use debugLogger instead of console for SDK logging
const parser_run = subparser.add_parser("eval", {
help: "Run evals locally.",
parents: [parentParser],
Expand Down Expand Up @@ -1157,6 +1239,71 @@
});
parser_pull.set_defaults({ func: pullCommand });

// -- dataset-snapshot: create a named snapshot --
const parser_dataset_snapshot = subparser.add_parser("dataset-snapshot", {
help: "Create a named snapshot of a dataset's current version.",
parents: [parentParser],
});
addAuthArgs(parser_dataset_snapshot);
addDebugLoggingArg(parser_dataset_snapshot);
parser_dataset_snapshot.add_argument("--project", {
help: "The project containing the dataset.",
required: true,
});
parser_dataset_snapshot.add_argument("--dataset", {
help: "The name of the dataset.",
required: true,
});
parser_dataset_snapshot.add_argument("--name", {
help: "A name for the snapshot.",
required: true,
});
parser_dataset_snapshot.add_argument("--description", {
help: "An optional description for the snapshot.",
});
parser_dataset_snapshot.set_defaults({ func: datasetSnapshotCommand });

// -- dataset-snapshots: list snapshots --
const parser_dataset_snapshots = subparser.add_parser("dataset-snapshots", {
help: "List all named snapshots for a dataset.",
parents: [parentParser],
});
addAuthArgs(parser_dataset_snapshots);
addDebugLoggingArg(parser_dataset_snapshots);
parser_dataset_snapshots.add_argument("--project", {
help: "The project containing the dataset.",
required: true,
});
parser_dataset_snapshots.add_argument("--dataset", {
help: "The name of the dataset.",
required: true,
});
parser_dataset_snapshots.set_defaults({ func: datasetSnapshotsCommand });

// -- dataset-tag-env: tag a version with an environment --
const parser_dataset_tag_env = subparser.add_parser("dataset-tag-env", {
help: "Tag a dataset version with an environment (e.g. staging, production).",
parents: [parentParser],
});
addAuthArgs(parser_dataset_tag_env);
addDebugLoggingArg(parser_dataset_tag_env);
parser_dataset_tag_env.add_argument("--project", {
help: "The project containing the dataset.",
required: true,
});
parser_dataset_tag_env.add_argument("--dataset", {
help: "The name of the dataset.",
required: true,
});
parser_dataset_tag_env.add_argument("--env", {
help: "The environment slug to tag (e.g. staging, production).",
required: true,
});
parser_dataset_tag_env.add_argument("--version", {
help: "The version (xact_id) to tag. If omitted, uses the dataset's current version.",
});
parser_dataset_tag_env.set_defaults({ func: datasetTagEnvCommand });

const parsed = normalizeDebugLoggingArgs(parser.parse_args());

try {
Expand Down
1 change: 1 addition & 0 deletions js/src/exports.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export type {
CompiledPromptParams,
CompletionPrompt,
ContextParentSpanIds,
DatasetSnapshot,
DataSummary,
DatasetSummary,
DefaultMetadataType,
Expand Down
Loading
Loading