Skip to content

Commit 1421ef7

Browse files
committed
remove confidence, fix CI
We're not using confidence yet, so leave it out for now; we can always add it back later.
1 parent 0eaf765 commit 1421ef7

5 files changed

Lines changed: 41 additions & 62 deletions

File tree

js/src/cli/functions/infer-source.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ export async function findCodeDefinition({
8585
fn =
8686
location.position.type === "task"
8787
? evaluator.task
88-
: evaluator.scores[location.position.index];
88+
: (evaluator.scores ?? [])[location.position.index];
8989
}
9090
} else if (location.type === "function") {
9191
fn = outFileModule.functions[location.index].handler;

js/src/cli/functions/upload.ts

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -180,23 +180,25 @@ export async function uploadHandleBundles({
180180
function_type: "task",
181181
origin,
182182
},
183-
...evaluator.evaluator.scores.map((score, i): BundledFunctionSpec => {
184-
const name = scorerName(score, i);
185-
return {
186-
...baseInfo,
187-
// There is a very small chance that someone names a function with the same convention, but
188-
// let's assume it's low enough that it doesn't matter.
189-
...formatNameAndSlug(["eval", namePrefix, "scorer", name]),
190-
description: `Score ${name} for eval ${namePrefix}`,
191-
location: {
192-
type: "experiment",
193-
eval_name: evaluator.evaluator.evalName,
194-
position: { type: "scorer", index: i },
195-
},
196-
function_type: "scorer",
197-
origin,
198-
};
199-
}),
183+
...(evaluator.evaluator.scores ?? []).map(
184+
(score, i): BundledFunctionSpec => {
185+
const name = scorerName(score, i);
186+
return {
187+
...baseInfo,
188+
// There is a very small chance that someone names a function with the same convention, but
189+
// let's assume it's low enough that it doesn't matter.
190+
...formatNameAndSlug(["eval", namePrefix, "scorer", name]),
191+
description: `Score ${name} for eval ${namePrefix}`,
192+
location: {
193+
type: "experiment",
194+
eval_name: evaluator.evaluator.evalName,
195+
position: { type: "scorer", index: i },
196+
},
197+
function_type: "scorer",
198+
origin,
199+
};
200+
},
201+
),
200202
];
201203

202204
bundleSpecs.push(...fileSpecs);
@@ -219,7 +221,7 @@ export async function uploadHandleBundles({
219221
serializeRemoteEvalParametersContainer(resolvedParameters),
220222
}
221223
: {}),
222-
scores: evaluator.evaluator.scores.map((score, i) => ({
224+
scores: (evaluator.evaluator.scores ?? []).map((score, i) => ({
223225
name: scorerName(score, i),
224226
})),
225227
};

js/src/framework.test.ts

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1503,7 +1503,6 @@ test("classifier-only evaluator populates classifications field", async () => {
15031503
name: "category",
15041504
id: "greeting",
15051505
label: "Greeting",
1506-
confidence: 0.91,
15071506
metadata: { source: "unit-test" },
15081507
}),
15091508
],
@@ -1517,7 +1516,6 @@ test("classifier-only evaluator populates classifications field", async () => {
15171516
{
15181517
id: "greeting",
15191518
label: "Greeting",
1520-
confidence: 0.91,
15211519
metadata: { source: "unit-test" },
15221520
},
15231521
]);

js/src/framework.ts

Lines changed: 20 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -219,13 +219,17 @@ type ErrorScoreHandler = (args: {
219219
unhandledScores: string[];
220220
}) => Record<string, number> | undefined | void;
221221

222-
type EvaluatorBase<
222+
/**
223+
* Defines an evaluator. At least one of `scores` or `classifiers` must be provided;
224+
* a runtime error is raised if neither is present.
225+
*/
226+
export interface Evaluator<
223227
Input,
224228
Output,
225229
Expected,
226230
Metadata extends BaseMetadata = DefaultMetadataType,
227231
Parameters extends EvalParameters = EvalParameters,
228-
> = {
232+
> {
229233
/**
230234
* A function that returns a list of inputs, expected outputs, and metadata.
231235
*/
@@ -236,6 +240,19 @@ type EvaluatorBase<
236240
*/
237241
task: EvalTask<Input, Output, Expected, Metadata, Parameters>;
238242

243+
/**
244+
* A set of functions that take an input, output, and expected value and return a {@link Score}.
245+
* At least one of `scores` or `classifiers` must be provided.
246+
*/
247+
scores?: EvalScorer<Input, Output, Expected, Metadata>[];
248+
249+
/**
250+
* A set of functions that take an input, output, and expected value and return a
251+
* {@link Classification}. Results are recorded under the `classifications` column.
252+
* At least one of `scores` or `classifiers` must be provided.
253+
*/
254+
classifiers?: EvalClassifier<Input, Output, Expected, Metadata>[];
255+
239256
/**
240257
* A set of parameters that will be passed to the evaluator.
241258
* Can be:
@@ -353,42 +370,7 @@ type EvaluatorBase<
353370
* Flushes spans before calling scoring functions
354371
*/
355372
flushBeforeScoring?: boolean;
356-
};
357-
358-
/**
359-
* Defines an evaluator. At least one of `scores` or `classifiers` must be provided.
360-
*/
361-
export type Evaluator<
362-
Input,
363-
Output,
364-
Expected,
365-
Metadata extends BaseMetadata = DefaultMetadataType,
366-
Parameters extends EvalParameters = EvalParameters,
367-
> = EvaluatorBase<Input, Output, Expected, Metadata, Parameters> &
368-
(
369-
| {
370-
/**
371-
* A set of functions that take an input, output, and expected value and return a {@link Score}.
372-
*/
373-
scores: EvalScorer<Input, Output, Expected, Metadata>[];
374-
/**
375-
* A set of functions that take an input, output, and expected value and return a
376-
* {@link Classification}. Results are recorded under the `classifications` column.
377-
*/
378-
classifiers?: EvalClassifier<Input, Output, Expected, Metadata>[];
379-
}
380-
| {
381-
/**
382-
* A set of functions that take an input, output, and expected value and return a {@link Score}.
383-
*/
384-
scores?: EvalScorer<Input, Output, Expected, Metadata>[];
385-
/**
386-
* A set of functions that take an input, output, and expected value and return a
387-
* {@link Classification}. Results are recorded under the `classifications` column.
388-
*/
389-
classifiers: EvalClassifier<Input, Output, Expected, Metadata>[];
390-
}
391-
);
373+
}
392374

393375
export class EvalResultWithSummary<
394376
Input,
@@ -1007,7 +989,6 @@ function toClassificationItem(c: Classification): ClassificationItem {
1007989
return {
1008990
id: c.id,
1009991
label: c.label ?? c.id,
1010-
...(c.confidence !== undefined ? { confidence: c.confidence } : {}),
1011992
...(c.metadata !== undefined ? { metadata: c.metadata } : {}),
1012993
};
1013994
}

js/util/score.ts

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ export interface Classification {
66
name: string;
77
id: string;
88
label?: string;
9-
confidence?: number | null;
109
metadata?: Record<string, unknown>;
1110
}
1211

@@ -16,7 +15,6 @@ export interface Classification {
1615
export interface ClassificationItem {
1716
id: string;
1817
label: string;
19-
confidence?: number | null;
2018
metadata?: Record<string, unknown>;
2119
}
2220

0 commit comments

Comments
 (0)