remove confidence, fix CI

aswink · aswink · commit 1421ef71cfd6 · 2026-03-17T10:58:05.000-07:00
We're not using confidence yet, so leave it out for now; we can always add it back in later.
diff --git a/js/src/cli/functions/infer-source.ts b/js/src/cli/functions/infer-source.ts
@@ -85,7 +85,7 @@ export async function findCodeDefinition({
       fn =
         location.position.type === "task"
           ? evaluator.task
-          : evaluator.scores[location.position.index];
+          : (evaluator.scores ?? [])[location.position.index];
     }
   } else if (location.type === "function") {
     fn = outFileModule.functions[location.index].handler;
diff --git a/js/src/cli/functions/upload.ts b/js/src/cli/functions/upload.ts
@@ -180,23 +180,25 @@ export async function uploadHandleBundles({
           function_type: "task",
           origin,
         },
-        ...evaluator.evaluator.scores.map((score, i): BundledFunctionSpec => {
-          const name = scorerName(score, i);
-          return {
-            ...baseInfo,
-            // There is a very small chance that someone names a function with the same convention, but
-            // let's assume it's low enough that it doesn't matter.
-            ...formatNameAndSlug(["eval", namePrefix, "scorer", name]),
-            description: `Score ${name} for eval ${namePrefix}`,
-            location: {
-              type: "experiment",
-              eval_name: evaluator.evaluator.evalName,
-              position: { type: "scorer", index: i },
-            },
-            function_type: "scorer",
-            origin,
-          };
-        }),
+        ...(evaluator.evaluator.scores ?? []).map(
+          (score, i): BundledFunctionSpec => {
+            const name = scorerName(score, i);
+            return {
+              ...baseInfo,
+              // There is a very small chance that someone names a function with the same convention, but
+              // let's assume it's low enough that it doesn't matter.
+              ...formatNameAndSlug(["eval", namePrefix, "scorer", name]),
+              description: `Score ${name} for eval ${namePrefix}`,
+              location: {
+                type: "experiment",
+                eval_name: evaluator.evaluator.evalName,
+                position: { type: "scorer", index: i },
+              },
+              function_type: "scorer",
+              origin,
+            };
+          },
+        ),
       ];
 
       bundleSpecs.push(...fileSpecs);
@@ -219,7 +221,7 @@ export async function uploadHandleBundles({
                   serializeRemoteEvalParametersContainer(resolvedParameters),
               }
             : {}),
-          scores: evaluator.evaluator.scores.map((score, i) => ({
+          scores: (evaluator.evaluator.scores ?? []).map((score, i) => ({
             name: scorerName(score, i),
           })),
         };
diff --git a/js/src/framework.test.ts b/js/src/framework.test.ts
@@ -1503,7 +1503,6 @@ test("classifier-only evaluator populates classifications field", async () => {
           name: "category",
           id: "greeting",
           label: "Greeting",
-          confidence: 0.91,
           metadata: { source: "unit-test" },
         }),
       ],
@@ -1517,7 +1516,6 @@ test("classifier-only evaluator populates classifications field", async () => {
     {
       id: "greeting",
       label: "Greeting",
-      confidence: 0.91,
       metadata: { source: "unit-test" },
     },
   ]);
diff --git a/js/src/framework.ts b/js/src/framework.ts
@@ -219,13 +219,17 @@ type ErrorScoreHandler = (args: {
   unhandledScores: string[];
 }) => Record<string, number> | undefined | void;
 
-type EvaluatorBase<
+/**
+ * Defines an evaluator. At least one of `scores` or `classifiers` must be provided;
+ * a runtime error is raised if neither is present.
+ */
+export interface Evaluator<
   Input,
   Output,
   Expected,
   Metadata extends BaseMetadata = DefaultMetadataType,
   Parameters extends EvalParameters = EvalParameters,
-> = {
+> {
   /**
    * A function that returns a list of inputs, expected outputs, and metadata.
    */
@@ -236,6 +240,19 @@ type EvaluatorBase<
    */
   task: EvalTask<Input, Output, Expected, Metadata, Parameters>;
 
+  /**
+   * A set of functions that take an input, output, and expected value and return a {@link Score}.
+   * At least one of `scores` or `classifiers` must be provided.
+   */
+  scores?: EvalScorer<Input, Output, Expected, Metadata>[];
+
+  /**
+   * A set of functions that take an input, output, and expected value and return a
+   * {@link Classification}. Results are recorded under the `classifications` column.
+   * At least one of `scores` or `classifiers` must be provided.
+   */
+  classifiers?: EvalClassifier<Input, Output, Expected, Metadata>[];
+
   /**
    * A set of parameters that will be passed to the evaluator.
    * Can be:
@@ -353,42 +370,7 @@ type EvaluatorBase<
    * Flushes spans before calling scoring functions
    */
   flushBeforeScoring?: boolean;
-};
-
-/**
- * Defines an evaluator. At least one of `scores` or `classifiers` must be provided.
- */
-export type Evaluator<
-  Input,
-  Output,
-  Expected,
-  Metadata extends BaseMetadata = DefaultMetadataType,
-  Parameters extends EvalParameters = EvalParameters,
-> = EvaluatorBase<Input, Output, Expected, Metadata, Parameters> &
-  (
-    | {
-        /**
-         * A set of functions that take an input, output, and expected value and return a {@link Score}.
-         */
-        scores: EvalScorer<Input, Output, Expected, Metadata>[];
-        /**
-         * A set of functions that take an input, output, and expected value and return a
-         * {@link Classification}. Results are recorded under the `classifications` column.
-         */
-        classifiers?: EvalClassifier<Input, Output, Expected, Metadata>[];
-      }
-    | {
-        /**
-         * A set of functions that take an input, output, and expected value and return a {@link Score}.
-         */
-        scores?: EvalScorer<Input, Output, Expected, Metadata>[];
-        /**
-         * A set of functions that take an input, output, and expected value and return a
-         * {@link Classification}. Results are recorded under the `classifications` column.
-         */
-        classifiers: EvalClassifier<Input, Output, Expected, Metadata>[];
-      }
-  );
+}
 
 export class EvalResultWithSummary<
   Input,
@@ -1007,7 +989,6 @@ function toClassificationItem(c: Classification): ClassificationItem {
   return {
     id: c.id,
     label: c.label ?? c.id,
-    ...(c.confidence !== undefined ? { confidence: c.confidence } : {}),
     ...(c.metadata !== undefined ? { metadata: c.metadata } : {}),
   };
 }
diff --git a/js/util/score.ts b/js/util/score.ts
@@ -6,7 +6,6 @@ export interface Classification {
   name: string;
   id: string;
   label?: string;
-  confidence?: number | null;
   metadata?: Record<string, unknown>;
 }
 
@@ -16,7 +15,6 @@ export interface Classification {
 export interface ClassificationItem {
   id: string;
   label: string;
-  confidence?: number | null;
   metadata?: Record<string, unknown>;
 }
 

Original file line number	Diff line number	Diff line change
`@@ -85,7 +85,7 @@ export async function findCodeDefinition({`
`85`	`85`	`fn =`
`86`	`86`	`location.position.type === "task"`
`87`	`87`	`? evaluator.task`
`88`		`- : evaluator.scores[location.position.index];`
	`88`	`+ : (evaluator.scores ?? [])[location.position.index];`
`89`	`89`	`}`
`90`	`90`	`} else if (location.type === "function") {`
`91`	`91`	`fn = outFileModule.functions[location.index].handler;`
Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,6 @@ export interface Classification {`
`6`	`6`	`name: string;`
`7`	`7`	`id: string;`
`8`	`8`	`label?: string;`
`9`		`- confidence?: number \| null;`
`10`	`9`	`metadata?: Record<string, unknown>;`
`11`	`10`	`}`
`12`	`11`
`@@ -16,7 +15,6 @@ export interface Classification {`
`16`	`15`	`export interface ClassificationItem {`
`17`	`16`	`id: string;`
`18`	`17`	`label: string;`
`19`		`- confidence?: number \| null;`
`20`	`18`	`metadata?: Record<string, unknown>;`
`21`	`19`	`}`
`22`	`20`