Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 67 additions & 7 deletions apps/cli/src/commands/eval/run-eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -800,19 +800,79 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
console.log(`Trace file: ${path.resolve(options.traceFile)}`);
}

const allResults: EvaluationResult[] = [];

// ── TypeScript eval file support ──────────────────────────────────
// Detect .ts/.js files and handle them via the Eval() API instead of the YAML pipeline
const tsEvalFiles = resolvedTestFiles.filter(
(f) => f.endsWith('.ts') || f.endsWith('.js') || f.endsWith('.mts') || f.endsWith('.mjs'),
);
const yamlEvalFiles = resolvedTestFiles.filter((f) => !tsEvalFiles.includes(f));

if (tsEvalFiles.length > 0) {
const { getRegisteredEvals, clearEvalRegistry } = await import('@agentv/core');

for (const tsFile of tsEvalFiles) {
await ensureFileExists(tsFile, 'TypeScript eval file');
clearEvalRegistry();

// Import the TS file — Eval() calls register during import
await import(pathToFileURL(tsFile).href);

const registered = getRegisteredEvals();
if (registered.size === 0) {
throw new Error(`No Eval() calls found in ${tsFile}`);
}

// Wait for all registered eval promises and collect results
for (const [evalName, entry] of registered) {
try {
const evalResult = await entry.promise;
// Print summary per eval
const passCount = evalResult.summary.passed;
const totalCount = evalResult.summary.total;
const meanScore = evalResult.summary.meanScore.toFixed(2);
console.log(
` ${evalName}: ${passCount}/${totalCount} passed (mean score: ${meanScore})`,
);

// Write results to output and collect for overall summary
for (const result of evalResult.results) {
await outputWriter.append(result);
}
allResults.push(...evalResult.results);
} catch (err) {
console.error(
` ${evalName}: ERROR — ${err instanceof Error ? err.message : String(err)}`,
);
}
}

clearEvalRegistry();
}

// If there are no YAML files, finalize output and return
if (yamlEvalFiles.length === 0) {
await outputWriter.close().catch(() => undefined);
const summary = calculateEvaluationSummary(allResults);
console.log(formatEvaluationSummary(summary));
if (allResults.length > 0) {
console.log(`\nResults written to: ${outputPath}`);
}
return;
}
}

// ── YAML eval file pipeline (existing) ─────────────────────────────
// Determine cache state after loading file metadata (need YAML config)
// We defer cache creation until after file metadata is loaded
const evaluationRunner = await resolveEvaluationRunner();
const allResults: EvaluationResult[] = [];
const seenEvalCases = new Set<string>();
const displayIdTracker = createDisplayIdTracker();

// Derive file-level concurrency from worker count (global) when provided
const totalWorkers = options.workers ?? DEFAULT_WORKERS;
const fileConcurrency = Math.min(
Math.max(1, totalWorkers),
Math.max(1, resolvedTestFiles.length),
);
const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, yamlEvalFiles.length));
const perFileWorkers = options.workers
? Math.max(1, Math.floor(totalWorkers / fileConcurrency))
: undefined;
Expand All @@ -833,7 +893,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
readonly failOnError?: FailOnError;
}
>();
for (const testFilePath of resolvedTestFiles) {
for (const testFilePath of yamlEvalFiles) {
const meta = await prepareFileMetadata({
testFilePath,
repoRoot,
Expand Down Expand Up @@ -936,7 +996,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
}

try {
await runWithLimit(resolvedTestFiles, fileConcurrency, async (testFilePath) => {
await runWithLimit(yamlEvalFiles, fileConcurrency, async (testFilePath) => {
const targetPrep = fileMetadata.get(testFilePath);
if (!targetPrep) {
throw new Error(`Missing metadata for ${testFilePath}`);
Expand Down
13 changes: 8 additions & 5 deletions apps/cli/src/commands/eval/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ import { access, stat } from 'node:fs/promises';
import path from 'node:path';
import fg from 'fast-glob';

/** Supported eval file extensions: YAML, JSONL, and TypeScript/JavaScript */
const EVAL_FILE_RE = /\.(ya?ml|jsonl|ts|js|mts|mjs)$/i;

export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promise<string[]> {
const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
if (normalizedInputs.length === 0) {
Expand All @@ -19,7 +22,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
: path.resolve(cwd, pattern);
try {
const stats = await stat(candidatePath);
if (stats.isFile() && /\.(ya?ml|jsonl)$/i.test(candidatePath)) {
if (stats.isFile() && EVAL_FILE_RE.test(candidatePath)) {
results.add(candidatePath);
continue;
}
Expand All @@ -37,13 +40,13 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
followSymbolicLinks: true,
});

const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl)$/i.test(filePath));
if (yamlMatches.length === 0) {
const evalMatches = matches.filter((filePath) => EVAL_FILE_RE.test(filePath));
if (evalMatches.length === 0) {
unmatched.push(pattern);
continue;
}

for (const filePath of yamlMatches) {
for (const filePath of evalMatches) {
results.add(path.normalize(filePath));
}
}
Expand All @@ -52,7 +55,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
throw new Error(
`No eval files matched: ${unmatched.join(
', ',
)}. Provide YAML or JSONL paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.jsonl").`,
)}. Provide YAML, JSONL, or TypeScript paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.eval.ts").`,
);
}

Expand Down
38 changes: 38 additions & 0 deletions examples/features/sdk-eval-api/evals/basic.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { Contains, Eval, ExactMatch } from '@agentv/core';

// Example eval suite registered via the Eval() API (imported from '@agentv/core').
// NOTE(review): Eval() appears to register the suite on import for the CLI runner
// to pick up — confirm against the Eval() registry implementation.
Eval('sdk-example/basic', {
  // Each data entry is one test case: the input sent to the target and the
  // expectations used by the assertions below.
  data: [
    {
      id: 'greeting',
      input: 'Say hello',
      expectedOutput: 'hello',
      criteria: 'Response should contain a greeting',
    },
    {
      id: 'math',
      input: 'What is 2+2?',
      expectedOutput: '4',
      criteria: 'Response should contain the correct answer',
    },
  ],
  // Mock target: always returns the fixed response string (no model call).
  target: { name: 'default', provider: 'mock', response: 'hello, the answer is 4' },
  assert: [
    // Built-in factory: passes when the output contains 'hello'.
    Contains('hello'),
    // Inline assertion: binary score based on the case's expectedOutput.
    ({ output, expectedOutput }) => ({
      name: 'has-expected',
      score: output.includes(expectedOutput ?? '') ? 1.0 : 0.0,
    }),
  ],
});

// Second suite: demonstrates the ExactMatch built-in (trimmed equality with
// expectedOutput). The mock response matches exactly, so this should pass.
Eval('sdk-example/exact', {
  data: [
    {
      id: 'exact-check',
      input: 'Echo back: hello world',
      expectedOutput: 'hello world',
    },
  ],
  target: { name: 'default', provider: 'mock', response: 'hello world' },
  assert: [ExactMatch],
});
27 changes: 9 additions & 18 deletions examples/features/sdk-programmatic-api/evaluate.ts
Original file line number Diff line number Diff line change
@@ -1,38 +1,29 @@
/**
* Programmatic API Example
*
* Uses evaluate() from @agentv/core to run evaluations as a library.
* Uses Eval() from @agentv/core to run evaluations as a library.
* No YAML needed — tests defined inline with full type safety.
*
* Run: bun run evaluate.ts
* (Uses 'default' target from .agentv/targets.yaml and .env credentials)
*/
import { evaluate } from '@agentv/core';
import { Contains, Eval } from '@agentv/core';

const { results, summary } = await evaluate({
tests: [
const { results, summary } = await Eval('programmatic-api-example', {
data: [
{
id: 'greeting',
input: 'Say hello and introduce yourself briefly.',
expected_output: "Hello! I'm an AI assistant here to help you.",
assert: [{ type: 'contains', value: 'Hello' }],
expectedOutput: "Hello! I'm an AI assistant here to help you.",
},
{
id: 'json-output',
input: [
{ role: 'system', content: 'Respond only with valid JSON. No markdown.' },
{ role: 'user', content: 'Return a JSON object with a "status" field set to "ok".' },
],
expected_output: '{"status": "ok"}',
assert: [
{ type: 'is-json', required: true },
{ type: 'contains', value: 'ok' },
],
input: 'Return a JSON object with a "status" field set to "ok".',
expectedOutput: '{"status": "ok"}',
},
],
onResult: (result) => {
console.log(` ${result.testId}: score=${result.score.toFixed(2)}`);
},
target: { provider: 'mock', response: 'Hello! I am an AI assistant. {"status": "ok"}' },
assert: [Contains('Hello'), { type: 'contains', value: 'ok' }],
});

console.log('\n--- Summary ---');
Expand Down
101 changes: 101 additions & 0 deletions packages/core/src/evaluation/assertions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/**
* Built-in assertion factories for the Eval() API.
*
* Each factory returns an AssertFn — a plain function that takes
* { input, output, expectedOutput, criteria, metadata } and returns
* { name, score }. These wrap the same logic as the built-in evaluator
* types but are usable as inline functions in the assert array.
*/

/**
 * Context passed to inline assertion functions.
 * One context is built per test case; `output` is the target's response text.
 */
export interface AssertContext {
  // The test case's input prompt (as text).
  readonly input: string;
  // The target's output being judged.
  readonly output: string;
  // Optional expected output from the test case, if one was provided.
  readonly expectedOutput?: string;
  // Optional human-readable grading criteria from the test case.
  readonly criteria?: string;
  // Optional free-form test-case metadata.
  readonly metadata?: Record<string, unknown>;
}

/**
 * Result from an inline assertion function.
 * `score` is conventionally in [0, 1] — the built-ins below emit 0.0 or 1.0.
 */
export interface AssertResult {
  // Identifies the assertion in reports (e.g. 'contains', 'regex').
  readonly name: string;
  // Numeric grade for this assertion on this case.
  readonly score: number;
  // Optional extra detail to surface alongside the score.
  readonly metadata?: Record<string, unknown>;
}

/** Inline assertion function signature: sync or async, one result per call. */
export type AssertFn = (ctx: AssertContext) => AssertResult | Promise<AssertResult>;

/**
 * Checks if output contains the given substring.
 * Case-sensitive; scores 1.0 on a hit, 0.0 otherwise.
 */
export function Contains(value: string): AssertFn {
  return (ctx) => {
    const found = ctx.output.includes(value);
    return { name: 'contains', score: found ? 1.0 : 0.0 };
  };
}

/**
 * Case-insensitive contains check.
 * The needle is lower-cased once at factory time; each call lower-cases
 * the output before searching. Scores 1.0 on a hit, 0.0 otherwise.
 */
export function IContains(value: string): AssertFn {
  const needle = value.toLowerCase();
  return (ctx) => {
    const haystack = ctx.output.toLowerCase();
    return { name: 'icontains', score: haystack.includes(needle) ? 1.0 : 0.0 };
  };
}

/**
 * Checks if output contains ALL of the given substrings.
 * An empty list vacuously passes (score 1.0), matching Array.every semantics.
 */
export function ContainsAll(values: readonly string[]): AssertFn {
  return (ctx) => {
    let allPresent = true;
    for (const candidate of values) {
      if (!ctx.output.includes(candidate)) {
        allPresent = false;
        break;
      }
    }
    return { name: 'contains-all', score: allPresent ? 1.0 : 0.0 };
  };
}

/**
 * Checks if output contains ANY of the given substrings.
 * An empty list always fails (score 0.0), matching Array.some semantics.
 */
export function ContainsAny(values: readonly string[]): AssertFn {
  return (ctx) => {
    let hit = false;
    for (const candidate of values) {
      if (ctx.output.includes(candidate)) {
        hit = true;
        break;
      }
    }
    return { name: 'contains-any', score: hit ? 1.0 : 0.0 };
  };
}

/**
 * Checks if trimmed output exactly equals trimmed expectedOutput.
 * Scores 0.0 when the test case provides no expectedOutput.
 */
export const ExactMatch: AssertFn = (ctx) => {
  const expected = ctx.expectedOutput;
  const matched = expected !== undefined && ctx.output.trim() === expected.trim();
  return { name: 'exact-match', score: matched ? 1.0 : 0.0 };
};

/**
 * Checks if trimmed output starts with the given value.
 * Note: the expected prefix is trimmed too, so surrounding whitespace
 * on either side is ignored.
 */
export function StartsWith(value: string): AssertFn {
  const prefix = value.trim();
  return (ctx) => {
    const passed = ctx.output.trim().startsWith(prefix);
    return { name: 'starts-with', score: passed ? 1.0 : 0.0 };
  };
}

/**
 * Checks if trimmed output ends with the given value.
 * Note: the expected suffix is trimmed too, so surrounding whitespace
 * on either side is ignored.
 */
export function EndsWith(value: string): AssertFn {
  const suffix = value.trim();
  return (ctx) => {
    const passed = ctx.output.trim().endsWith(suffix);
    return { name: 'ends-with', score: passed ? 1.0 : 0.0 };
  };
}

/**
 * Checks if output matches the given regex pattern.
 *
 * The RegExp is compiled once at factory time (an invalid pattern throws
 * here, at registration, rather than per test case). Because the compiled
 * instance is shared across calls, a pattern created with the `g` or `y`
 * flag would otherwise carry `lastIndex` state between invocations, making
 * `test()` alternate between true and false on identical outputs — so we
 * reset `lastIndex` before every match.
 */
export function Regex(pattern: string, flags?: string): AssertFn {
  const re = new RegExp(pattern, flags);
  return ({ output }) => {
    // Guard against stateful matching with sticky/global flags.
    re.lastIndex = 0;
    return { name: 'regex', score: re.test(output) ? 1.0 : 0.0 };
  };
}

/**
 * Checks if output is valid JSON.
 * Any JSON.parse-accepted text scores 1.0 (including bare scalars like "4");
 * a parse failure scores 0.0.
 */
export const IsJson: AssertFn = (ctx) => {
  let score = 0.0;
  try {
    JSON.parse(ctx.output);
    score = 1.0;
  } catch {
    // Not parseable as JSON — leave score at 0.0.
  }
  return { name: 'is-json', score };
};
Loading