From d024dcd6fd34386cad070fc1111c1f21b6d20b28 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:00:09 +0000
Subject: [PATCH 01/10] feat(core): add built-in assertion factories for Eval()
 API

---
 packages/core/src/evaluation/assertions.ts    | 101 +++++++++++++++
 .../core/test/evaluation/assertions.test.ts   | 120 ++++++++++++++++++
 2 files changed, 221 insertions(+)
 create mode 100644 packages/core/src/evaluation/assertions.ts
 create mode 100644 packages/core/test/evaluation/assertions.test.ts
diff --git a/packages/core/src/evaluation/assertions.ts b/packages/core/src/evaluation/assertions.ts
new file mode 100644
index 00000000..d7f3803b
--- /dev/null
+++ b/packages/core/src/evaluation/assertions.ts
@@ -0,0 +1,101 @@
+/**
+ * Built-in assertion factories for the Eval() API.
+ *
+ * Each factory returns an AssertFn — a plain function that takes
+ * { input, output, expectedOutput, criteria, metadata } and returns
+ * { name, score }. These wrap the same logic as the built-in evaluator
+ * types but are usable as inline functions in the assert array.
+ */
+
+/** Context passed to inline assertion functions; only `input` and `output` are required. */
+export interface AssertContext {
+  readonly input: string;
+  readonly output: string; // the text the built-in factories in this file inspect
+  readonly expectedOutput?: string; // compared (trimmed) against `output` by ExactMatch
+  readonly criteria?: string;
+  readonly metadata?: Record<string, unknown>;
+}
+
+/** Result from an inline assertion function */
+export interface AssertResult {
+  readonly name: string; // identifier of the assertion, e.g. 'contains' or 'is-json'
+  readonly score: number; // the built-in factories in this file return 1.0 (pass) or 0.0 (fail)
+  readonly metadata?: Record<string, unknown>;
+}
+
+/** Inline assertion function signature; the result may be returned directly or as a Promise. */
+export type AssertFn = (ctx: AssertContext) => AssertResult | Promise<AssertResult>;
+
+/** Scores 1.0 when `value` occurs anywhere in the output (case-sensitive), otherwise 0.0. */
+export function Contains(value: string): AssertFn {
+  return (ctx) => {
+    const found = ctx.output.includes(value);
+    return { name: 'contains', score: found ? 1.0 : 0.0 };
+  };
+}
+
+/** Like Contains, but compares lower-cased copies of both the needle and the output. */
+export function IContains(value: string): AssertFn {
+  const needle = value.toLowerCase();
+  return (ctx) => {
+    const haystack = ctx.output.toLowerCase();
+    return { name: 'icontains', score: haystack.includes(needle) ? 1.0 : 0.0 };
+  };
+}
+
+/** Scores 1.0 only when every entry of `values` is a substring of the output. */
+export function ContainsAll(values: readonly string[]): AssertFn {
+  return (ctx) => {
+    const anyMissing = values.some((needle) => !ctx.output.includes(needle));
+    return { name: 'contains-all', score: anyMissing ? 0.0 : 1.0 };
+  };
+}
+
+/** Scores 1.0 when at least one entry of `values` is a substring of the output. */
+export function ContainsAny(values: readonly string[]): AssertFn {
+  return (ctx) => {
+    const noneFound = values.every((needle) => !ctx.output.includes(needle));
+    return { name: 'contains-any', score: noneFound ? 0.0 : 1.0 };
+  };
+}
+
+/** Scores 1.0 when trimmed output equals trimmed expectedOutput; 0.0 if none was given. */
+export const ExactMatch: AssertFn = ({ output, expectedOutput }) => {
+  const matches = expectedOutput !== undefined && output.trim() === expectedOutput.trim();
+  return { name: 'exact-match', score: matches ? 1.0 : 0.0 };
+};
+
+/** Scores 1.0 when the trimmed output begins with the trimmed `value`. */
+export function StartsWith(value: string): AssertFn {
+  return (ctx) => {
+    const body = ctx.output.trim();
+    return { name: 'starts-with', score: body.startsWith(value.trim()) ? 1.0 : 0.0 };
+  };
+}
+
+/** Scores 1.0 when the trimmed output finishes with the trimmed `value`. */
+export function EndsWith(value: string): AssertFn {
+  return (ctx) => {
+    const body = ctx.output.trim();
+    return { name: 'ends-with', score: body.endsWith(value.trim()) ? 1.0 : 0.0 };
+  };
+}
+
+/**
+ * Checks if output matches the given regex pattern (1.0 on match, 0.0 otherwise).
+ * Each call tests a fresh copy, so `g`/`y` flags cannot carry `lastIndex` between outputs.
+ */
+export function Regex(pattern: string, flags?: string): AssertFn {
+  const re = new RegExp(pattern, flags); // compiled once so an invalid pattern throws here
+  return ({ output }) => ({ name: 'regex', score: new RegExp(re).test(output) ? 1.0 : 0.0 });
+}
+
+/** Checks if output is valid JSON, i.e. whether JSON.parse accepts it without throwing. */
+export const IsJson: AssertFn = ({ output }) => {
+  try {
+    JSON.parse(output); // parsed value is discarded; only success or failure matters
+    return { name: 'is-json', score: 1.0 };
+  } catch {
+    return { name: 'is-json', score: 0.0 };
+  }
+};
diff --git a/packages/core/test/evaluation/assertions.test.ts b/packages/core/test/evaluation/assertions.test.ts
new file mode 100644
index 00000000..42bb1bd4
--- /dev/null
+++ b/packages/core/test/evaluation/assertions.test.ts
@@ -0,0 +1,120 @@
+import { describe, expect, it } from 'vitest';
+import {
+  Contains,
+  ContainsAll,
+  ContainsAny,
+  EndsWith,
+  ExactMatch,
+  IContains,
+  IsJson,
+  Regex,
+  StartsWith,
+} from '../../src/evaluation/assertions.js';
+
+const ctx = (output: string, expectedOutput?: string) => ({
+  input: 'test-input',
+  output,
+  expectedOutput,
+});
+
+describe('Contains', () => {
+  it('scores 1.0 when output contains value', () => {
+    const fn = Contains('hello');
+    const result = fn(ctx('hello world'));
+    expect(result.score).toBe(1.0);
+    expect(result.name).toBe('contains');
+  });
+
+  it('scores 0.0 when output does not contain value', () => {
+    const fn = Contains('goodbye');
+    const result = fn(ctx('hello world'));
+    expect(result.score).toBe(0.0);
+  });
+});
+
+describe('IContains', () => {
+  it('scores 1.0 case-insensitively', () => {
+    const fn = IContains('HELLO');
+    const result = fn(ctx('hello world'));
+    expect(result.score).toBe(1.0);
+  });
+});
+
+describe('ContainsAll', () => {
+  it('scores 1.0 when all values present', () => {
+    const fn = ContainsAll(['hello', 'world']);
+    expect(fn(ctx('hello world')).score).toBe(1.0);
+  });
+
+  it('scores 0.0 when any value missing', () => {
+    const fn = ContainsAll(['hello', 'goodbye']);
+    expect(fn(ctx('hello world')).score).toBe(0.0);
+  });
+});
+
+describe('ContainsAny', () => {
+  it('scores 1.0 when any value present', () => {
+    const fn = ContainsAny(['goodbye', 'world']);
+    expect(fn(ctx('hello world')).score).toBe(1.0);
+  });
+
+  it('scores 0.0 when no values present', () => {
+    const fn = ContainsAny(['goodbye', 'farewell']);
+    expect(fn(ctx('hello world')).score).toBe(0.0);
+  });
+});
+
+describe('ExactMatch', () => {
+  it('scores 1.0 on exact match (trimmed)', () => {
+    expect(ExactMatch(ctx('hello', 'hello')).score).toBe(1.0);
+    expect(ExactMatch(ctx('  hello  ', 'hello')).score).toBe(1.0);
+  });
+
+  it('scores 0.0 on mismatch', () => {
+    expect(ExactMatch(ctx('hello', 'world')).score).toBe(0.0);
+  });
+
+  it('scores 0.0 when no expectedOutput', () => {
+    expect(ExactMatch(ctx('hello')).score).toBe(0.0);
+  });
+});
+
+describe('StartsWith', () => {
+  it('scores 1.0 when output starts with value', () => {
+    expect(StartsWith('hello')(ctx('hello world')).score).toBe(1.0);
+  });
+
+  it('scores 0.0 when it does not', () => {
+    expect(StartsWith('world')(ctx('hello world')).score).toBe(0.0);
+  });
+});
+
+describe('EndsWith', () => {
+  it('scores 1.0 when output ends with value', () => {
+    expect(EndsWith('world')(ctx('hello world')).score).toBe(1.0);
+  });
+});
+
+describe('Regex', () => {
+  it('scores 1.0 on match', () => {
+    expect(Regex('\\d+')(ctx('abc 123 def')).score).toBe(1.0);
+  });
+
+  it('scores 0.0 on no match', () => {
+    expect(Regex('^\\d+$')(ctx('abc')).score).toBe(0.0);
+  });
+
+  it('supports flags', () => {
+    expect(Regex('HELLO', 'i')(ctx('hello')).score).toBe(1.0);
+  });
+});
+
+describe('IsJson', () => {
+  it('scores 1.0 for valid JSON', () => {
+    expect(IsJson(ctx('{"a": 1}')).score).toBe(1.0);
+  });
+
+  it('scores 0.0 for invalid JSON', () => {
+    expect(IsJson(ctx('not json')).score).toBe(0.0);
+  });
+});

From c4cc915be09ee4bf1163b4c10ce4bf630ba44cff Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:00:56 +0000
Subject: [PATCH 02/10] feat(core): add function provider for Eval() task
 functions

---
 .../evaluation/providers/function-provider.ts | 27 ++++++++++++
 .../providers/function-provider.test.ts       | 43 +++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 packages/core/src/evaluation/providers/function-provider.ts
 create mode 100644 packages/core/test/evaluation/providers/function-provider.test.ts

diff --git a/packages/core/src/evaluation/providers/function-provider.ts b/packages/core/src/evaluation/providers/function-provider.ts
new file mode 100644
index 00000000..27bd5b37
--- /dev/null
+++ b/packages/core/src/evaluation/providers/function-provider.ts
@@ -0,0 +1,27 @@
+import type { Provider, ProviderKind, ProviderRequest, ProviderResponse } from './types.js';
+
+/**
+ * Wraps a user-provided task function as a Provider.
+ * Used by Eval() when `task` is specified instead of `target`.
+ * Errors thrown by `taskFn` surface as a rejected `invoke()` promise.
+ */
+export function createFunctionProvider(
+  taskFn: (input: string) => string | Promise<string>,
+): Provider {
+  return {
+    id: 'function-provider',
+    kind: 'mock' as ProviderKind,
+    targetName: 'custom-task',
+    async invoke(request: ProviderRequest): Promise<ProviderResponse> {
+      const startedAt = Date.now();
+      const result = await taskFn(request.question);
+      const finishedAt = Date.now(); // one clock read per edge keeps timestamps and duration in sync
+      return {
+        output: [{ role: 'assistant', content: result }],
+        durationMs: finishedAt - startedAt,
+        startTime: new Date(startedAt).toISOString(),
+        endTime: new Date(finishedAt).toISOString(),
+      };
+    },
+  };
+}
diff --git a/packages/core/test/evaluation/providers/function-provider.test.ts b/packages/core/test/evaluation/providers/function-provider.test.ts
new file mode 100644
index 00000000..71a5cae7
--- /dev/null
+++ b/packages/core/test/evaluation/providers/function-provider.test.ts
@@ -0,0 +1,43 @@
+import { describe, expect, it } from 'vitest';
+import { createFunctionProvider } from '../../../src/evaluation/providers/function-provider.js';
+
+describe('createFunctionProvider', () => {
+  it('wraps a sync task function as a Provider', async () => {
+    const provider = createFunctionProvider((input) => `Echo: ${input}`);
+
+    expect(provider.id).toBe('function-provider');
+    expect(provider.targetName).toBe('custom-task');
+
+    const response = await provider.invoke({ question: 'hello' });
+    expect(response.output).toHaveLength(1);
+    expect(response.output![0].role).toBe('assistant');
+    expect(response.output![0].content).toBe('Echo: hello');
+  });
+
+  it('wraps an async task function', async () => {
+    const provider = createFunctionProvider(async (input) => {
+      return `Async: ${input}`;
+    });
+
+    const response = await provider.invoke({ question: 'world' });
+    expect(response.output![0].content).toBe('Async: world');
+  });
+
+  it('measures duration', async () => {
+    const provider = createFunctionProvider(async () => {
+      await new Promise((r) => setTimeout(r, 50));
+      return 'done';
+    });
+
+    const response = await provider.invoke({ question: 'test' });
+    expect(response.durationMs).toBeGreaterThanOrEqual(40);
+  });
+
+  it('propagates errors from the task function', async () => {
+    const provider = createFunctionProvider(() => {
+      throw new Error('task failed');
+    });
+
+    await expect(provider.invoke({ question: 'test' })).rejects.toThrow('task failed');
+  });
+});

From ffeadcf11ffe24c02b3749dcc9b7c0101fc71fd4 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:03:42 +0000
Subject: [PATCH 03/10] feat(core): add Eval() function with registry and
 execution

---
 packages/core/src/evaluation/eval-api.ts      | 262 ++++++++++++++++++
 .../core/test/evaluation/eval-api.test.ts     | 153 ++++++++++
 2 files changed, 415 insertions(+)
 create mode 100644 packages/core/src/evaluation/eval-api.ts
 create mode 100644 packages/core/test/evaluation/eval-api.test.ts

diff --git a/packages/core/src/evaluation/eval-api.ts b/packages/core/src/evaluation/eval-api.ts
new file mode 100644
index 00000000..85d8e82b
--- /dev/null
+++ b/packages/core/src/evaluation/eval-api.ts
@@ -0,0 +1,262 @@
+/**
+ * Declarative Eval() API for single-file TypeScript evaluations.
+ *
+ * Eval() registers an eval in a global registry and returns a Promise
+ * with results. Serves both CLI discovery (module-level, no await) and
+ * programmatic use (await for results).
+ *
+ * @example Single-file eval with built-in target
+ * ```typescript
+ * import { Eval, Contains } from '@agentv/core';
+ *
+ * Eval('my-eval', {
+ *   data: [{ input: 'What is 2+2?', expectedOutput: '4' }],
+ *   target: { provider: 'claude_agent' },
+ *   assert: [Contains('4')],
+ * });
+ * ```
+ *
+ * @module
+ */
+
+import path from 'node:path';
+import type { TargetDefinition } from './providers/types.js';
+import type { EvalRunResult, EvalSummary, EvalAssertionInput } from './evaluate.js';
+import type { AssertFn, AssertContext, AssertResult } from './assertions.js';
+import type { EvalTest, EvaluatorConfig, EvaluationResult } from './types.js';
+
+// ─── Types ───────────────────────────────────────────────────────────
+
+/** Data item — aligns with YAML test fields (camelCase) */
+export interface EvalDataItem {
+  readonly id?: string;
+  readonly input: string | readonly { role: string; content: string }[];
+  readonly expectedOutput?: string;
+  readonly criteria?: string;
+  readonly metadata?: Record<string, unknown>;
+}
+
+/** Assert entry: inline function, built-in factory result, or assertion config */
+export type AssertEntry = AssertFn | EvalAssertionInput;
+
+/** Configuration for Eval() */
+export interface EvalOptions {
+  readonly data:
+    | readonly EvalDataItem[]
+    | (() => readonly EvalDataItem[] | Promise<readonly EvalDataItem[]>);
+  readonly target?: TargetDefinition;
+  readonly task?: (input: string) => string | Promise<string>;
+  readonly assert: readonly AssertEntry[];
+  readonly metadata?: Record<string, unknown>;
+  readonly workers?: number;
+  readonly agentTimeoutMs?: number;
+}
+
+// ─── Registry ────────────────────────────────────────────────────────
+
+interface RegisteredEval {
+  readonly name: string;
+  readonly options: EvalOptions;
+  readonly promise: Promise<EvalRunResult>;
+}
+
+const evalRegistry = new Map<string, RegisteredEval>();
+
+/** Get all registered evals (used by CLI to discover evals in .ts files). */
+export function getRegisteredEvals(): ReadonlyMap<string, RegisteredEval> {
+  return evalRegistry;
+}
+
+/** Clear the registry (used in tests). */
+export function clearEvalRegistry(): void {
+  evalRegistry.clear();
+}
+
+// ─── Eval() ──────────────────────────────────────────────────────────
+
+/**
+ * Declare and run an evaluation.
+ *
+ * Registers the eval in a global registry (for CLI discovery) and
+ * returns a Promise with results (for programmatic use).
+ */
+export function Eval(name: string, options: EvalOptions): Promise<EvalRunResult> {
+  // Validate
+  if (evalRegistry.has(name)) {
+    throw new Error(`Eval "${name}" already registered`);
+  }
+  if (options.task && options.target) {
+    throw new Error('Cannot specify both "task" and "target" — use one or the other.');
+  }
+  if (!options.task && !options.target) {
+    throw new Error('Must specify either "task" or "target".');
+  }
+
+  // Create the execution promise
+  const promise = runEval(name, options);
+
+  evalRegistry.set(name, { name, options, promise });
+  return promise;
+}
+
+// ─── Internal execution ──────────────────────────────────────────────
+
+async function runEval(name: string, options: EvalOptions): Promise<EvalRunResult> {
+  // Lazy imports to avoid circular dependencies and keep module lightweight
+  const { runEvaluation } = await import('./orchestrator.js');
+  const { resolveTargetDefinition } = await import('./providers/targets.js');
+  const { findGitRoot } = await import('./file-utils.js');
+  const { createFunctionProvider } = await import('./providers/function-provider.js');
+
+  // Resolve data
+  const data = typeof options.data === 'function' ? await options.data() : options.data;
+
+  // Partition assert entries into inline functions and config objects
+  const configAsserts: EvalAssertionInput[] = [];
+
+  for (let i = 0; i < options.assert.length; i++) {
+    const entry = options.assert[i];
+    if (typeof entry === 'function') {
+      // Register inline function and create a placeholder assertion config
+      configAsserts.push({
+        type: 'inline-assert',
+        name: `inline-assert-${i}`,
+      });
+    } else {
+      configAsserts.push(entry as EvalAssertionInput);
+    }
+  }
+
+  // Store inline functions for the inline-assert evaluator to pick up
+  const inlineFns = options.assert
+    .filter((entry): entry is AssertFn => typeof entry === 'function');
+  setInlineAssertFns(inlineFns);
+
+  // Resolve target
+  const targetDef: TargetDefinition = options.target ?? { name: 'custom-task', provider: 'mock' };
+  const resolvedTarget = resolveTargetDefinition(targetDef);
+
+  // Build providerFactory for task functions
+  let providerFactory: ((target: any) => any) | undefined;
+  if (options.task) {
+    const taskProvider = createFunctionProvider(options.task);
+    providerFactory = () => taskProvider;
+  }
+
+  // Convert EvalAssertionInput[] to EvaluatorConfig[]
+  const evaluatorConfigs = configAsserts.map((a, i) => {
+    const { type: rawType, ...rest } = a;
+    return {
+      ...rest,
+      name: a.name ?? `${rawType}_${i}`,
+      type: rawType.replace(/_/g, '-'),
+    } as unknown as EvaluatorConfig;
+  });
+
+  // Convert data items to EvalTest[]
+  const evalCases: EvalTest[] = data.map((item, i) => {
+    const input =
+      typeof item.input === 'string'
+        ? ([{ role: 'user' as const, content: item.input }] as EvalTest['input'])
+        : (item.input as unknown as EvalTest['input']);
+
+    const question =
+      typeof item.input === 'string'
+        ? item.input
+        : ((item.input.find((m) => m.role === 'user')?.content as string) ?? '');
+
+    const expectedOutput = item.expectedOutput
+      ? ([{ role: 'assistant' as const, content: item.expectedOutput }] as EvalTest['expected_output'])
+      : [];
+
+    return {
+      id: item.id ? `${name}/${item.id}` : `${name}/${i}`,
+      criteria: item.criteria ?? '',
+      question: String(question),
+      input,
+      input_segments: [],
+      expected_output: expectedOutput,
+      reference_answer: item.expectedOutput,
+      guideline_paths: [],
+      guideline_patterns: [],
+      file_paths: [],
+      evaluators: evaluatorConfigs.length > 0 ? evaluatorConfigs : undefined,
+      metadata: item.metadata,
+    };
+  });
+
+  const startTime = Date.now();
+  const repoRoot = (await findGitRoot(process.cwd())) ?? process.cwd();
+  const testFilePath = path.join(process.cwd(), '__eval_api__.yaml');
+
+  const collectedResults: EvaluationResult[] = [];
+
+  await runEvaluation({
+    testFilePath,
+    repoRoot,
+    target: resolvedTarget,
+    maxRetries: 2,
+    agentTimeoutMs: options.agentTimeoutMs,
+    maxConcurrency: options.workers ?? 3,
+    evalCases,
+    ...(providerFactory ? { providerFactory } : {}),
+    onResult: async (result) => {
+      collectedResults.push(result);
+    },
+  });
+
+  // Clear inline functions after execution
+  setInlineAssertFns([]);
+
+  const durationMs = Date.now() - startTime;
+  return {
+    results: collectedResults,
+    summary: computeSummary(collectedResults, durationMs),
+  };
+}
+
+// ─── Summary computation ─────────────────────────────────────────────
+
+/**
+ * Aggregates per-test scores into pass/fail/borderline counts and a mean score.
+ * An empty result list yields a mean score of 0.
+ */
+function computeSummary(results: readonly EvaluationResult[], durationMs: number): EvalSummary {
+  const counts = { passed: 0, failed: 0, borderline: 0 };
+  let scoreSum = 0;
+
+  results.forEach(({ score }) => {
+    scoreSum += score;
+    // >= 0.8 passes, < 0.5 fails, anything else lands in borderline
+    const bucket = score >= 0.8 ? 'passed' : score < 0.5 ? 'failed' : 'borderline';
+    counts[bucket]++;
+  });
+
+  const total = results.length;
+  const meanScore = total > 0 ? scoreSum / total : 0;
+
+  return {
+    total,
+    passed: counts.passed,
+    failed: counts.failed,
+    borderline: counts.borderline,
+    durationMs,
+    meanScore,
+  };
+}
+
+// ─── Inline assert function storage ──────────────────────────────────
+
+let _inlineAssertFns: AssertFn[] = [];
+
+export function setInlineAssertFns(fns: AssertFn[]): void {
+  _inlineAssertFns = fns;
+}
+
+export function getInlineAssertFns(): AssertFn[] {
+  return _inlineAssertFns;
+}
+
+// Re-export types
+export type { AssertContext, AssertResult, AssertFn } from './assertions.js';
+export type { EvalRunResult, EvalSummary } from './evaluate.js';
diff --git a/packages/core/test/evaluation/eval-api.test.ts b/packages/core/test/evaluation/eval-api.test.ts
new file mode 100644
index 00000000..3c89881a
--- /dev/null
+++ b/packages/core/test/evaluation/eval-api.test.ts
@@ -0,0 +1,153 @@
+import { afterEach, describe, expect, it } from 'vitest';
+import {
+  Eval,
+  clearEvalRegistry,
+  getRegisteredEvals,
+  type EvalDataItem,
+} from '../../src/evaluation/eval-api.js';
+import { Contains } from '../../src/evaluation/assertions.js';
+
+afterEach(() => {
+  clearEvalRegistry();
+});
+
+describe('Eval() registration', () => {
+  it('registers an eval in the global registry', () => {
+    Eval('test-eval', {
+      data: [{ input: 'hello', expectedOutput: 'world' }],
+      target: { name: 'default', provider: 'mock', response: 'world' },
+      assert: [Contains('world')],
+    });
+
+    const registry = getRegisteredEvals();
+    expect(registry.size).toBe(1);
+    expect(registry.has('test-eval')).toBe(true);
+  });
+
+  it('throws on duplicate eval names', () => {
+    Eval('dup', {
+      data: [{ input: 'a' }],
+      target: { name: 'default', provider: 'mock' },
+      assert: [Contains('a')],
+    });
+
+    expect(() => {
+      Eval('dup', {
+        data: [{ input: 'b' }],
+        target: { name: 'default', provider: 'mock' },
+        assert: [Contains('b')],
+      });
+    }).toThrow('Eval "dup" already registered');
+  });
+
+  it('throws when both task and target are provided', () => {
+    expect(() => {
+      Eval('bad', {
+        data: [{ input: 'a' }],
+        target: { name: 'default', provider: 'mock' },
+        task: async (input) => input,
+        assert: [Contains('a')],
+      });
+    }).toThrow('Cannot specify both "task" and "target"');
+  });
+
+  it('throws when neither task nor target is provided', () => {
+    expect(() => {
+      Eval('bad', {
+        data: [{ input: 'a' }],
+        assert: [Contains('a')],
+      });
+    }).toThrow('Must specify either "task" or "target"');
+  });
+});
+
+describe('Eval() execution with mock target', () => {
+  it('returns results when awaited', async () => {
+    const result = await Eval('exec-test', {
+      data: [
+        { id: 'case-1', input: 'hello', expectedOutput: 'world' },
+      ],
+      target: { name: 'default', provider: 'mock', response: 'world' },
+      assert: [Contains('world')],
+    });
+
+    expect(result.results).toHaveLength(1);
+    expect(result.summary.total).toBe(1);
+    expect(result.summary.passed).toBe(1);
+  });
+
+  it('auto-generates test IDs from eval name and index', async () => {
+    const result = await Eval('auto-id', {
+      data: [
+        { input: 'first' },
+        { input: 'second' },
+      ],
+      target: { name: 'default', provider: 'mock', response: 'first second' },
+      assert: [Contains('first')],
+    });
+
+    expect(result.results[0].testId).toBe('auto-id/0');
+    expect(result.results[1].testId).toBe('auto-id/1');
+  });
+
+  it('uses explicit data item IDs in test IDs', async () => {
+    const result = await Eval('explicit-id', {
+      data: [
+        { id: 'my-case', input: 'hello' },
+      ],
+      target: { name: 'default', provider: 'mock', response: 'hello' },
+      assert: [Contains('hello')],
+    });
+
+    expect(result.results[0].testId).toBe('explicit-id/my-case');
+  });
+
+  it('supports inline assertion functions', async () => {
+    const result = await Eval('inline-fn', {
+      data: [{ input: 'test', expectedOutput: 'test' }],
+      target: { name: 'default', provider: 'mock', response: 'test' },
+      assert: [
+        ({ output, expectedOutput }) => ({
+          name: 'custom',
+          score: output === expectedOutput ? 1.0 : 0.0,
+        }),
+      ],
+    });
+
+    expect(result.summary.passed).toBe(1);
+  });
+
+  it('supports async data factory', async () => {
+    const result = await Eval('async-data', {
+      data: async () => [{ input: 'hello' }],
+      target: { name: 'default', provider: 'mock', response: 'hello' },
+      assert: [Contains('hello')],
+    });
+
+    expect(result.results).toHaveLength(1);
+  });
+
+  it('supports task function instead of target', async () => {
+    const result = await Eval('task-fn', {
+      data: [{ input: 'hello' }],
+      task: async (input) => `Echo: ${input}`,
+      assert: [Contains('Echo: hello')],
+    });
+
+    expect(result.summary.passed).toBe(1);
+  });
+
+  it('supports mixing inline functions and assertion configs', async () => {
+    const result = await Eval('mixed', {
+      data: [{ input: 'hello world' }],
+      target: { name: 'default', provider: 'mock', response: 'hello world' },
+      assert: [
+        Contains('hello'),
+        ({ output }) => ({ name: 'has-world', score: output.includes('world') ? 1.0 : 0.0 }),
+        { type: 'contains', value: 'world' },
+      ],
+    });
+
+    expect(result.summary.passed).toBe(1);
+  });
+});

From 8fc604386dfbaf205c5de93f014d8b40ff75d2c5 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:12:17 +0000
Subject: [PATCH 04/10] feat(core): add inline-assert evaluator type for Eval()
 API

---
 packages/core/src/evaluation/eval-api.ts      | 56 +++++++------
 .../evaluation/evaluators/inline-assert.ts    | 38 +++++++++
 .../evaluation/registry/builtin-evaluators.ts | 21 ++++-
 .../evaluators/inline-assert.test.ts          | 84 +++++++++++++++++++
 4 files changed, 173 insertions(+), 26 deletions(-)
 create mode 100644 packages/core/src/evaluation/evaluators/inline-assert.ts
 create mode 100644 packages/core/test/evaluation/evaluators/inline-assert.test.ts

diff --git a/packages/core/src/evaluation/eval-api.ts b/packages/core/src/evaluation/eval-api.ts
index 85d8e82b..bbba49a4 100644
--- a/packages/core/src/evaluation/eval-api.ts
+++ b/packages/core/src/evaluation/eval-api.ts
@@ -99,6 +99,14 @@ export function Eval(name: string, options: EvalOptions): Promise<EvalRunResult>
   return promise;
 }
 
+// ─── Inline function storage via symbol ──────────────────────────────
+
+/**
+ * Symbol used to attach inline AssertFn references to EvaluatorConfig objects.
+ * This avoids global state and race conditions between concurrent Eval() calls.
+ */
+export const INLINE_ASSERT_FN = Symbol.for('agentv.inline-assert-fn');
+
 // ─── Internal execution ──────────────────────────────────────────────
 
 async function runEval(name: string, options: EvalOptions): Promise<EvalRunResult> {
@@ -112,26 +120,30 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
   const data = typeof options.data === 'function' ? await options.data() : options.data;
 
   // Partition assert entries into inline functions and config objects
-  const configAsserts: EvalAssertionInput[] = [];
+  const evaluatorConfigs: EvaluatorConfig[] = [];
 
   for (let i = 0; i < options.assert.length; i++) {
     const entry = options.assert[i];
     if (typeof entry === 'function') {
-      // Register inline function and create a placeholder assertion config
-      configAsserts.push({
+      // Inline function: create an evaluator config with the function attached via symbol
+      const config = {
         type: 'inline-assert',
         name: `inline-assert-${i}`,
-      });
+        [INLINE_ASSERT_FN]: entry as AssertFn,
+      } as unknown as EvaluatorConfig;
+      evaluatorConfigs.push(config);
     } else {
-      configAsserts.push(entry as EvalAssertionInput);
+      // Config object: normalize type and pass through
+      const a = entry as EvalAssertionInput;
+      const { type: rawType, ...rest } = a;
+      evaluatorConfigs.push({
+        ...rest,
+        name: a.name ?? `${rawType}_${i}`,
+        type: rawType.replace(/_/g, '-'),
+      } as unknown as EvaluatorConfig);
     }
   }
 
-  // Store inline functions for the inline-assert evaluator to pick up
-  const inlineFns = options.assert
-    .filter((entry): entry is AssertFn => typeof entry === 'function');
-  setInlineAssertFns(inlineFns);
-
   // Resolve target
   const targetDef: TargetDefinition = options.target ?? { name: 'custom-task', provider: 'mock' };
   const resolvedTarget = resolveTargetDefinition(targetDef);
@@ -143,16 +155,6 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
     providerFactory = () => taskProvider;
   }
 
-  // Convert EvalAssertionInput[] to EvaluatorConfig[]
-  const evaluatorConfigs = configAsserts.map((a, i) => {
-    const { type: rawType, ...rest } = a;
-    return {
-      ...rest,
-      name: a.name ?? `${rawType}_${i}`,
-      type: rawType.replace(/_/g, '-'),
-    } as unknown as EvaluatorConfig;
-  });
-
   // Convert data items to EvalTest[]
   const evalCases: EvalTest[] = data.map((item, i) => {
     const input =
@@ -169,12 +171,19 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
       ? ([{ role: 'assistant' as const, content: item.expectedOutput }] as EvalTest['expected_output'])
       : [];
 
+    // Build input_segments so buildPromptInputs can extract the question
+    const inputSegments = typeof item.input === 'string'
+      ? [{ type: 'text' as const, value: item.input }]
+      : (item.input as readonly { role: string; content: string }[])
+          .filter((m) => m.role === 'user' && typeof m.content === 'string')
+          .map((m) => ({ type: 'text' as const, value: m.content }));
+
     return {
       id: item.id ? `${name}/${item.id}` : `${name}/${i}`,
       criteria: item.criteria ?? '',
       question: String(question),
       input,
-      input_segments: [],
+      input_segments: inputSegments,
       expected_output: expectedOutput,
       reference_answer: item.expectedOutput,
       guideline_paths: [],
@@ -205,9 +214,6 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
     },
   });
 
-  // Clear inline functions after execution
-  setInlineAssertFns([]);
-
   const durationMs = Date.now() - startTime;
   return {
     results: collectedResults,
@@ -245,7 +251,7 @@ function computeSummary(results: readonly EvaluationResult[], durationMs: number
   };
 }
 
-// ─── Inline assert function storage ──────────────────────────────────
+// ─── Legacy inline assert function storage (for backward compat) ─────
 
 let _inlineAssertFns: AssertFn[] = [];
 
diff --git a/packages/core/src/evaluation/evaluators/inline-assert.ts b/packages/core/src/evaluation/evaluators/inline-assert.ts
new file mode 100644
index 00000000..1a5b5352
--- /dev/null
+++ b/packages/core/src/evaluation/evaluators/inline-assert.ts
@@ -0,0 +1,38 @@
+import type { AssertFn } from '../assertions.js';
+import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js';
+import { scoreToVerdict } from './scoring.js';
+
+/**
+ * Evaluator that wraps an inline AssertFn and runs it in-process.
+ * No subprocess, no stdin/stdout — just calls the function directly.
+ * The function's score is clamped to [0, 1]; a NaN score counts as 0.
+ */
+export class InlineAssertEvaluator implements Evaluator {
+  readonly kind = 'inline-assert';
+
+  constructor(
+    private readonly fn: AssertFn,
+    private readonly name: string,
+  ) {}
+
+  async evaluate(context: EvaluationContext): Promise<EvaluationScore> {
+    const result = await this.fn({
+      input: context.evalCase.question,
+      output: context.candidate,
+      expectedOutput: context.evalCase.reference_answer,
+      criteria: context.evalCase.criteria,
+      metadata: context.evalCase.metadata,
+    });
+    // Math.min/Math.max propagate NaN; `|| 0` maps that to 0 and leaves valid scores untouched.
+    const score = Math.max(0, Math.min(1, result.score)) || 0;
+    return {
+      score,
+      verdict: scoreToVerdict(score),
+      hits: score >= 0.8 ? [result.name] : [],
+      misses: score < 0.5 ? [result.name] : [],
+      expectedAspectCount: 1,
+      reasoning: undefined,
+      details: result.metadata ? (result.metadata as any) : undefined,
+    };
+  }
+}
diff --git a/packages/core/src/evaluation/registry/builtin-evaluators.ts b/packages/core/src/evaluation/registry/builtin-evaluators.ts
index e992acce..517af233 100644
--- a/packages/core/src/evaluation/registry/builtin-evaluators.ts
+++ b/packages/core/src/evaluation/registry/builtin-evaluators.ts
@@ -30,6 +30,8 @@ import {
   runRegexAssertion,
   runStartsWithAssertion,
 } from '../evaluators.js';
+import { InlineAssertEvaluator } from '../evaluators/inline-assert.js';
+import { INLINE_ASSERT_FN, getInlineAssertFns } from '../eval-api.js';
 import { resolveCustomPrompt } from '../evaluators/prompt-resolution.js';
 import type { Provider } from '../providers/types.js';
 import type { ToolTrajectoryEvaluatorConfig } from '../trace.js';
@@ -421,7 +423,24 @@ export function createBuiltinRegistry(): EvaluatorRegistry {
     .register('ends-with', endsWithFactory)
     .register('regex', regexFactory)
     .register('is-json', isJsonFactory)
-    .register('equals', equalsFactory);
+    .register('equals', equalsFactory)
+    .register('inline-assert', (config, _context) => {
+      // Prefer the function attached directly to the config via symbol (concurrent-safe)
+      const symbolFn = (config as any)[INLINE_ASSERT_FN] as
+        | import('../assertions.js').AssertFn
+        | undefined;
+      if (symbolFn) {
+        return new InlineAssertEvaluator(symbolFn, config.name ?? 'inline-assert');
+      }
+      // Fallback: legacy global storage
+      const fns = getInlineAssertFns();
+      const index = Number.parseInt(config.name?.replace('inline-assert-', '') ?? '0', 10);
+      const fn = fns[index];
+      if (!fn) {
+        throw new Error(`No inline assert function found at index ${index}`);
+      }
+      return new InlineAssertEvaluator(fn, config.name ?? `inline-assert-${index}`);
+    });
 
   return registry;
 }
diff --git a/packages/core/test/evaluation/evaluators/inline-assert.test.ts b/packages/core/test/evaluation/evaluators/inline-assert.test.ts
new file mode 100644
index 00000000..757b6c07
--- /dev/null
+++ b/packages/core/test/evaluation/evaluators/inline-assert.test.ts
@@ -0,0 +1,84 @@
+import { describe, expect, it } from 'vitest';
+import { InlineAssertEvaluator } from '../../../src/evaluation/evaluators/inline-assert.js';
+import type { AssertFn } from '../../../src/evaluation/assertions.js';
+
+describe('InlineAssertEvaluator', () => {
+  it('runs an inline assert function and returns EvaluationScore', async () => {
+    const fn: AssertFn = ({ output }) => ({
+      name: 'test-assert',
+      score: output.includes('hello') ? 1.0 : 0.0,
+    });
+
+    const evaluator = new InlineAssertEvaluator(fn, 'test-assert');
+
+    const score = await evaluator.evaluate({
+      evalCase: {
+        id: 'test',
+        question: 'greet me',
+        criteria: '',
+        input: [{ role: 'user', content: 'greet me' }],
+        input_segments: [],
+        expected_output: [],
+        guideline_paths: [],
+        file_paths: [],
+        reference_answer: 'hello world',
+      },
+      candidate: 'hello world',
+    } as any);
+
+    expect(score.score).toBe(1.0);
+    expect(score.verdict).toBe('pass');
+  });
+
+  it('handles async assert functions', async () => {
+    const fn: AssertFn = async ({ output }) => ({
+      name: 'async-assert',
+      score: output.length > 0 ? 1.0 : 0.0,
+    });
+
+    const evaluator = new InlineAssertEvaluator(fn, 'async-assert');
+
+    const score = await evaluator.evaluate({
+      evalCase: {
+        id: 'test',
+        question: 'test',
+        criteria: '',
+        input: [{ role: 'user', content: 'test' }],
+        input_segments: [],
+        expected_output: [],
+        guideline_paths: [],
+        file_paths: [],
+      },
+      candidate: 'some output',
+    } as any);
+
+    expect(score.score).toBe(1.0);
+  });
+
+  it('returns fail verdict for score 0', async () => {
+    const fn: AssertFn = () => ({
+      name: 'always-fail',
+      score: 0.0,
+    });
+
+    const evaluator = new InlineAssertEvaluator(fn, 'always-fail');
+
+    const score = await evaluator.evaluate({
+      evalCase: {
+        id: 'test',
+        question: 'test',
+        criteria: '',
+        input: [{ role: 'user', content: 'test' }],
+        input_segments: [],
+        expected_output: [],
+        guideline_paths: [],
+        file_paths: [],
+      },
+      candidate: 'output',
+    } as any);
+
+    expect(score.score).toBe(0.0);
+    expect(score.verdict).toBe('fail');
+    expect(score.misses).toContain('always-fail');
+  });
+});

From cf65eaf3e06ee3e0ea0632ce82c246363fee913c Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:13:10 +0000
Subject: [PATCH 05/10] feat(core): export Eval() API and assertion factories
 from @agentv/core

---
 packages/core/src/index.ts | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 0fdb7496..469877a2 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -13,6 +13,30 @@ export {
   type EvalRunResult,
   type EvalSummary,
 } from './evaluation/evaluate.js';
+
+// Eval() API — single-file TypeScript evaluations
+export {
+  Eval,
+  getRegisteredEvals,
+  clearEvalRegistry,
+  type EvalDataItem,
+  type EvalOptions,
+  type AssertEntry,
+} from './evaluation/eval-api.js';
+export {
+  Contains,
+  IContains,
+  ContainsAll,
+  ContainsAny,
+  ExactMatch,
+  StartsWith,
+  EndsWith,
+  Regex,
+  IsJson,
+  type AssertFn,
+  type AssertContext,
+  type AssertResult,
+} from './evaluation/assertions.js';
 export {
   defineConfig,
   loadTsConfig,

From 648f2dfdb00f63673e71175bc555b3fc74d57180 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:17:39 +0000
Subject: [PATCH 06/10] feat(cli): support TypeScript eval files in agentv eval
 command

Detect .ts/.js/.mts/.mjs files and route them through the Eval() API
instead of the YAML pipeline. Imports the file, discovers registered
evals, awaits their promises, and collects results for summary output.
---
 apps/cli/src/commands/eval/run-eval.ts | 73 ++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 4 deletions(-)

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index aee3a5ff..7aa21afe 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -800,10 +800,75 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
     console.log(`Trace file: ${path.resolve(options.traceFile)}`);
   }
 
+  const allResults: EvaluationResult[] = [];
+
+  // ── TypeScript eval file support ──────────────────────────────────
+  // Detect .ts/.js/.mts/.mjs files and handle them via the Eval() API instead of the YAML pipeline
+  const tsEvalFiles = resolvedTestFiles.filter(
+    (f) => f.endsWith('.ts') || f.endsWith('.js') || f.endsWith('.mts') || f.endsWith('.mjs'),
+  );
+  const yamlEvalFiles = resolvedTestFiles.filter(
+    (f) => !tsEvalFiles.includes(f),
+  );
+
+  if (tsEvalFiles.length > 0) {
+    const { getRegisteredEvals, clearEvalRegistry } = await import('@agentv/core');
+
+    for (const tsFile of tsEvalFiles) {
+      await ensureFileExists(tsFile, 'TypeScript eval file');
+      clearEvalRegistry();
+
+      // Import the TS file — Eval() calls register during import
+      await import(pathToFileURL(tsFile).href);
+
+      const registered = getRegisteredEvals();
+      if (registered.size === 0) {
+        throw new Error(`No Eval() calls found in ${tsFile}`);
+      }
+
+      // Wait for all registered eval promises and collect results
+      for (const [evalName, entry] of registered) {
+        try {
+          const evalResult = await entry.promise;
+          // Print summary per eval
+          const passCount = evalResult.summary.passed;
+          const totalCount = evalResult.summary.total;
+          const meanScore = evalResult.summary.meanScore.toFixed(2);
+          console.log(
+            `  ${evalName}: ${passCount}/${totalCount} passed (mean score: ${meanScore})`,
+          );
+
+          // Collect results for overall summary
+          allResults.push(...evalResult.results);
+        } catch (err) {
+          console.error(
+            `  ${evalName}: ERROR — ${err instanceof Error ? err.message : String(err)}`,
+          );
+        }
+      }
+
+      clearEvalRegistry();
+    }
+
+    // If there are no YAML files, write results and return
+    if (yamlEvalFiles.length === 0) {
+      for (const result of allResults) {
+        await outputWriter.append(result);
+      }
+      await outputWriter.close().catch(() => undefined);
+      const summary = calculateEvaluationSummary(allResults);
+      console.log(formatEvaluationSummary(summary));
+      if (allResults.length > 0) {
+        console.log(`\nResults written to: ${outputPath}`);
+      }
+      return;
+    }
+  }
+
+  // ── YAML eval file pipeline (existing) ─────────────────────────────
   // Determine cache state after loading file metadata (need YAML config)
   // We defer cache creation until after file metadata is loaded
   const evaluationRunner = await resolveEvaluationRunner();
-  const allResults: EvaluationResult[] = [];
   const seenEvalCases = new Set<string>();
   const displayIdTracker = createDisplayIdTracker();
 
@@ -811,7 +876,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
   const totalWorkers = options.workers ?? DEFAULT_WORKERS;
   const fileConcurrency = Math.min(
     Math.max(1, totalWorkers),
-    Math.max(1, resolvedTestFiles.length),
+    Math.max(1, yamlEvalFiles.length),
   );
   const perFileWorkers = options.workers
     ? Math.max(1, Math.floor(totalWorkers / fileConcurrency))
@@ -833,7 +898,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
       readonly failOnError?: FailOnError;
     }
   >();
-  for (const testFilePath of resolvedTestFiles) {
+  for (const testFilePath of yamlEvalFiles) {
     const meta = await prepareFileMetadata({
       testFilePath,
       repoRoot,
@@ -936,7 +1001,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
   }
 
   try {
-    await runWithLimit(resolvedTestFiles, fileConcurrency, async (testFilePath) => {
+    await runWithLimit(yamlEvalFiles, fileConcurrency, async (testFilePath) => {
       const targetPrep = fileMetadata.get(testFilePath);
       if (!targetPrep) {
         throw new Error(`Missing metadata for ${testFilePath}`);

From 998b7beb5bfa30f343670abd84d77fc09eeffa02 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:24:25 +0000
Subject: [PATCH 07/10] feat(cli): accept TypeScript and JavaScript files in
 eval path resolution

Extend resolveEvalPaths to recognize .ts, .js, .mts, and .mjs extensions
alongside YAML and JSONL, enabling `agentv eval path/to/eval.ts`.
---
 apps/cli/src/commands/eval/shared.ts | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/apps/cli/src/commands/eval/shared.ts b/apps/cli/src/commands/eval/shared.ts
index 586d2f6f..653ada24 100644
--- a/apps/cli/src/commands/eval/shared.ts
+++ b/apps/cli/src/commands/eval/shared.ts
@@ -3,6 +3,9 @@ import { access, stat } from 'node:fs/promises';
 import path from 'node:path';
 import fg from 'fast-glob';
 
+/** Supported eval file extensions: YAML, JSONL, and TypeScript/JavaScript */
+const EVAL_FILE_RE = /\.(ya?ml|jsonl|ts|js|mts|mjs)$/i;
+
 export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promise<string[]> {
   const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
   if (normalizedInputs.length === 0) {
@@ -19,7 +22,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
       : path.resolve(cwd, pattern);
     try {
       const stats = await stat(candidatePath);
-      if (stats.isFile() && /\.(ya?ml|jsonl)$/i.test(candidatePath)) {
+      if (stats.isFile() && EVAL_FILE_RE.test(candidatePath)) {
         results.add(candidatePath);
         continue;
       }
@@ -37,13 +40,13 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
       followSymbolicLinks: true,
     });
 
-    const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl)$/i.test(filePath));
-    if (yamlMatches.length === 0) {
+    const evalMatches = matches.filter((filePath) => EVAL_FILE_RE.test(filePath));
+    if (evalMatches.length === 0) {
       unmatched.push(pattern);
       continue;
     }
 
-    for (const filePath of yamlMatches) {
+    for (const filePath of evalMatches) {
       results.add(path.normalize(filePath));
     }
   }
@@ -52,7 +55,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
     throw new Error(
       `No eval files matched: ${unmatched.join(
         ', ',
-      )}. Provide YAML or JSONL paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.jsonl").`,
+      )}. Provide YAML, JSONL, or TypeScript paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.eval.ts").`,
     );
   }
 

From 1bdf58cb62eacff6a60766a594ae58beac043507 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:24:32 +0000
Subject: [PATCH 08/10] chore: fix lint errors in Eval() API implementation

Address biome lint issues: import ordering, noExplicitAny suppressions,
non-null assertions replaced with optional chaining, proper JsonObject
typing, and formatting fixes.
---
 apps/cli/src/commands/eval/run-eval.ts        |  9 ++-----
 packages/core/src/evaluation/eval-api.ts      | 26 +++++++++++--------
 .../evaluation/evaluators/inline-assert.ts    |  5 ++--
 .../evaluation/registry/builtin-evaluators.ts |  3 ++-
 .../core/test/evaluation/eval-api.test.ts     | 17 ++++--------
 .../evaluators/inline-assert.test.ts          |  5 +++-
 .../providers/function-provider.test.ts       |  6 ++---
 7 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 7aa21afe..247cf806 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -807,9 +807,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
   const tsEvalFiles = resolvedTestFiles.filter(
     (f) => f.endsWith('.ts') || f.endsWith('.js') || f.endsWith('.mts') || f.endsWith('.mjs'),
   );
-  const yamlEvalFiles = resolvedTestFiles.filter(
-    (f) => !tsEvalFiles.includes(f),
-  );
+  const yamlEvalFiles = resolvedTestFiles.filter((f) => !tsEvalFiles.includes(f));
 
   if (tsEvalFiles.length > 0) {
     const { getRegisteredEvals, clearEvalRegistry } = await import('@agentv/core');
@@ -874,10 +872,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
 
   // Derive file-level concurrency from worker count (global) when provided
   const totalWorkers = options.workers ?? DEFAULT_WORKERS;
-  const fileConcurrency = Math.min(
-    Math.max(1, totalWorkers),
-    Math.max(1, yamlEvalFiles.length),
-  );
+  const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, yamlEvalFiles.length));
   const perFileWorkers = options.workers
     ? Math.max(1, Math.floor(totalWorkers / fileConcurrency))
     : undefined;
diff --git a/packages/core/src/evaluation/eval-api.ts b/packages/core/src/evaluation/eval-api.ts
index bbba49a4..2dc4560b 100644
--- a/packages/core/src/evaluation/eval-api.ts
+++ b/packages/core/src/evaluation/eval-api.ts
@@ -20,10 +20,11 @@
  */
 
 import path from 'node:path';
-import type { TargetDefinition } from './providers/types.js';
-import type { EvalRunResult, EvalSummary, EvalAssertionInput } from './evaluate.js';
-import type { AssertFn, AssertContext, AssertResult } from './assertions.js';
-import type { EvalTest, EvaluatorConfig, EvaluationResult } from './types.js';
+import type { AssertContext, AssertFn, AssertResult } from './assertions.js';
+import type { EvalAssertionInput, EvalRunResult, EvalSummary } from './evaluate.js';
+import type { ResolvedTarget } from './providers/targets.js';
+import type { Provider, TargetDefinition } from './providers/types.js';
+import type { EvalTest, EvaluationResult, EvaluatorConfig } from './types.js';
 
 // ─── Types ───────────────────────────────────────────────────────────
 
@@ -149,7 +150,7 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
   const resolvedTarget = resolveTargetDefinition(targetDef);
 
   // Build providerFactory for task functions
-  let providerFactory: ((target: any) => any) | undefined;
+  let providerFactory: ((target: ResolvedTarget) => Provider) | undefined;
   if (options.task) {
     const taskProvider = createFunctionProvider(options.task);
     providerFactory = () => taskProvider;
@@ -168,15 +169,18 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
         : ((item.input.find((m) => m.role === 'user')?.content as string) ?? '');
 
     const expectedOutput = item.expectedOutput
-      ? ([{ role: 'assistant' as const, content: item.expectedOutput }] as EvalTest['expected_output'])
+      ? ([
+          { role: 'assistant' as const, content: item.expectedOutput },
+        ] as EvalTest['expected_output'])
       : [];
 
     // Build input_segments so buildPromptInputs can extract the question
-    const inputSegments = typeof item.input === 'string'
-      ? [{ type: 'text' as const, value: item.input }]
-      : (item.input as readonly { role: string; content: string }[])
-          .filter((m) => m.role === 'user' && typeof m.content === 'string')
-          .map((m) => ({ type: 'text' as const, value: m.content }));
+    const inputSegments =
+      typeof item.input === 'string'
+        ? [{ type: 'text' as const, value: item.input }]
+        : (item.input as readonly { role: string; content: string }[])
+            .filter((m) => m.role === 'user' && typeof m.content === 'string')
+            .map((m) => ({ type: 'text' as const, value: m.content }));
 
     return {
       id: item.id ? `${name}/${item.id}` : `${name}/${i}`,
diff --git a/packages/core/src/evaluation/evaluators/inline-assert.ts b/packages/core/src/evaluation/evaluators/inline-assert.ts
index 1a5b5352..8963e74a 100644
--- a/packages/core/src/evaluation/evaluators/inline-assert.ts
+++ b/packages/core/src/evaluation/evaluators/inline-assert.ts
@@ -1,6 +1,7 @@
 import type { AssertFn } from '../assertions.js';
-import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js';
+import type { JsonObject } from '../types.js';
 import { scoreToVerdict } from './scoring.js';
+import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js';
 
 /**
  * Evaluator that wraps an inline AssertFn and runs it in-process.
@@ -32,7 +33,7 @@ export class InlineAssertEvaluator implements Evaluator {
       misses: score < 0.5 ? [result.name] : [],
       expectedAspectCount: 1,
       reasoning: undefined,
-      details: result.metadata ? (result.metadata as any) : undefined,
+      details: result.metadata ? (result.metadata as JsonObject) : undefined,
     };
   }
 }
diff --git a/packages/core/src/evaluation/registry/builtin-evaluators.ts b/packages/core/src/evaluation/registry/builtin-evaluators.ts
index 517af233..de5586f0 100644
--- a/packages/core/src/evaluation/registry/builtin-evaluators.ts
+++ b/packages/core/src/evaluation/registry/builtin-evaluators.ts
@@ -7,6 +7,7 @@
  */
 
 import { readFileSync } from 'node:fs';
+import { INLINE_ASSERT_FN, getInlineAssertFns } from '../eval-api.js';
 import {
   AgentJudgeEvaluator,
   CodeEvaluator,
@@ -31,7 +32,6 @@ import {
   runStartsWithAssertion,
 } from '../evaluators.js';
 import { InlineAssertEvaluator } from '../evaluators/inline-assert.js';
-import { INLINE_ASSERT_FN, getInlineAssertFns } from '../eval-api.js';
 import { resolveCustomPrompt } from '../evaluators/prompt-resolution.js';
 import type { Provider } from '../providers/types.js';
 import type { ToolTrajectoryEvaluatorConfig } from '../trace.js';
@@ -426,6 +426,7 @@ export function createBuiltinRegistry(): EvaluatorRegistry {
     .register('equals', equalsFactory)
     .register('inline-assert', (config, _context) => {
       // Prefer the function attached directly to the config via symbol (concurrent-safe)
+      // biome-ignore lint/suspicious/noExplicitAny: symbol key access requires dynamic cast
       const symbolFn = (config as any)[INLINE_ASSERT_FN] as
         | import('../assertions.js').AssertFn
         | undefined;
diff --git a/packages/core/test/evaluation/eval-api.test.ts b/packages/core/test/evaluation/eval-api.test.ts
index 3c89881a..fc15ddde 100644
--- a/packages/core/test/evaluation/eval-api.test.ts
+++ b/packages/core/test/evaluation/eval-api.test.ts
@@ -1,11 +1,11 @@
 import { afterEach, describe, expect, it } from 'vitest';
+import { Contains } from '../../src/evaluation/assertions.js';
 import {
   Eval,
+  type EvalDataItem,
   clearEvalRegistry,
   getRegisteredEvals,
-  type EvalDataItem,
 } from '../../src/evaluation/eval-api.js';
-import { Contains } from '../../src/evaluation/assertions.js';
 
 afterEach(() => {
   clearEvalRegistry();
@@ -64,9 +64,7 @@ describe('Eval() registration', () => {
 describe('Eval() execution with mock target', () => {
   it('returns results when awaited', async () => {
     const result = await Eval('exec-test', {
-      data: [
-        { id: 'case-1', input: 'hello', expectedOutput: 'world' },
-      ],
+      data: [{ id: 'case-1', input: 'hello', expectedOutput: 'world' }],
       target: { name: 'default', provider: 'mock', response: 'world' },
       assert: [Contains('world')],
     });
@@ -78,10 +76,7 @@ describe('Eval() execution with mock target', () => {
 
   it('auto-generates test IDs from eval name and index', async () => {
     const result = await Eval('auto-id', {
-      data: [
-        { input: 'first' },
-        { input: 'second' },
-      ],
+      data: [{ input: 'first' }, { input: 'second' }],
       target: { name: 'default', provider: 'mock', response: 'first second' },
       assert: [Contains('first')],
     });
@@ -92,9 +87,7 @@ describe('Eval() execution with mock target', () => {
 
   it('uses explicit data item IDs in test IDs', async () => {
     const result = await Eval('explicit-id', {
-      data: [
-        { id: 'my-case', input: 'hello' },
-      ],
+      data: [{ id: 'my-case', input: 'hello' }],
       target: { name: 'default', provider: 'mock', response: 'hello' },
       assert: [Contains('hello')],
     });
diff --git a/packages/core/test/evaluation/evaluators/inline-assert.test.ts b/packages/core/test/evaluation/evaluators/inline-assert.test.ts
index 757b6c07..504e1b00 100644
--- a/packages/core/test/evaluation/evaluators/inline-assert.test.ts
+++ b/packages/core/test/evaluation/evaluators/inline-assert.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest';
-import { InlineAssertEvaluator } from '../../../src/evaluation/evaluators/inline-assert.js';
 import type { AssertFn } from '../../../src/evaluation/assertions.js';
+import { InlineAssertEvaluator } from '../../../src/evaluation/evaluators/inline-assert.js';
 
 describe('InlineAssertEvaluator', () => {
   it('runs an inline assert function and returns EvaluationScore', async () => {
@@ -24,6 +24,7 @@ describe('InlineAssertEvaluator', () => {
         reference_answer: 'hello world',
       },
       candidate: 'hello world',
+      // biome-ignore lint/suspicious/noExplicitAny: partial context for unit testing
     } as any);
 
     expect(score.score).toBe(1.0);
@@ -50,6 +51,7 @@ describe('InlineAssertEvaluator', () => {
         file_paths: [],
       },
       candidate: 'some output',
+      // biome-ignore lint/suspicious/noExplicitAny: partial context for unit testing
     } as any);
 
     expect(score.score).toBe(1.0);
@@ -75,6 +77,7 @@ describe('InlineAssertEvaluator', () => {
         file_paths: [],
       },
       candidate: 'output',
+      // biome-ignore lint/suspicious/noExplicitAny: partial context for unit testing
     } as any);
 
     expect(score.score).toBe(0.0);
diff --git a/packages/core/test/evaluation/providers/function-provider.test.ts b/packages/core/test/evaluation/providers/function-provider.test.ts
index 71a5cae7..f4116e38 100644
--- a/packages/core/test/evaluation/providers/function-provider.test.ts
+++ b/packages/core/test/evaluation/providers/function-provider.test.ts
@@ -10,8 +10,8 @@ describe('createFunctionProvider', () => {
 
     const response = await provider.invoke({ question: 'hello' });
     expect(response.output).toHaveLength(1);
-    expect(response.output![0].role).toBe('assistant');
-    expect(response.output![0].content).toBe('Echo: hello');
+    expect(response.output?.[0].role).toBe('assistant');
+    expect(response.output?.[0].content).toBe('Echo: hello');
   });
 
   it('wraps an async task function', async () => {
@@ -20,7 +20,7 @@ describe('createFunctionProvider', () => {
     });
 
     const response = await provider.invoke({ question: 'world' });
-    expect(response.output![0].content).toBe('Async: world');
+    expect(response.output?.[0].content).toBe('Async: world');
   });
 
   it('measures duration', async () => {

From 14e6c107019aa78c95b3bcf3c34dfaabfab4bb43 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:24:38 +0000
Subject: [PATCH 09/10] feat: add TypeScript Eval() API example

Demonstrates single-file eval with mock target, built-in Contains()
assertion, and inline assertion function.
---
 .../features/sdk-eval-api/evals/basic.eval.ts | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 examples/features/sdk-eval-api/evals/basic.eval.ts

diff --git a/examples/features/sdk-eval-api/evals/basic.eval.ts b/examples/features/sdk-eval-api/evals/basic.eval.ts
new file mode 100644
index 00000000..fd5082a1
--- /dev/null
+++ b/examples/features/sdk-eval-api/evals/basic.eval.ts
@@ -0,0 +1,26 @@
+import { Contains, Eval } from '@agentv/core';
+
+Eval('sdk-example/basic', {
+  data: [
+    {
+      id: 'greeting',
+      input: 'Say hello',
+      expectedOutput: 'hello',
+      criteria: 'Response should contain a greeting',
+    },
+    {
+      id: 'math',
+      input: 'What is 2+2?',
+      expectedOutput: '4',
+      criteria: 'Response should contain the correct answer',
+    },
+  ],
+  target: { name: 'default', provider: 'mock', response: 'hello, the answer is 4' },
+  assert: [
+    Contains('hello'),
+    ({ output, expectedOutput }) => ({
+      name: 'has-expected',
+      score: output.includes(expectedOutput ?? '') ? 1.0 : 0.0,
+    }),
+  ],
+});

From aba42b3f6422160acb2aad786150a8753cfa1317 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 13 Mar 2026 09:52:31 +0000
Subject: [PATCH 10/10] refactor(core): address Eval() API code review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove dead code: legacy setInlineAssertFns/getInlineAssertFns and
  the fallback path in builtin-evaluators registry
- Remove evaluate() from public exports, migrate example to Eval()
- Add InlineAssertEvaluatorConfig to type system, remove unsafe casts
- Replace fake __eval_api__.yaml path with <eval-api> virtual marker
- Deduplicate computeSummary() — export from evaluate.ts, import in eval-api.ts
- Fix mixed TS+YAML output: write TS results to outputWriter immediately
- Add ExactMatch usage to sdk-eval-api example
- Add borderline score test case (0.5 <= score < 0.8)
---
 apps/cli/src/commands/eval/run-eval.ts        | 10 +--
 .../features/sdk-eval-api/evals/basic.eval.ts | 14 ++++-
 .../features/sdk-programmatic-api/evaluate.ts | 27 +++-----
 packages/core/src/evaluation/eval-api.ts      | 61 ++++---------------
 packages/core/src/evaluation/evaluate.ts      |  6 +-
 .../evaluation/registry/builtin-evaluators.ts | 18 ++----
 packages/core/src/evaluation/types.ts         | 18 +++++-
 packages/core/src/index.ts                    | 13 ++--
 .../core/test/evaluation/eval-api.test.ts     | 19 ++++++
 9 files changed, 92 insertions(+), 94 deletions(-)

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 247cf806..3017bf81 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -836,7 +836,10 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
             `  ${evalName}: ${passCount}/${totalCount} passed (mean score: ${meanScore})`,
           );
 
-          // Collect results for overall summary
+          // Write results to output and collect for overall summary
+          for (const result of evalResult.results) {
+            await outputWriter.append(result);
+          }
           allResults.push(...evalResult.results);
         } catch (err) {
           console.error(
@@ -848,11 +851,8 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
       clearEvalRegistry();
     }
 
-    // If there are no YAML files, write results and return
+    // If there are no YAML files, finalize output and return
     if (yamlEvalFiles.length === 0) {
-      for (const result of allResults) {
-        await outputWriter.append(result);
-      }
       await outputWriter.close().catch(() => undefined);
       const summary = calculateEvaluationSummary(allResults);
       console.log(formatEvaluationSummary(summary));
diff --git a/examples/features/sdk-eval-api/evals/basic.eval.ts b/examples/features/sdk-eval-api/evals/basic.eval.ts
index fd5082a1..56ffff8e 100644
--- a/examples/features/sdk-eval-api/evals/basic.eval.ts
+++ b/examples/features/sdk-eval-api/evals/basic.eval.ts
@@ -1,4 +1,4 @@
-import { Contains, Eval } from '@agentv/core';
+import { Contains, Eval, ExactMatch } from '@agentv/core';
 
 Eval('sdk-example/basic', {
   data: [
@@ -24,3 +24,15 @@ Eval('sdk-example/basic', {
     }),
   ],
 });
+
+Eval('sdk-example/exact', {
+  data: [
+    {
+      id: 'exact-check',
+      input: 'Echo back: hello world',
+      expectedOutput: 'hello world',
+    },
+  ],
+  target: { name: 'default', provider: 'mock', response: 'hello world' },
+  assert: [ExactMatch],
+});
diff --git a/examples/features/sdk-programmatic-api/evaluate.ts b/examples/features/sdk-programmatic-api/evaluate.ts
index 43e63088..0995b9a8 100644
--- a/examples/features/sdk-programmatic-api/evaluate.ts
+++ b/examples/features/sdk-programmatic-api/evaluate.ts
@@ -1,38 +1,29 @@
 /**
  * Programmatic API Example
  *
- * Uses evaluate() from @agentv/core to run evaluations as a library.
+ * Uses Eval() from @agentv/core to run evaluations as a library.
  * No YAML needed — tests defined inline with full type safety.
  *
  * Run: bun run evaluate.ts
  * (Uses 'default' target from .agentv/targets.yaml and .env credentials)
  */
-import { evaluate } from '@agentv/core';
+import { Contains, Eval } from '@agentv/core';
 
-const { results, summary } = await evaluate({
-  tests: [
+const { results, summary } = await Eval('programmatic-api-example', {
+  data: [
     {
       id: 'greeting',
       input: 'Say hello and introduce yourself briefly.',
-      expected_output: "Hello! I'm an AI assistant here to help you.",
-      assert: [{ type: 'contains', value: 'Hello' }],
+      expectedOutput: "Hello! I'm an AI assistant here to help you.",
     },
     {
       id: 'json-output',
-      input: [
-        { role: 'system', content: 'Respond only with valid JSON. No markdown.' },
-        { role: 'user', content: 'Return a JSON object with a "status" field set to "ok".' },
-      ],
-      expected_output: '{"status": "ok"}',
-      assert: [
-        { type: 'is-json', required: true },
-        { type: 'contains', value: 'ok' },
-      ],
+      input: 'Return a JSON object with a "status" field set to "ok".',
+      expectedOutput: '{"status": "ok"}',
     },
   ],
-  onResult: (result) => {
-    console.log(`  ${result.testId}: score=${result.score.toFixed(2)}`);
-  },
+  target: { provider: 'mock', response: 'Hello! I am an AI assistant. {"status": "ok"}' },
+  assert: [Contains('Hello'), { type: 'contains', value: 'ok' }],
 });
 
 console.log('\n--- Summary ---');
diff --git a/packages/core/src/evaluation/eval-api.ts b/packages/core/src/evaluation/eval-api.ts
index 2dc4560b..2e7dafbf 100644
--- a/packages/core/src/evaluation/eval-api.ts
+++ b/packages/core/src/evaluation/eval-api.ts
@@ -19,12 +19,17 @@
  * @module
  */
 
-import path from 'node:path';
 import type { AssertContext, AssertFn, AssertResult } from './assertions.js';
+import { computeSummary } from './evaluate.js';
 import type { EvalAssertionInput, EvalRunResult, EvalSummary } from './evaluate.js';
 import type { ResolvedTarget } from './providers/targets.js';
 import type { Provider, TargetDefinition } from './providers/types.js';
-import type { EvalTest, EvaluationResult, EvaluatorConfig } from './types.js';
+import type {
+  EvalTest,
+  EvaluationResult,
+  EvaluatorConfig,
+  InlineAssertEvaluatorConfig,
+} from './types.js';
 
 // ─── Types ───────────────────────────────────────────────────────────
 
@@ -127,11 +132,11 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
     const entry = options.assert[i];
     if (typeof entry === 'function') {
       // Inline function: create an evaluator config with the function attached via symbol
-      const config = {
+      const base: InlineAssertEvaluatorConfig = {
         type: 'inline-assert',
         name: `inline-assert-${i}`,
-        [INLINE_ASSERT_FN]: entry as AssertFn,
-      } as unknown as EvaluatorConfig;
+      };
+      const config = Object.assign(base, { [INLINE_ASSERT_FN]: entry as AssertFn });
       evaluatorConfigs.push(config);
     } else {
       // Config object: normalize type and pass through
@@ -141,7 +146,7 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
         ...rest,
         name: a.name ?? `${rawType}_${i}`,
         type: rawType.replace(/_/g, '-'),
-      } as unknown as EvaluatorConfig);
+      } as EvaluatorConfig);
     }
   }
 
@@ -200,7 +205,7 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
 
   const startTime = Date.now();
   const repoRoot = (await findGitRoot(process.cwd())) ?? process.cwd();
-  const testFilePath = path.join(process.cwd(), '__eval_api__.yaml');
+  const testFilePath = '<eval-api>';
 
   const collectedResults: EvaluationResult[] = [];
 
@@ -225,48 +230,6 @@ async function runEval(name: string, options: EvalOptions): Promise<EvalRunResul
   };
 }
 
-// ─── Summary computation ─────────────────────────────────────────────
-
-function computeSummary(results: readonly EvaluationResult[], durationMs: number): EvalSummary {
-  const total = results.length;
-  let passed = 0;
-  let failed = 0;
-  let borderline = 0;
-  let scoreSum = 0;
-
-  for (const r of results) {
-    scoreSum += r.score;
-    if (r.score >= 0.8) {
-      passed++;
-    } else if (r.score < 0.5) {
-      failed++;
-    } else {
-      borderline++;
-    }
-  }
-
-  return {
-    total,
-    passed,
-    failed,
-    borderline,
-    durationMs,
-    meanScore: total > 0 ? scoreSum / total : 0,
-  };
-}
-
-// ─── Legacy inline assert function storage (for backward compat) ─────
-
-let _inlineAssertFns: AssertFn[] = [];
-
-export function setInlineAssertFns(fns: AssertFn[]): void {
-  _inlineAssertFns = fns;
-}
-
-export function getInlineAssertFns(): AssertFn[] {
-  return _inlineAssertFns;
-}
-
 // Re-export types
 export type { AssertContext, AssertResult, AssertFn } from './assertions.js';
 export type { EvalRunResult, EvalSummary } from './evaluate.js';
diff --git a/packages/core/src/evaluation/evaluate.ts b/packages/core/src/evaluation/evaluate.ts
index f6043ba8..ad581611 100644
--- a/packages/core/src/evaluation/evaluate.ts
+++ b/packages/core/src/evaluation/evaluate.ts
@@ -305,8 +305,12 @@ function mapAssertionType(type: string): string {
 
 /**
  * Compute summary statistics from evaluation results.
+ * Shared by both evaluate() and Eval() APIs.
  */
-function computeSummary(results: readonly EvaluationResult[], durationMs: number): EvalSummary {
+export function computeSummary(
+  results: readonly EvaluationResult[],
+  durationMs: number,
+): EvalSummary {
   const total = results.length;
   let passed = 0;
   let failed = 0;
diff --git a/packages/core/src/evaluation/registry/builtin-evaluators.ts b/packages/core/src/evaluation/registry/builtin-evaluators.ts
index de5586f0..20ec6f02 100644
--- a/packages/core/src/evaluation/registry/builtin-evaluators.ts
+++ b/packages/core/src/evaluation/registry/builtin-evaluators.ts
@@ -7,7 +7,7 @@
  */
 
 import { readFileSync } from 'node:fs';
-import { INLINE_ASSERT_FN, getInlineAssertFns } from '../eval-api.js';
+import { INLINE_ASSERT_FN } from '../eval-api.js';
 import {
   AgentJudgeEvaluator,
   CodeEvaluator,
@@ -425,22 +425,16 @@ export function createBuiltinRegistry(): EvaluatorRegistry {
     .register('is-json', isJsonFactory)
     .register('equals', equalsFactory)
     .register('inline-assert', (config, _context) => {
-      // Prefer the function attached directly to the config via symbol (concurrent-safe)
       // biome-ignore lint/suspicious/noExplicitAny: symbol key access requires dynamic cast
-      const symbolFn = (config as any)[INLINE_ASSERT_FN] as
+      const fn = (config as any)[INLINE_ASSERT_FN] as
         | import('../assertions.js').AssertFn
         | undefined;
-      if (symbolFn) {
-        return new InlineAssertEvaluator(symbolFn, config.name ?? 'inline-assert');
-      }
-      // Fallback: legacy global storage
-      const fns = getInlineAssertFns();
-      const index = Number.parseInt(config.name?.replace('inline-assert-', '') ?? '0', 10);
-      const fn = fns[index];
       if (!fn) {
-        throw new Error(`No inline assert function found at index ${index}`);
+        throw new Error(
+          `No inline assert function found on config for "${config.name}". Inline assert functions must be attached via INLINE_ASSERT_FN symbol.`,
+        );
       }
-      return new InlineAssertEvaluator(fn, config.name ?? `inline-assert-${index}`);
+      return new InlineAssertEvaluator(fn, config.name ?? 'inline-assert');
     });
 
   return registry;
diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts
index 4ad65239..3fa7a4c6 100644
--- a/packages/core/src/evaluation/types.ts
+++ b/packages/core/src/evaluation/types.ts
@@ -171,6 +171,7 @@ const EVALUATOR_KIND_VALUES = [
   'is-json',
   'equals',
   'rubrics',
+  'inline-assert',
 ] as const;
 
 export type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
@@ -723,6 +724,20 @@ export type RubricsEvaluatorConfig = {
   readonly negate?: boolean;
 };
 
+/**
+ * Configuration for the inline-assert evaluator.
+ * Used by the Eval() API to wrap inline assertion functions.
+ * The actual function is attached via the INLINE_ASSERT_FN symbol at runtime.
+ */
+export type InlineAssertEvaluatorConfig = {
+  readonly name: string;
+  readonly type: 'inline-assert';
+  readonly weight?: number;
+  readonly required?: boolean | number;
+  /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+  readonly negate?: boolean;
+};
+
 export type EvaluatorConfig =
   | CodeEvaluatorConfig
   | LlmJudgeEvaluatorConfig
@@ -745,7 +760,8 @@ export type EvaluatorConfig =
   | RegexEvaluatorConfig
   | IsJsonEvaluatorConfig
   | EqualsEvaluatorConfig
-  | RubricsEvaluatorConfig;
+  | RubricsEvaluatorConfig
+  | InlineAssertEvaluatorConfig;
 
 /**
  * Eval test definition sourced from AgentV specs.
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 469877a2..9aff4890 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -5,13 +5,12 @@ export * from './evaluation/file-utils.js';
 export * from './evaluation/providers/index.js';
 export * from './evaluation/evaluators.js';
 export * from './evaluation/orchestrator.js';
-export {
-  evaluate,
-  type EvalConfig,
-  type EvalTestInput,
-  type EvalAssertionInput,
-  type EvalRunResult,
-  type EvalSummary,
+export type {
+  EvalConfig,
+  EvalTestInput,
+  EvalAssertionInput,
+  EvalRunResult,
+  EvalSummary,
 } from './evaluation/evaluate.js';
 
 // Eval() API — single-file TypeScript evaluations
diff --git a/packages/core/test/evaluation/eval-api.test.ts b/packages/core/test/evaluation/eval-api.test.ts
index fc15ddde..7108a9ab 100644
--- a/packages/core/test/evaluation/eval-api.test.ts
+++ b/packages/core/test/evaluation/eval-api.test.ts
@@ -143,4 +143,23 @@ describe('Eval() execution with mock target', () => {
 
     expect(result.summary.passed).toBe(1);
   });
+
+  it('classifies borderline scores (0.5 <= score < 0.8) correctly', async () => {
+    const result = await Eval('borderline', {
+      data: [{ input: 'hello world' }],
+      target: { name: 'default', provider: 'mock', response: 'hello world' },
+      assert: [
+        // One passes (score 1.0), one fails (score 0.0) — average score is 0.5, which is borderline
+        Contains('hello'),
+        Contains('missing-text'),
+      ],
+    });
+
+    // With two assertions where one passes and one fails, the average score should be 0.5
+    // Score 0.5 is borderline (>= 0.5 and < 0.8)
+    expect(result.summary.total).toBe(1);
+    expect(result.summary.borderline).toBe(1);
+    expect(result.summary.passed).toBe(0);
+    expect(result.summary.failed).toBe(0);
+  });
 });