Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions src/__tests__/fuzzyAnchor.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import { describe, expect, it } from "@jest/globals";
import { fuzzyAnchorRange } from "../utils/fuzzyAnchor";

describe("fuzzyAnchorRange", () => {
// ==========================================================================
// BASIC MATCHING
// ==========================================================================

// Exact-substring scenarios: the anchor appears verbatim in the text
// (modulo letter case) at the start, middle, or end.
describe("basic matching", () => {
  it("finds anchor at start of phrase", () => {
    const result = fuzzyAnchorRange("brown fox jumps over the lazy dog", "brown fox");
    expect(result).toEqual({ start: 0, end: 9 });
  });

  it("finds anchor in the middle of a phrase", () => {
    const result = fuzzyAnchorRange("The quick brown fox jumps over the lazy dog", "brown fox");
    expect(result).toEqual({ start: 10, end: 19 });
  });

  it("finds anchor at end of phrase", () => {
    const result = fuzzyAnchorRange("The quick brown fox", "brown fox");
    expect(result).toEqual({ start: 10, end: 19 });
  });

  it("is case-insensitive", () => {
    const result = fuzzyAnchorRange("Includes Business Associate Agreement and more", "business associate agreement");
    expect(result).not.toBeNull();
    // Optional chaining instead of `result!` keeps the lint rule
    // noNonNullAssertion quiet; a null result already fails the line above.
    expect(result?.start).toBe(9); // "Business" starts at 9
  });
});

// ==========================================================================
// INLINE CITATION INSERTIONS
// Handles PDF text that has inserted inline references breaking exact match.
// e.g. anchor="retrieval failure and generation bottleneck" in
// text="retrieval failure (§6.1) and generation bottleneck (§6.2)"
// ==========================================================================

// The text contains parenthesised section references that are absent from the
// anchor, so the anchor words are present but not contiguous.
describe("inline citation insertions", () => {
  it("finds anchor across inserted inline citations like (§6.1)", () => {
    const result = fuzzyAnchorRange(
      "retrieval failure (§6.1) and generation bottleneck (§6.2)",
      "retrieval failure and generation bottleneck",
    );
    expect(result).not.toBeNull();
    // Optional chaining replaces the forbidden `result!` assertions; if the
    // result were null the assertion above has already failed the test.
    expect(result?.start).toBe(0);
    // "bottleneck" begins at index 40, so the range must extend past it.
    expect(result?.end).toBeGreaterThan(40);
  });
});

// ==========================================================================
// OCR WORD-SPLIT ARTIFACTS
// PDF OCR can split a word into fragments: "Business Asso ciate Agreement"
// (the word "Associate" becomes "Asso ciate" with an extra space).
//
// fuzzyAnchorRange works word-by-word via indexOf. If an anchor word is
// OCR-split ("associate" → "asso ciate"), it is NOT found as a substring.
// The function falls back on the 60% threshold: if enough OTHER words in the
// anchor are found, it still returns the spanning range.
// ==========================================================================

// Covers text where OCR has split one anchor word in two, so that word can no
// longer be found and the match depends on how many other anchor words hit.
describe("OCR word-split artifacts (DPA / landing demo fixture)", () => {
  // Source: deepcitation.com/legal/dpa OCR extraction.
  // The word "Associate" is rendered as "Asso ciate" in the OCR output.
  const OCR_SNIPPET = "Includes Business Asso ciate Agreement BAA) and Information Manager Agreement IMA";

  it("finds 3-word anchor when 1 word is OCR-split — 2/3 ≥ 60% threshold", () => {
    // "business" ✓, "associate" ✗ (text has "asso ciate"), "agreement" ✓ → 2/3 = 66.7%
    const result = fuzzyAnchorRange(OCR_SNIPPET, "Business Associate Agreement");
    expect(result).not.toBeNull();
    // Range should start at "Business" (position 9) and end at "Agreement" end (position 38).
    // Optional chaining is used instead of `result!` (lint: noNonNullAssertion).
    expect(result?.start).toBe(9);
    expect(result?.end).toBe(38);
  });

  it("highlighted slice from 3-word anchor spans the OCR-garbled form", () => {
    const result = fuzzyAnchorRange(OCR_SNIPPET, "Business Associate Agreement");
    expect(result).not.toBeNull();
    // A null result has already failed above, so the optional chains only
    // exist to avoid non-null assertions.
    const highlighted = OCR_SNIPPET.slice(result?.start, result?.end);
    // The garbled form "Business Asso ciate Agreement" is the expected highlighted text
    expect(highlighted).toBe("Business Asso ciate Agreement");
  });

  it("2-word anchor where the only split word fails — 1/2 = 50% < 60% threshold → null", () => {
    // "associate" ✗ (text has "asso ciate"), "agreement" ✓ → 1/2 = 50% → null
    // This documents a known limitation: a 2-word anchor fails when one word is OCR-split.
    const result = fuzzyAnchorRange(OCR_SNIPPET, "Associate Agreement");
    expect(result).toBeNull();
  });

  it("returns null when no anchor words are found at all", () => {
    const result = fuzzyAnchorRange("completely unrelated text here", "Business Associate");
    expect(result).toBeNull();
  });
});

// ==========================================================================
// SHORT / EDGE CASES
// ==========================================================================

// Degenerate inputs and below-threshold matches, all of which must yield null.
describe("edge cases", () => {
  it("returns null for empty anchor", () => {
    expect(fuzzyAnchorRange("some text", "")).toBeNull();
  });

  it("returns null when anchor words are all single characters (filtered out)", () => {
    // Words < 2 chars are excluded from matching
    expect(fuzzyAnchorRange("a b c", "a b")).toBeNull();
  });

  it("returns null when fewer than 60% of anchor words are found", () => {
    // Only "quick" is present in the text → 1/3 = 33% < 60%.
    // The fixture previously contained none of the anchor words, which made
    // this a duplicate of the "no anchor words found" case rather than a
    // partial match below the threshold.
    expect(fuzzyAnchorRange("a quick red sky today", "quick brown fox")).toBeNull();
  });
});
});
9 changes: 7 additions & 2 deletions src/auth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ function sendJson(res: ServerResponse, status: number, body: Record<string, unkn

export function startCallbackServer(
expectedNonce: string,
): Promise<{ port: number; result: Promise<CallbackPayload> }> {
): Promise<{ port: number; result: Promise<CallbackPayload>; cancel: () => void }> {
return new Promise((resolveServer, rejectServer) => {
let resolveResult: (payload: CallbackPayload) => void;
let rejectResult: (err: Error) => void;
Expand Down Expand Up @@ -264,7 +264,12 @@ export function startCallbackServer(
// but DO keep it alive for the server (it must stay up to receive the callback).
loginTimeout.unref();

resolveServer({ port: addr.port, result });
const cancel = () => {
if (loginTimeout) clearTimeout(loginTimeout);
server.close();
rejectResult(new Error("Login cancelled"));
};
resolveServer({ port: addr.port, result, cancel });
});

server.on("error", err => {
Expand Down
11 changes: 11 additions & 0 deletions src/billing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
* Canonical usage warning thresholds for DeepCitation.
* Expressed as percentages of the monthly budget *used*.
* Shared across the CLI, web dashboard, and SDK consumers.
*/

/** Budget-used percentage at which the visual/informational usage warning is shown. */
const USAGE_WARN_PCT = 80;

/** Budget-used percentage at which the urgent "action needed" warning is shown. */
const USAGE_CRITICAL_PCT = 90;

export { USAGE_WARN_PCT, USAGE_CRITICAL_PCT };
Loading
Loading