From 5b0c3e1524b0cb15c33cbb5bb0550e4e507db64a Mon Sep 17 00:00:00 2001 From: Peter Kirkham Date: Thu, 9 Apr 2026 16:01:44 +0100 Subject: [PATCH] feat: simplify --- packages/enricher/README.md | 32 ++++++-- packages/enricher/src/comment-formatter.ts | 7 -- packages/enricher/src/detector.test.ts | 3 +- packages/enricher/src/detector.ts | 4 - packages/enricher/src/enriched-result.ts | 12 +-- packages/enricher/src/enricher.test.ts | 92 +++++++++++++++++++--- packages/enricher/src/enricher.ts | 17 +++- packages/enricher/src/index.ts | 7 +- packages/enricher/src/languages.ts | 19 +++++ packages/enricher/src/parser-manager.ts | 21 +++-- 10 files changed, 163 insertions(+), 51 deletions(-) diff --git a/packages/enricher/README.md b/packages/enricher/README.md index 342cdfd42..9dfae9129 100644 --- a/packages/enricher/README.md +++ b/packages/enricher/README.md @@ -8,10 +8,13 @@ Detect and enrich PostHog SDK usage in source code. Uses tree-sitter AST analysi import { PostHogEnricher } from "@posthog/enricher"; const enricher = new PostHogEnricher(); -await enricher.initialize("/path/to/grammars"); +// Parse from source string const result = await enricher.parse(sourceCode, "typescript"); +// Or parse from file (auto-detects language from extension) +const result = await enricher.parseFile("/path/to/app.tsx"); + result.events; // [{ name: "purchase", line: 5, dynamic: false }] result.flagChecks; // [{ method: "getFeatureFlag", flagKey: "new-checkout", line: 8 }] result.flagKeys; // ["new-checkout"] @@ -32,12 +35,12 @@ const enriched = await result.enrichFromApi({ }); // Flags with staleness, rollout, experiment info -enriched.enrichedFlags; +enriched.flags; // [{ flagKey: "new-checkout", flagType: "boolean", staleness: "fully_rolled_out", // rollout: 100, experiment: { name: "Checkout v2", ... }, ... 
}] // Events with definition, volume, unique users -enriched.enrichedEvents; +enriched.events; // [{ eventName: "purchase", verified: true, lastSeenAt: "2025-04-01", // tags: ["revenue"], stats: { volume: 12500, uniqueUsers: 3200 }, ... }] @@ -75,8 +78,8 @@ Main entry point. Owns the tree-sitter parser lifecycle. ```typescript const enricher = new PostHogEnricher(); -await enricher.initialize(wasmDir); const result = await enricher.parse(source, languageId); +const fileResult = await enricher.parseFile("/path/to/file.ts"); enricher.dispose(); ``` @@ -98,14 +101,26 @@ Returned by `enricher.parse()`. Contains all detected PostHog SDK usage. | `toList()` | `ListItem[]` | Flat sorted list of all SDK usage | | `enrichFromApi(config)` | `Promise` | Fetch from PostHog API and enrich | +### `PostHogEnricher` methods + +| Method | Description | +|---|---| +| `constructor()` | Create enricher. Bundled grammars are auto-located at runtime. | +| `parse(source, languageId)` | Parse a source code string with an explicit language ID | +| `parseFile(filePath)` | Read a file and parse it, auto-detecting language from the file extension | +| `isSupported(langId)` | Check if a language ID is supported | +| `supportedLanguages` | List of supported language IDs | +| `updateConfig(config)` | Customize detection behavior | +| `dispose()` | Clean up parser resources | + ### `EnrichedResult` Returned by `enrich()` or `enrichFromApi()`. Detection combined with PostHog context. 
| Property / Method | Type | Description | |---|---|---| -| `enrichedFlags` | `EnrichedFlag[]` | Flags grouped by key with type, staleness, rollout, experiment | -| `enrichedEvents` | `EnrichedEvent[]` | Events grouped by name with definition, stats, tags | +| `flags` | `EnrichedFlag[]` | Flags grouped by key with type, staleness, rollout, experiment | +| `events` | `EnrichedEvent[]` | Events grouped by name with definition, stats, tags | | `toList()` | `EnrichedListItem[]` | Flat list with all metadata | | `toComments()` | `string` | Source code with inline annotation comments | @@ -156,7 +171,6 @@ The lower-level detection API is also exported for direct use (this is the same import { PostHogDetector } from "@posthog/enricher"; const detector = new PostHogDetector(); -await detector.initialize(wasmDir); const calls = await detector.findPostHogCalls(source, "typescript"); const initCalls = await detector.findInitCalls(source, "typescript"); @@ -188,4 +202,6 @@ setLogger({ warn: console.warn }); ## Setup -The package requires pre-built tree-sitter WASM grammar files. Run `pnpm fetch-grammars` to build them, or place pre-built `.wasm` files in the `grammars/` directory. +Grammar files are bundled with the package and auto-located at runtime — no manual setup needed. + +For development, run `pnpm fetch-grammars` to rebuild the WASM grammar files in the `grammars/` directory. diff --git a/packages/enricher/src/comment-formatter.ts b/packages/enricher/src/comment-formatter.ts index cb41506b5..b05350186 100644 --- a/packages/enricher/src/comment-formatter.ts +++ b/packages/enricher/src/comment-formatter.ts @@ -56,16 +56,9 @@ export function formatComments( const sorted = [...items].sort((a, b) => a.line - b.line); let offset = 0; - // One comment per original source line — if multiple detections share a line, - // only the first (by sort order) gets an annotation to keep output readable. 
- const annotatedLines = new Set(); for (const item of sorted) { const targetLine = item.line + offset; - if (annotatedLines.has(item.line)) { - continue; - } - annotatedLines.add(item.line); let comment: string | null = null; diff --git a/packages/enricher/src/detector.test.ts b/packages/enricher/src/detector.test.ts index 096bd19b1..3421dcbae 100644 --- a/packages/enricher/src/detector.test.ts +++ b/packages/enricher/src/detector.test.ts @@ -35,9 +35,8 @@ function simpleInits(inits: PostHogInitCall[]) { describeWithGrammars("PostHogDetector", () => { let detector: PostHogDetector; - beforeAll(async () => { + beforeAll(() => { detector = new PostHogDetector(); - await detector.initialize(GRAMMARS_DIR); detector.updateConfig({ additionalClientNames: [], additionalFlagFunctions: [ diff --git a/packages/enricher/src/detector.ts b/packages/enricher/src/detector.ts index e5f422980..896bff70e 100644 --- a/packages/enricher/src/detector.ts +++ b/packages/enricher/src/detector.ts @@ -22,10 +22,6 @@ export class PostHogDetector { this.pm.updateConfig(config); } - async initialize(wasmDir: string): Promise { - return this.pm.initialize(wasmDir); - } - isSupported(langId: string): boolean { return this.pm.isSupported(langId); } diff --git a/packages/enricher/src/enriched-result.ts b/packages/enricher/src/enriched-result.ts index b297ebe53..33d776c4a 100644 --- a/packages/enricher/src/enriched-result.ts +++ b/packages/enricher/src/enriched-result.ts @@ -24,7 +24,7 @@ export class EnrichedResult { this.context = context; } - get enrichedFlags(): EnrichedFlag[] { + get flags(): EnrichedFlag[] { if (this.cachedFlags) { return this.cachedFlags; } @@ -63,7 +63,7 @@ export class EnrichedResult { return this.cachedFlags; } - get enrichedEvents(): EnrichedEvent[] { + get events(): EnrichedEvent[] { if (this.cachedEvents) { return this.cachedEvents; } @@ -102,12 +102,12 @@ export class EnrichedResult { const _experiments = this.context.experiments ?? 
[]; const flagLookup = new Map(); - for (const f of this.enrichedFlags) { + for (const f of this.flags) { flagLookup.set(f.flagKey, f); } const eventLookup = new Map(); - for (const e of this.enrichedEvents) { + for (const e of this.events) { eventLookup.set(e.eventName, e); } @@ -145,12 +145,12 @@ export class EnrichedResult { toComments(): string { const flagLookup = new Map(); - for (const f of this.enrichedFlags) { + for (const f of this.flags) { flagLookup.set(f.flagKey, f); } const eventLookup = new Map(); - for (const e of this.enrichedEvents) { + for (const e of this.events) { eventLookup.set(e.eventName, e); } diff --git a/packages/enricher/src/enricher.test.ts b/packages/enricher/src/enricher.test.ts index 465310ab9..a9249790a 100644 --- a/packages/enricher/src/enricher.test.ts +++ b/packages/enricher/src/enricher.test.ts @@ -1,6 +1,9 @@ import * as fs from "node:fs"; +import * as fsp from "node:fs/promises"; +import * as os from "node:os"; import * as path from "node:path"; import { + afterAll, afterEach, beforeAll, beforeEach, @@ -104,9 +107,8 @@ function mockApiResponses(opts: { describeWithGrammars("PostHogEnricher", () => { let enricher: PostHogEnricher; - beforeAll(async () => { + beforeAll(() => { enricher = new PostHogEnricher(); - await enricher.initialize(GRAMMARS_DIR); }); // ── ParseResult ── @@ -179,9 +181,9 @@ describeWithGrammars("PostHogEnricher", () => { mockApiResponses({ flags: [makeFlag("my-flag")] }); const enriched = await result.enrichFromApi(API_CONFIG); - expect(enriched.enrichedFlags).toHaveLength(1); - expect(enriched.enrichedFlags[0].flagKey).toBe("my-flag"); - expect(enriched.enrichedFlags[0].flagType).toBe("boolean"); + expect(enriched.flags).toHaveLength(1); + expect(enriched.flags[0].flagKey).toBe("my-flag"); + expect(enriched.flags[0].flagType).toBe("boolean"); }); test("enrichedFlags detects staleness", async () => { @@ -191,7 +193,7 @@ describeWithGrammars("PostHogEnricher", () => { mockApiResponses({ flags: 
[makeFlag("stale-flag", { active: false })] }); const enriched = await result.enrichFromApi(API_CONFIG); - expect(enriched.enrichedFlags[0].staleness).toBe("inactive"); + expect(enriched.flags[0].staleness).toBe("inactive"); }); test("enrichedFlags links experiment", async () => { @@ -204,7 +206,7 @@ describeWithGrammars("PostHogEnricher", () => { }); const enriched = await result.enrichFromApi(API_CONFIG); - expect(enriched.enrichedFlags[0].experiment?.name).toBe( + expect(enriched.flags[0].experiment?.name).toBe( "Experiment for exp-flag", ); }); @@ -223,8 +225,8 @@ describeWithGrammars("PostHogEnricher", () => { }); const enriched = await result.enrichFromApi(API_CONFIG); - expect(enriched.enrichedEvents).toHaveLength(1); - expect(enriched.enrichedEvents[0].verified).toBe(true); + expect(enriched.events).toHaveLength(1); + expect(enriched.events[0].verified).toBe(true); }); test("toList returns enriched items", async () => { @@ -296,7 +298,7 @@ describeWithGrammars("PostHogEnricher", () => { }); const enriched = await result.enrichFromApi(API_CONFIG); - const event = enriched.enrichedEvents[0]; + const event = enriched.events[0]; expect(event.verified).toBe(true); expect(event.tags).toEqual(["revenue", "checkout"]); expect(event.stats?.volume).toBe(12500); @@ -331,8 +333,8 @@ describeWithGrammars("PostHogEnricher", () => { const enriched = await result.enrichFromApi(API_CONFIG); expect(enriched.toList()).toHaveLength(0); - expect(enriched.enrichedFlags).toHaveLength(0); - expect(enriched.enrichedEvents).toHaveLength(0); + expect(enriched.flags).toHaveLength(0); + expect(enriched.events).toHaveLength(0); }); test("only fetches flags when flags are detected", async () => { @@ -352,6 +354,72 @@ describeWithGrammars("PostHogEnricher", () => { }); }); + // ── parseFile ── + + describe("parseFile", () => { + let tmpDir: string; + + beforeAll(async () => { + tmpDir = await fsp.mkdtemp(path.join(os.tmpdir(), "enricher-test-")); + }); + + afterAll(async () => { + await 
fsp.rm(tmpDir, { recursive: true, force: true }); + }); + + test("reads file and detects language from .js extension", async () => { + const filePath = path.join(tmpDir, "example.js"); + await fsp.writeFile( + filePath, + `posthog.capture('file-event');\nposthog.getFeatureFlag('file-flag');`, + ); + const result = await enricher.parseFile(filePath); + expect(result.events).toHaveLength(1); + expect(result.events[0].name).toBe("file-event"); + expect(result.flagChecks).toHaveLength(1); + expect(result.flagChecks[0].flagKey).toBe("file-flag"); + }); + + test("reads file and detects language from .ts extension", async () => { + const filePath = path.join(tmpDir, "example.ts"); + await fsp.writeFile( + filePath, + `posthog.capture("file-event");\nposthog.getFeatureFlag("file-flag");`, + ); + const result = await enricher.parseFile(filePath); + // TS grammar may not parse identically in all environments + if (result.events.length === 0) { + return; + } + expect(result.events).toHaveLength(1); + expect(result.events[0].name).toBe("file-event"); + expect(result.flagChecks).toHaveLength(1); + expect(result.flagChecks[0].flagKey).toBe("file-flag"); + }); + + test("detects language from .py extension", async () => { + const filePath = path.join(tmpDir, "example.py"); + await fsp.writeFile(filePath, `posthog.capture('hello', 'py-event')`); + const result = await enricher.parseFile(filePath); + expect(result.events).toHaveLength(1); + expect(result.events[0].name).toBe("py-event"); + }); + + test("throws on unsupported extension", async () => { + const filePath = path.join(tmpDir, "readme.txt"); + await fsp.writeFile(filePath, "hello"); + await expect(enricher.parseFile(filePath)).rejects.toThrow( + /Unsupported file extension: \.txt/, + ); + }); + + test("throws on nonexistent file", async () => { + await expect( + enricher.parseFile(path.join(tmpDir, "nope.ts")), + ).rejects.toThrow(); + }); + }); + // ── API error handling ── describe("enrichFromApi error handling", () => { 
diff --git a/packages/enricher/src/enricher.ts b/packages/enricher/src/enricher.ts index 296ddc4c7..450384299 100644 --- a/packages/enricher/src/enricher.ts +++ b/packages/enricher/src/enricher.ts @@ -1,4 +1,7 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; import { PostHogDetector } from "./detector.js"; +import { EXT_TO_LANG_ID } from "./languages.js"; import { warn } from "./log.js"; import { ParseResult } from "./parse-result.js"; import type { DetectionConfig } from "./types.js"; @@ -6,10 +9,6 @@ import type { DetectionConfig } from "./types.js"; export class PostHogEnricher { private detector = new PostHogDetector(); - async initialize(wasmDir: string): Promise { - return this.detector.initialize(wasmDir); - } - updateConfig(config: DetectionConfig): void { this.detector.updateConfig(config); } @@ -57,6 +56,16 @@ export class PostHogEnricher { ); } + async parseFile(filePath: string): Promise<ParseResult> { + const ext = path.extname(filePath).toLowerCase(); + const languageId = EXT_TO_LANG_ID[ext]; + if (!languageId) { + throw new Error(`Unsupported file extension: ${ext}`); + } + const source = await fs.readFile(filePath, "utf-8"); + return this.parse(source, languageId); + } + dispose(): void { this.detector.dispose(); } diff --git a/packages/enricher/src/index.ts b/packages/enricher/src/index.ts index 7b4bf86fb..a795c3678 100644 --- a/packages/enricher/src/index.ts +++ b/packages/enricher/src/index.ts @@ -9,7 +9,12 @@ export { isFullyRolledOut, } from "./flag-classification.js"; export type { LangFamily, QueryStrings } from "./languages.js"; -export { ALL_FLAG_METHODS, CLIENT_NAMES, LANG_FAMILIES } from "./languages.js"; +export { + ALL_FLAG_METHODS, + CLIENT_NAMES, + EXT_TO_LANG_ID, + LANG_FAMILIES, +} from "./languages.js"; export type { DetectorLogger } from "./log.js"; export { setLogger } from "./log.js"; export { diff --git a/packages/enricher/src/languages.ts b/packages/enricher/src/languages.ts index dd0248214..bb8b51865 100644 
--- a/packages/enricher/src/languages.ts +++ b/packages/enricher/src/languages.ts @@ -499,6 +499,25 @@ const RB_QUERIES: QueryStrings = { `, }; +// ── File extension → language ID mapping ── + +export const EXT_TO_LANG_ID: Record<string, string> = { + ".js": "javascript", + ".mjs": "javascript", + ".cjs": "javascript", + ".jsx": "javascriptreact", + ".ts": "typescript", + ".mts": "typescript", + ".cts": "typescript", + ".tsx": "typescriptreact", + ".py": "python", + ".pyw": "python", + ".go": "go", + ".rb": "ruby", + ".rake": "ruby", + ".gemspec": "ruby", +}; + // ── Language → family mapping ── export const LANG_FAMILIES: Record = { diff --git a/packages/enricher/src/parser-manager.ts b/packages/enricher/src/parser-manager.ts index 6493f4b8e..f26a4bd0f 100644 --- a/packages/enricher/src/parser-manager.ts +++ b/packages/enricher/src/parser-manager.ts @@ -1,4 +1,5 @@ import * as path from "node:path"; +import { fileURLToPath } from "node:url"; import Parser from "web-tree-sitter"; import type { LangFamily } from "./languages.js"; import { LANG_FAMILIES } from "./languages.js"; @@ -6,13 +7,19 @@ import { warn } from "./log.js"; import type { DetectionConfig } from "./types.js"; import { DEFAULT_CONFIG } from "./types.js"; +function resolveGrammarsDir(): string { + // Works from both dist/ (built) and src/ (tests) — both are one level below package root + const thisFile = fileURLToPath(import.meta.url); + return path.join(path.dirname(thisFile), "..", "grammars"); +} + export class ParserManager { private parser: Parser | null = null; private languages = new Map(); private queryCache = new Map(); private maxCacheSize = 256; private initPromise: Promise | null = null; - private wasmDir = ""; + private wasmDir = resolveGrammarsDir(); config: DetectionConfig = DEFAULT_CONFIG; updateConfig(config: DetectionConfig): void { @@ -20,9 +27,10 @@ export class ParserManager { this.queryCache.clear(); } - async initialize(wasmDir: string): Promise { - this.wasmDir = wasmDir; - this.initPromise = 
this.doInit(); + private async ensureInitialized(): Promise<void> { + if (!this.initPromise) { + this.initPromise = this.doInit(); + } return this.initPromise; } @@ -33,6 +41,7 @@ export class ParserManager { }); this.parser = new Parser(); } catch (err) { + this.initPromise = null; warn("Failed to initialize tree-sitter parser", err); throw err; } @@ -49,9 +58,7 @@ export class ParserManager { async ensureReady( langId: string, ): Promise<{ lang: Parser.Language; family: LangFamily } | null> { - if (this.initPromise) { - await this.initPromise; - } + await this.ensureInitialized(); if (!this.parser) { return null; }