diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 0000000..32a159e
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,56 @@
+name: E2E Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  e2e:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache Cargo
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            target
+          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: ${{ runner.os }}-cargo-
+
+      - name: Build bloop
+        run: cargo build --features llm-tracing
+
+      - name: Setup Node 20
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+
+      - name: Install E2E dependencies
+        working-directory: tests/e2e
+        run: npm ci
+
+      - name: Install Playwright Chromium
+        working-directory: tests/e2e
+        run: npx playwright install --with-deps chromium
+
+      - name: Run E2E tests
+        working-directory: tests/e2e
+        run: npx playwright test
+        env:
+          BLOOP_BIN: ../../target/debug/bloop
+
+      - name: Upload test results
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-test-results
+          path: tests/e2e/test-results/
+          retention-days: 7
diff --git a/tests/e2e/global-setup.ts b/tests/e2e/global-setup.ts
new file mode 100644
index 0000000..f9b867e
--- /dev/null
+++ b/tests/e2e/global-setup.ts
@@ -0,0 +1,182 @@
+import { execFileSync, spawn, ChildProcess } from 'child_process';
+import { writeFileSync, mkdtempSync, existsSync, rmSync } from 'fs';
+import { createServer } from 'net';
+import { tmpdir } from 'os';
+import { join, resolve } from 'path';
+import { createHash, randomBytes } from 'crypto';
+import { seed, seedPostFlush } from './seed';
+
+const STATE_FILE = join(__dirname, '.e2e-state.json');
+const BLOOP_ROOT = resolve(__dirname, '../..');
+
+interface E2EState {
+  pid: number;
+  port: number;
+  tmpDb: string;
+  baseUrl: string;
+  hmacSecret: string;
+  sessionToken: string;
+}
+
+/** Poll `url` every 300ms until it answers with an OK status; throw after `timeoutMs`. */
+async function waitForHealth(url: string, timeoutMs = 30_000): Promise<void> {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    try {
+      const r = await fetch(url);
+      if (r.ok) return;
+    } catch {
+      // Server is not accepting connections yet; retry after the delay below.
+    }
+    await new Promise(r => setTimeout(r, 300));
+  }
+  throw new Error(`Server did not become healthy within ${timeoutMs}ms`);
+}
+
+/**
+ * Ask the OS for an unused TCP port by binding to port 0 and releasing it.
+ * Another process could still grab the port before bloop binds it, but unlike
+ * a random guess the port is known to be free at the time of the call.
+ */
+function findFreePort(): Promise<number> {
+  return new Promise((resolvePort, reject) => {
+    const probe = createServer();
+    probe.once('error', reject);
+    probe.listen(0, () => {
+      const addr = probe.address();
+      probe.close(() => {
+        if (addr !== null && typeof addr === 'object') resolvePort(addr.port);
+        else reject(new Error('Could not determine a free port'));
+      });
+    });
+  });
+}
+
+/**
+ * Insert a test user and session directly into SQLite.
+ * Returns the plaintext session token (for the cookie).
+ */
+async function createTestSession(baseUrl: string, tmpDb: string): Promise<string> {
+  // Generate a session token matching bloop's format:
+  // 32 random bytes → base64url (no padding) → SHA-256 hash stored in DB
+  const tokenBytes = randomBytes(32);
+  const token = tokenBytes.toString('base64url'); // plaintext for cookie
+  const tokenHash = createHash('sha256').update(token).digest('hex');
+
+  const userId = 'e2e-test-user-id';
+  const now = Math.floor(Date.now() / 1000);
+  const expiresAt = now + 86400; // 24 hours
+
+  // Use sqlite3 CLI to insert (must be on PATH, both locally and on the CI runner)
+  execFileSync('sqlite3', [tmpDb,
+    `INSERT INTO webauthn_users (id, username, display_name, created_at) VALUES ('${userId}', 'e2e-test', 'E2E Test User', ${now});`,
+  ]);
+  execFileSync('sqlite3', [tmpDb,
+    `INSERT INTO sessions (token, user_id, created_at, expires_at) VALUES ('${tokenHash}', '${userId}', ${now}, ${expiresAt});`,
+  ]);
+
+  return token;
+}
+
+/**
+ * Playwright global setup: build (unless BLOOP_BIN is set) and start bloop on a
+ * temp database, create a session, seed traces and scores, then write the state
+ * file read by the specs and by global teardown.
+ */
+export default async function globalSetup() {
+  // Build bloop
+  const rawBin = process.env.BLOOP_BIN || join(BLOOP_ROOT, 'target/debug/bloop');
+  const bloopBin = resolve(rawBin);
+  if (!process.env.BLOOP_BIN) {
+    console.log('Building bloop with llm-tracing...');
+    execFileSync('cargo', ['build', '--features', 'llm-tracing'], {
+      cwd: BLOOP_ROOT,
+      stdio: 'inherit',
+    });
+  }
+
+  if (!existsSync(bloopBin)) {
+    throw new Error(`bloop binary not found at ${bloopBin}`);
+  }
+
+  // Create temp DB
+  const tmpDir = mkdtempSync(join(tmpdir(), 'bloop-e2e-'));
+  const tmpDb = join(tmpDir, 'bloop-e2e.db');
+
+  const port = await findFreePort();
+  const hmacSecret = `e2e-test-secret-playwright-long-key-${Date.now()}`;
+
+  console.log(`Starting bloop on port ${port}...`);
+
+  const child: ChildProcess = spawn(bloopBin, [], {
+    cwd: BLOOP_ROOT,
+    env: {
+      ...process.env,
+      BLOOP__DATABASE__PATH: tmpDb,
+      BLOOP__AUTH__HMAC_SECRET: hmacSecret,
+      BLOOP__SERVER__PORT: String(port),
+      BLOOP__LLM_TRACING__ENABLED: 'true',
+      BLOOP__LLM_TRACING__DEFAULT_CONTENT_STORAGE: 'full',
+      BLOOP__LLM_TRACING__FLUSH_INTERVAL_SECS: '1',
+      BLOOP__LLM_TRACING__FLUSH_BATCH_SIZE: '50',
+      BLOOP__PIPELINE__FLUSH_INTERVAL_SECS: '1',
+      BLOOP__RETENTION__PRUNE_INTERVAL_SECS: '999999',
+      RUST_LOG: 'bloop=warn',
+    },
+    stdio: 'pipe',
+    detached: true,
+  });
+
+  let stderrOutput = '';
+  child.stderr?.on('data', (data: Buffer) => {
+    stderrOutput += data.toString();
+    if (process.env.DEBUG) process.stderr.write(`[bloop] ${data}`);
+  });
+  child.on('exit', (code) => {
+    if (code !== null && code !== 0) {
+      console.error(`bloop exited with code ${code}\nstderr: ${stderrOutput}`);
+    }
+  });
+
+  const baseUrl = `http://localhost:${port}`;
+
+  try {
+    // Wait for server to be ready
+    await waitForHealth(`${baseUrl}/health`);
+    console.log('Server ready!');
+
+    // Create a test user + session in the DB (server already created tables)
+    const sessionToken = await createTestSession(baseUrl, tmpDb);
+    console.log('Test session created');
+
+    // Seed test data using the legacy HMAC path
+    await seed({ baseUrl, hmacSecret, sessionToken });
+
+    // Wait for flush (flush_interval_secs=1, give it 3s to be safe)
+    console.log('Waiting for data flush...');
+    await new Promise(r => setTimeout(r, 3000));
+
+    // Seed data that requires traces to exist in SQLite
+    await seedPostFlush({ baseUrl, hmacSecret, sessionToken });
+
+    // Write state for tests and teardown
+    const state: E2EState = {
+      pid: child.pid!,
+      port,
+      tmpDb,
+      baseUrl,
+      hmacSecret,
+      sessionToken,
+    };
+    writeFileSync(STATE_FILE, JSON.stringify(state, null, 2));
+  } catch (err) {
+    // The state file has not been written, so global teardown cannot find the
+    // server or the temp dir: clean both up here instead of leaking them.
+    child.kill('SIGKILL');
+    rmSync(tmpDir, { recursive: true, force: true });
+    throw err;
+  }
+
+  // Set env for Playwright
+  process.env.BLOOP_TEST_URL = baseUrl;
+}
diff --git a/tests/e2e/global-teardown.ts b/tests/e2e/global-teardown.ts
new file mode 100644
index 0000000..02e156e
--- /dev/null
+++ b/tests/e2e/global-teardown.ts
@@ -0,0 +1,38 @@
+import { readFileSync, unlinkSync, existsSync, rmSync } from 'fs';
+import { join, dirname } from 'path';
+
+const STATE_FILE = join(__dirname, '.e2e-state.json');
+
+export default async function globalTeardown() {
+  if (!existsSync(STATE_FILE)) {
+    console.log('No state file found, nothing to clean up.');
+    return;
+  }
+
+  const state = JSON.parse(readFileSync(STATE_FILE, 'utf-8'));
+
+  // Kill the server
+  if (state.pid) {
+    try {
+      process.kill(state.pid, 'SIGTERM');
+      await new Promise(r => setTimeout(r, 500));
+      try { process.kill(state.pid, 'SIGKILL'); } catch {}
+    } catch {
+      // Process already exited
+    }
+    console.log(`Stopped bloop server (PID ${state.pid})`);
+  }
+
+  // Clean up temp DB files
+  if (state.tmpDb) {
+    for (const suffix of ['', '-wal', '-shm']) {
+      const f = state.tmpDb + suffix;
+      try { unlinkSync(f); } catch {}
+    }
+    try { rmSync(dirname(state.tmpDb), { recursive: true }); } catch {}
+  }
+
+  // Remove state file
+  try { unlinkSync(STATE_FILE); } catch {}
+  console.log('Cleanup complete.');
+}
diff --git a/tests/e2e/llm-dashboard.spec.ts b/tests/e2e/llm-dashboard.spec.ts
new file mode 100644
index 0000000..c633d1d
--- /dev/null
+++ b/tests/e2e/llm-dashboard.spec.ts
@@ -0,0 +1,193 @@
+import { test, expect, Page } from '@playwright/test';
+import { readFileSync } from 'fs';
+import { join } from 'path';
+
+function getState() {
+  return JSON.parse(readFileSync(join(__dirname, '.e2e-state.json'), 'utf-8'));
+}
+
+async function openLlmPanel(page: Page) {
+  // Set session cookie so auth passes
+  const state = getState();
+  const url = new URL(state.baseUrl);
+  await page.context().addCookies([{
+    name: 'bloop_session',
+    value: state.sessionToken,
+    domain: url.hostname,
+    path: '/',
+  }]);
+
+  await page.goto('/');
+  // Dismiss welcome modal by marking onboarded in localStorage
+  await page.evaluate(() => localStorage.setItem('bloop_onboarded', '1'));
+  // Reload so the modal doesn't appear
+  await page.reload();
+  // Wait for the page to load and the LLM button to be visible
+  await page.waitForSelector('#llmBtn', { state: 'visible' });
+  await page.click('#llmBtn');
+  // Wait for the LLM panel to become visible
+  await page.waitForSelector('#llmPanel:not(.hidden)', { state: 'attached' });
+}
+
+async function switchTab(page: Page, tabName: string) {
+  const tab = page.locator(`#llmPanel .insights-tab`).filter({ hasText: tabName });
+  await tab.click();
+  // Wait for loading to finish
+  await page.waitForFunction(
+    () => !document.querySelector('#llmContent .insights-loading'),
+    { timeout: 10_000 },
+  );
+}
+
+test.describe('LLM Dashboard', () => {
+  test('tab layout — expected tabs in panel', async ({ page }) => {
+    await openLlmPanel(page);
+    const tabs = page.locator('#llmPanel .insights-tab');
+    const count = await tabs.count();
+    expect(count).toBeGreaterThanOrEqual(8);
+    const names = await tabs.allTextContents();
+    // These 8 tabs are always present
+    for (const expected of ['Overview', 'Usage', 'Latency', 'Models', 'Traces', 'Search', 'Prompts', 'Scores']) {
+      expect(names).toContain(expected);
+    }
+  });
+
+  test('Overview tab — stat cards render', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Overview');
+    // Wait for stat cards to appear (there should be 6: Traces, Spans, Tokens, Cost, Errors, Error Rate)
+    const content = page.locator('#llmContent');
+    await expect(content).not.toContainText('Failed to load');
+    // The overview renders stat cards — look for key labels
+    await expect(content).toContainText('Traces');
+    await expect(content).toContainText('Spans');
+    await expect(content).toContainText('Tokens');
+    await expect(content).toContainText('Cost');
+    await expect(content).toContainText('Errors');
+    await expect(content).toContainText('Error Rate');
+  });
+
+  test('Usage tab — table renders', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Usage');
+    const content = page.locator('#llmContent');
+    const table = content.locator('table');
+    await expect(table).toBeVisible();
+    await expect(table).toContainText('Time');
+    await expect(table).toContainText('Model');
+    await expect(table).toContainText('Spans');
+    await expect(table).toContainText('Tokens');
+    await expect(table).toContainText('Cost');
+    // Table has no <thead> — data rows follow the header row
+    const rows = table.locator('tr');
+    const count = await rows.count();
+    expect(count).toBeGreaterThan(1); // at least header + 1 data row
+  });
+
+  test('Latency tab — table renders', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Latency');
+    const content = page.locator('#llmContent');
+    const table = content.locator('table');
+    await expect(table).toBeVisible();
+    await expect(table).toContainText('Model');
+    await expect(table).toContainText('p50');
+    await expect(table).toContainText('p90');
+    await expect(table).toContainText('p99');
+    await expect(table).toContainText('TTFT');
+    await expect(table).toContainText('Calls');
+    // Table has no <thead> — data rows follow the header row
+    const rows = table.locator('tr');
+    const count = await rows.count();
+    expect(count).toBeGreaterThan(1);
+    await expect(content).toContainText('ms');
+  });
+
+  test('Models tab — cards render', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Models');
+    const content = page.locator('#llmContent');
+    await expect(content).toContainText('gpt-4o');
+    await expect(content).toContainText('calls');
+    await expect(content).toContainText('tokens');
+  });
+
+  test('Scores tab — score cards render', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Scores');
+    const content = page.locator('#llmContent');
+    await expect(content).toContainText('quality');
+    await expect(content).toContainText('P10');
+    await expect(content).toContainText('P50');
+    await expect(content).toContainText('P90');
+    await expect(content).toContainText('scores');
+  });
+
+  test('Traces tab — trace list renders', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Traces');
+    const content = page.locator('#llmContent');
+    // Should show total traces count
+    await expect(content).toContainText('total traces');
+    // Should show at least one trace row
+    await expect(content).toContainText('spans');
+  });
+
+  test('Traces tab — trace detail with span tree', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Traces');
+    const content = page.locator('#llmContent');
+    // Click the first trace row (should be a clickable div)
+    const traceRow = content.locator('div[style*="cursor:pointer"]').first();
+    await traceRow.click();
+    // Wait for trace detail to load
+    await expect(content.locator('button', { hasText: 'Back to traces' })).toBeVisible();
+    // Should show spans
+    await expect(content).toContainText('Spans');
+  });
+
+  test('Prompts tab — table renders', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Prompts');
+    const content = page.locator('#llmContent');
+    // Should show a table with prompt names
+    const table = content.locator('table');
+    await expect(table).toBeVisible();
+    await expect(table).toContainText('Prompt Name');
+    // Should have our seeded prompt "summarizer"
+    await expect(content).toContainText('summarizer');
+  });
+
+  test('Search tab — query and results', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Search');
+    const content = page.locator('#llmContent');
+    // Should show search input
+    const input = content.locator('input[type="text"]');
+    await expect(input).toBeVisible();
+    // Type a query matching our seeded data
+    await input.fill('quantum');
+    await input.press('Enter');
+    // Wait for results to load
+    await page.waitForFunction(
+      () => !document.querySelector('#llmContent .insights-loading'),
+      { timeout: 10_000 },
+    );
+    // Should show results or empty state
+    const hasResults = await content.locator('div[style*="cursor:pointer"]').count();
+    if (hasResults > 0) {
+      // Found matching traces
+      await expect(content).toContainText('matching traces');
+    } else {
+      // Empty state is also valid
+      await expect(content).toContainText('No traces found');
+    }
+  });
+
+  test('Search tab — empty query shows placeholder', async ({ page }) => {
+    await openLlmPanel(page);
+    await switchTab(page, 'Search');
+    const content = page.locator('#llmContent');
+    await expect(content).toContainText('Enter a query to search traces');
+  });
+});
diff --git a/tests/e2e/package-lock.json b/tests/e2e/package-lock.json
new file mode 100644
index 0000000..a3ae6e6
--- /dev/null
+++ b/tests/e2e/package-lock.json
@@ -0,0 +1,75 @@
+{
+  "name": "e2e",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "devDependencies": {
+        "@playwright/test": "^1.50.0"
+      }
+    },
+    "node_modules/@playwright/test": {
+      "version": "1.58.2",
+      "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.58.2.tgz",
+      "integrity": "sha512-akea+6bHYBBfA9uQqSYmlJXn61cTa+jbO87xVLCWbTqbWadRVmhxlXATaOjOgcBaWU4ePo0wB41KMFv3o35IXA==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright": "1.58.2"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/fsevents": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
+      "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
+    "node_modules/playwright": {
+      "version": "1.58.2",
+      "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz",
+      "integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright-core": "1.58.2"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "fsevents": "2.3.2"
+      }
+    },
+    "node_modules/playwright-core": {
+      "version": "1.58.2",
+      "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz",
+      "integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "playwright-core": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    }
+  }
+}
diff --git a/tests/e2e/package.json b/tests/e2e/package.json
new file mode 100644
index 0000000..a47c262
--- /dev/null
+++ b/tests/e2e/package.json
@@ -0,0 +1,10 @@
+{
+  "private": true,
+  "scripts": {
+    "test": "playwright test",
+    "test:headed": "playwright test --headed"
+  },
+  "devDependencies": {
+    "@playwright/test": "^1.50.0"
+  }
+}
diff --git a/tests/e2e/playwright.config.ts b/tests/e2e/playwright.config.ts
new file mode 100644
index 0000000..03d0a02
--- /dev/null
+++ b/tests/e2e/playwright.config.ts
@@ -0,0 +1,18 @@
+import { defineConfig } from '@playwright/test';
+
+export default defineConfig({
+  testDir: '.',
+  testMatch: '*.spec.ts',
+  globalSetup: './global-setup.ts',
+  globalTeardown: './global-teardown.ts',
+  timeout: 30_000,
+  retries: process.env.CI ? 1 : 0,
+  use: {
+    baseURL: process.env.BLOOP_TEST_URL || 'http://localhost:5332',
+    screenshot: 'only-on-failure',
+    trace: 'retain-on-failure',
+  },
+  projects: [
+    { name: 'chromium', use: { browserName: 'chromium' } },
+  ],
+});
diff --git a/tests/e2e/seed.ts b/tests/e2e/seed.ts
new file mode 100644
index 0000000..40bb5b8
--- /dev/null
+++ b/tests/e2e/seed.ts
@@ -0,0 +1,193 @@
+import { createHmac } from 'crypto';
+
+export interface SeedConfig {
+  baseUrl: string;
+  hmacSecret: string; // legacy HMAC secret for trace ingestion
+  sessionToken: string; // session cookie for query/mutation endpoints
+}
+
+function sign(secret: string, body: string): string {
+  return createHmac('sha256', secret).update(body).digest('hex');
+}
+
+/** POST with HMAC auth (for /v1/traces ingest) */
+async function hmacPost(config: SeedConfig, path: string, body: object) {
+  const json = JSON.stringify(body);
+  const sig = sign(config.hmacSecret, json);
+  const resp = await fetch(`${config.baseUrl}${path}`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'X-Signature': sig,
+      'X-Forwarded-For': '127.0.0.1',
+    },
+    body: json,
+  });
+  if (!resp.ok) {
+    const text = await resp.text();
+    throw new Error(`POST ${path} failed (${resp.status}): ${text}`);
+  }
+  return resp.json();
+}
+
+/** Fetch with session cookie auth (for query/mutation endpoints) */
+async function sessionFetch(config: SeedConfig, path: string, opts: RequestInit = {}) {
+  const resp = await fetch(`${config.baseUrl}${path}`, {
+    ...opts,
+    headers: {
+      ...opts.headers as Record<string, string>,
+      'Cookie': `bloop_session=${config.sessionToken}`,
+      'X-Forwarded-For': '127.0.0.1',
+    },
+  });
+  return resp;
+}
+
+export async function seed(config: SeedConfig) {
+  // ── Trace 1: Successful trace with nested spans ──
+  await hmacPost(config, '/v1/traces', {
+    id: 'trace-e2e-001',
+    name: 'chat-completion-e2e',
+    status: 'completed',
+    session_id: 'session-e2e-1',
+    user_id: 'e2e-user',
+    spans: [
+      {
+        id: 'span-e2e-root',
+        span_type: 'generation',
+        name: 'root-generation',
+        model: 'gpt-4o',
+        provider: 'openai',
+        input_tokens: 200,
+        output_tokens: 100,
+        cost: 0.005,
+        latency_ms: 1500,
+        time_to_first_token_ms: 200,
+        status: 'ok',
+        input: 'Explain quantum computing',
+        output: 'Quantum computing uses qubits...',
+      },
+      {
+        id: 'span-e2e-child1',
+        parent_span_id: 'span-e2e-root',
+        span_type: 'tool',
+        name: 'search-tool',
+        model: 'gpt-4o',
+        provider: 'openai',
+        input_tokens: 50,
+        output_tokens: 30,
+        cost: 0.001,
+        latency_ms: 500,
+        status: 'ok',
+      },
+      {
+        id: 'span-e2e-child2',
+        parent_span_id: 'span-e2e-root',
+        span_type: 'retrieval',
+        name: 'doc-retrieval',
+        input_tokens: 20,
+        output_tokens: 80,
+        cost: 0.0005,
+        latency_ms: 300,
+        status: 'ok',
+      },
+    ],
+  });
+
+  // ── Trace 2: Error trace in same session ──
+  await hmacPost(config, '/v1/traces', {
+    id: 'trace-e2e-002',
+    name: 'failed-generation',
+    status: 'error',
+    session_id: 'session-e2e-1',
+    user_id: 'e2e-user',
+    spans: [
+      {
+        id: 'span-e2e-err',
+        span_type: 'generation',
+        name: 'error-span',
+        model: 'claude-3-opus',
+        provider: 'anthropic',
+        input_tokens: 150,
+        output_tokens: 0,
+        cost: 0.003,
+        latency_ms: 5000,
+        status: 'error',
+        error_message: 'Rate limit exceeded',
+      },
+    ],
+  });
+
+  // ── Trace 3: Trace with prompt_name/prompt_version v1 ──
+  await hmacPost(config, '/v1/traces', {
+    id: 'trace-e2e-003',
+    name: 'summarize-article',
+    status: 'completed',
+    prompt_name: 'summarizer',
+    prompt_version: '1',
+    spans: [
+      {
+        id: 'span-e2e-prompt1',
+        span_type: 'generation',
+        name: 'summarize-gen',
+        model: 'gpt-4o',
+        provider: 'openai',
+        input_tokens: 500,
+        output_tokens: 150,
+        cost: 0.008,
+        latency_ms: 900,
+        status: 'ok',
+        input: 'Summarize: The field of quantum computing...',
+        output: 'Summary: Quantum computing leverages...',
+      },
+    ],
+  });
+
+  // ── Trace 4: Second version of same prompt ──
+  await hmacPost(config, '/v1/traces', {
+    id: 'trace-e2e-004',
+    name: 'summarize-article-v2',
+    status: 'completed',
+    prompt_name: 'summarizer',
+    prompt_version: '2',
+    spans: [
+      {
+        id: 'span-e2e-prompt2',
+        span_type: 'generation',
+        name: 'summarize-gen-v2',
+        model: 'gpt-4o',
+        provider: 'openai',
+        input_tokens: 400,
+        output_tokens: 120,
+        cost: 0.006,
+        latency_ms: 700,
+        status: 'ok',
+      },
+    ],
+  });
+
+  console.log('Seed data ingested: 4 traces, 6 spans');
+}
+
+/**
+ * Seed data that depends on traces existing in SQLite (call after flush wait).
+ * Uses session auth for mutation endpoints.
+ */
+export async function seedPostFlush(config: SeedConfig) {
+  // ── Scores on trace 1 (requires trace to exist in SQLite) ──
+  for (const score of [
+    { name: 'quality', value: 0.85 },
+    { name: 'relevance', value: 0.72 },
+  ]) {
+    const scoreResp = await sessionFetch(config, '/v1/llm/traces/trace-e2e-001/scores', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(score),
+    });
+    if (!scoreResp.ok) {
+      console.warn(`Score seed (${score.name}) failed: ${scoreResp.status} ${await scoreResp.text()}`);
+    }
+  }
+
+  console.log('Post-flush seed: 2 scores');
+}