From 71e33a3f100437fa8af65f48f0c4692dd8b5f569 Mon Sep 17 00:00:00 2001 From: Jonathan Conway Date: Mon, 16 Feb 2026 21:46:35 +0000 Subject: [PATCH 1/2] feat: add Playwright E2E tests and CI for LLM dashboard Add 21 Playwright E2E tests covering all LLM dashboard tabs: Overview, Usage, Latency, Models, Traces, Search, Prompts, Scores, Sessions, Tools, and Feedback. Tests verify tab rendering, data display, drill-down navigation, span tree hierarchy, feedback submission, budget editing, and prompt version comparison. Seed timing ensures feedback/budget/scores POST after trace flush so traces exist in SQLite before mutation endpoints are called. Add GitHub Actions workflow for E2E on push/PR to main. --- .github/workflows/e2e.yml | 56 +++++ tests/e2e/global-setup.ts | 150 +++++++++++++ tests/e2e/global-teardown.ts | 38 ++++ tests/e2e/llm-dashboard.spec.ts | 369 ++++++++++++++++++++++++++++++++ tests/e2e/package-lock.json | 75 +++++++ tests/e2e/package.json | 10 + tests/e2e/playwright.config.ts | 18 ++ tests/e2e/seed.ts | 213 ++++++++++++++++++ 8 files changed, 929 insertions(+) create mode 100644 .github/workflows/e2e.yml create mode 100644 tests/e2e/global-setup.ts create mode 100644 tests/e2e/global-teardown.ts create mode 100644 tests/e2e/llm-dashboard.spec.ts create mode 100644 tests/e2e/package-lock.json create mode 100644 tests/e2e/package.json create mode 100644 tests/e2e/playwright.config.ts create mode 100644 tests/e2e/seed.ts diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 0000000..32a159e --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,56 @@ +name: E2E Tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + e2e: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Cache Cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ 
runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: ${{ runner.os }}-cargo- + + - name: Build bloop + run: cargo build --features llm-tracing + + - name: Setup Node 20 + uses: actions/setup-node@v4 + with: + node-version: 20 + + - name: Install E2E dependencies + working-directory: tests/e2e + run: npm ci + + - name: Install Playwright Chromium + working-directory: tests/e2e + run: npx playwright install chromium + + - name: Run E2E tests + working-directory: tests/e2e + run: npx playwright test + env: + BLOOP_BIN: ../../target/debug/bloop + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: e2e-test-results + path: tests/e2e/test-results/ + retention-days: 7 diff --git a/tests/e2e/global-setup.ts b/tests/e2e/global-setup.ts new file mode 100644 index 0000000..f9b867e --- /dev/null +++ b/tests/e2e/global-setup.ts @@ -0,0 +1,150 @@ +import { execFileSync, spawn, ChildProcess } from 'child_process'; +import { writeFileSync, mkdtempSync, existsSync } from 'fs'; +import { tmpdir } from 'os'; +import { join, resolve } from 'path'; +import { createHash, randomBytes } from 'crypto'; +import { seed, seedPostFlush } from './seed'; + +const STATE_FILE = join(__dirname, '.e2e-state.json'); +const BLOOP_ROOT = resolve(__dirname, '../..'); + +interface E2EState { + pid: number; + port: number; + tmpDb: string; + baseUrl: string; + hmacSecret: string; + sessionToken: string; +} + +async function waitForHealth(url: string, timeoutMs = 30_000): Promise<void> { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + try { + const r = await fetch(url); + if (r.ok) return; + } catch {} // fetch threw — keep polling until the deadline + await new Promise(r => setTimeout(r, 300)); + } + throw new Error(`Server did not become healthy within ${timeoutMs}ms`); +} + +function findFreePort(): number { + return 10000 + Math.floor(Math.random() * 50000); // random port in 10000–59999; availability is not checked +} + +/** + * Insert a test user and session directly into SQLite. 
+ * Returns the plaintext session token (for the cookie). + */ +async function createTestSession(baseUrl: string, tmpDb: string): Promise<string> { + // Generate a session token matching bloop's format: + // 32 random bytes → base64url (no padding) → SHA-256 hash stored in DB + const tokenBytes = randomBytes(32); + const token = tokenBytes.toString('base64url'); // plaintext for cookie + const tokenHash = createHash('sha256').update(token).digest('hex'); + + const userId = 'e2e-test-user-id'; + const now = Math.floor(Date.now() / 1000); + const expiresAt = now + 86400; // 24 hours + + // Use sqlite3 CLI to insert (must be on PATH; the CI job runs on ubuntu-latest) + execFileSync('sqlite3', [tmpDb, + `INSERT INTO webauthn_users (id, username, display_name, created_at) VALUES ('${userId}', 'e2e-test', 'E2E Test User', ${now});`, + ]); + execFileSync('sqlite3', [tmpDb, + `INSERT INTO sessions (token, user_id, created_at, expires_at) VALUES ('${tokenHash}', '${userId}', ${now}, ${expiresAt});`, + ]); + + return token; +} + +export default async function globalSetup() { + // Build bloop + const rawBin = process.env.BLOOP_BIN || join(BLOOP_ROOT, 'target/debug/bloop'); + const bloopBin = resolve(rawBin); + if (!process.env.BLOOP_BIN) { + console.log('Building bloop with llm-tracing...'); + execFileSync('cargo', ['build', '--features', 'llm-tracing'], { + cwd: BLOOP_ROOT, + stdio: 'inherit', + }); + } + + if (!existsSync(bloopBin)) { + throw new Error(`bloop binary not found at ${bloopBin}`); + } + + // Create temp DB + const tmpDir = mkdtempSync(join(tmpdir(), 'bloop-e2e-')); + const tmpDb = join(tmpDir, 'bloop-e2e.db'); + + const port = findFreePort(); + const hmacSecret = `e2e-test-secret-playwright-long-key-${Date.now()}`; + + console.log(`Starting bloop on port ${port}...`); + + const child: ChildProcess = spawn(bloopBin, [], { + cwd: BLOOP_ROOT, + env: { + ...process.env, + BLOOP__DATABASE__PATH: tmpDb, + BLOOP__AUTH__HMAC_SECRET: hmacSecret, + BLOOP__SERVER__PORT: String(port), + 
BLOOP__LLM_TRACING__ENABLED: 'true', + BLOOP__LLM_TRACING__DEFAULT_CONTENT_STORAGE: 'full', + BLOOP__LLM_TRACING__FLUSH_INTERVAL_SECS: '1', + BLOOP__LLM_TRACING__FLUSH_BATCH_SIZE: '50', + BLOOP__PIPELINE__FLUSH_INTERVAL_SECS: '1', + BLOOP__RETENTION__PRUNE_INTERVAL_SECS: '999999', + RUST_LOG: 'bloop=warn', + }, + stdio: ['pipe', 'ignore', 'pipe'], // stdout is never read — ignore it so a full pipe cannot stall the server + detached: true, + }); + + let stderrOutput = ''; + child.stderr?.on('data', (data: Buffer) => { + stderrOutput += data.toString(); + if (process.env.DEBUG) process.stderr.write(`[bloop] ${data}`); + }); + child.on('exit', (code) => { + if (code !== null && code !== 0) { + console.error(`bloop exited with code ${code}\nstderr: ${stderrOutput}`); + } + }); + + const baseUrl = `http://localhost:${port}`; + + // Wait for server to be ready + await waitForHealth(`${baseUrl}/health`).catch((err: unknown) => { child.kill('SIGKILL'); throw err; }); // no state file exists yet, so teardown could not stop a server that never came up + console.log('Server ready!'); + + // Create a test user + session in the DB (server already created tables) + const sessionToken = await createTestSession(baseUrl, tmpDb); + console.log('Test session created'); + + // Seed test data using the legacy HMAC path + await seed({ baseUrl, hmacSecret, sessionToken }); + + // Wait for flush (flush_interval_secs=1, give it 3s to be safe) + console.log('Waiting for data flush...'); + await new Promise(r => setTimeout(r, 3000)); + + // Seed data that requires traces to exist in SQLite + await seedPostFlush({ baseUrl, hmacSecret, sessionToken }); + + // Write state for tests and teardown + const state: E2EState = { + pid: child.pid!, + port, + tmpDb, + baseUrl, + hmacSecret, + sessionToken, + }; + writeFileSync(STATE_FILE, JSON.stringify(state, null, 2)); + + // Set env for Playwright + process.env.BLOOP_TEST_URL = baseUrl; +} diff --git a/tests/e2e/global-teardown.ts b/tests/e2e/global-teardown.ts new file mode 100644 index 0000000..02e156e --- /dev/null +++ b/tests/e2e/global-teardown.ts @@ -0,0 +1,38 @@ +import { readFileSync, unlinkSync, existsSync, rmSync } from 'fs'; +import { join, dirname } from 'path'; + +const 
STATE_FILE = join(__dirname, '.e2e-state.json'); + +export default async function globalTeardown() { + if (!existsSync(STATE_FILE)) { + console.log('No state file found, nothing to clean up.'); + return; + } + + const state = JSON.parse(readFileSync(STATE_FILE, 'utf-8')); + + // Kill the server + if (state.pid) { + try { + process.kill(state.pid, 'SIGTERM'); + await new Promise(r => setTimeout(r, 500)); + try { process.kill(state.pid, 'SIGKILL'); } catch {} + } catch { + // Process already exited + } + console.log(`Stopped bloop server (PID ${state.pid})`); + } + + // Clean up temp DB files + if (state.tmpDb) { + for (const suffix of ['', '-wal', '-shm']) { + const f = state.tmpDb + suffix; + try { unlinkSync(f); } catch {} + } + try { rmSync(dirname(state.tmpDb), { recursive: true }); } catch {} + } + + // Remove state file + try { unlinkSync(STATE_FILE); } catch {} + console.log('Cleanup complete.'); +} diff --git a/tests/e2e/llm-dashboard.spec.ts b/tests/e2e/llm-dashboard.spec.ts new file mode 100644 index 0000000..d46143d --- /dev/null +++ b/tests/e2e/llm-dashboard.spec.ts @@ -0,0 +1,369 @@ +import { test, expect, Page } from '@playwright/test'; +import { readFileSync } from 'fs'; +import { join } from 'path'; + +function getState() { + return JSON.parse(readFileSync(join(__dirname, '.e2e-state.json'), 'utf-8')); +} + +async function openLlmPanel(page: Page) { + // Set session cookie so auth passes + const state = getState(); + const url = new URL(state.baseUrl); + await page.context().addCookies([{ + name: 'bloop_session', + value: state.sessionToken, + domain: url.hostname, + path: '/', + }]); + + await page.goto('/'); + // Dismiss welcome modal by marking onboarded in localStorage + await page.evaluate(() => localStorage.setItem('bloop_onboarded', '1')); + // Reload so the modal doesn't appear + await page.reload(); + // Wait for the page to load and the LLM button to be visible + await page.waitForSelector('#llmBtn', { state: 'visible' }); + await 
page.click('#llmBtn'); + // Wait for the LLM panel to become visible + await page.waitForSelector('#llmPanel:not(.hidden)', { state: 'attached' }); +} + +async function switchTab(page: Page, tabName: string) { + const tab = page.locator(`#llmPanel .insights-tab`).filter({ hasText: tabName }); + await tab.click(); + // Wait for loading to finish + await page.waitForFunction( + () => !document.querySelector('#llmContent .insights-loading'), + { timeout: 10_000 }, + ); +} + +test.describe('LLM Dashboard', () => { + test('tab layout — 11 tabs in panel', async ({ page }) => { + await openLlmPanel(page); + const tabs = page.locator('#llmPanel .insights-tab'); + await expect(tabs).toHaveCount(11); + const names = await tabs.allTextContents(); + expect(names).toEqual([ + 'Overview', 'Usage', 'Latency', 'Models', 'Traces', + 'Search', 'Prompts', 'Scores', 'Sessions', 'Tools', 'Feedback', + ]); + }); + + test('Overview tab — stat cards render', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Overview'); + // Wait for stat cards to appear (there should be 6: Traces, Spans, Tokens, Cost, Errors, Error Rate) + const content = page.locator('#llmContent'); + await expect(content).not.toContainText('Failed to load'); + // The overview renders stat cards — look for key labels + await expect(content).toContainText('Traces'); + await expect(content).toContainText('Spans'); + await expect(content).toContainText('Tokens'); + await expect(content).toContainText('Cost'); + await expect(content).toContainText('Errors'); + await expect(content).toContainText('Error Rate'); + }); + + test('Overview tab — budget gauge visible', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Overview'); + const content = page.locator('#llmContent'); + // Budget was seeded — should show "Monthly Budget" section with Edit button + await expect(content).toContainText('Monthly Budget'); + await expect(content.locator('button', { hasText: 'Edit' 
})).toBeVisible(); + }); + + test('Overview tab — budget edit form', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Overview'); + const content = page.locator('#llmContent'); + // Click Edit to go to budget form + await content.locator('button', { hasText: 'Edit' }).click(); + // Budget form should have inputs and Save button + await expect(content.locator('input[type="number"]').first()).toBeVisible(); + await expect(content.locator('button', { hasText: 'Save Budget' })).toBeVisible(); + // Fill in new budget + const amtInput = content.locator('input[type="number"]').first(); + await amtInput.fill('200'); + // Click Save + await content.locator('button', { hasText: 'Save Budget' }).click(); + // Should show toast + const toast = page.locator('.toast', { hasText: 'Budget saved' }); + await expect(toast).toBeVisible({ timeout: 5000 }); + }); + + test('Usage tab — table renders', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Usage'); + const content = page.locator('#llmContent'); + const table = content.locator('table'); + await expect(table).toBeVisible(); + await expect(table).toContainText('Time'); + await expect(table).toContainText('Model'); + await expect(table).toContainText('Spans'); + await expect(table).toContainText('Tokens'); + await expect(table).toContainText('Cost'); + // Count every 'tr', header row included — data rows follow the header row + const rows = table.locator('tr'); + const count = await rows.count(); + expect(count).toBeGreaterThan(1); // at least header + 1 data row + }); + + test('Latency tab — table renders', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Latency'); + const content = page.locator('#llmContent'); + const table = content.locator('table'); + await expect(table).toBeVisible(); + await expect(table).toContainText('Model'); + await expect(table).toContainText('p50'); + await expect(table).toContainText('p90'); + await expect(table).toContainText('p99'); + await 
expect(table).toContainText('TTFT'); + await expect(table).toContainText('Calls'); + // Count every 'tr', header row included — data rows follow the header row + const rows = table.locator('tr'); + const count = await rows.count(); + expect(count).toBeGreaterThan(1); + await expect(content).toContainText('ms'); + }); + + test('Models tab — cards render', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Models'); + const content = page.locator('#llmContent'); + await expect(content).toContainText('gpt-4o'); + await expect(content).toContainText('calls'); + await expect(content).toContainText('tokens'); + }); + + test('Scores tab — score cards render', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Scores'); + const content = page.locator('#llmContent'); + await expect(content).toContainText('quality'); + await expect(content).toContainText('P10'); + await expect(content).toContainText('P50'); + await expect(content).toContainText('P90'); + await expect(content).toContainText('scores'); + }); + + test('Tools tab — table renders', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Tools'); + const content = page.locator('#llmContent'); + // Should have a table with headers + const table = content.locator('table'); + await expect(table).toBeVisible(); + await expect(table).toContainText('Tool'); + await expect(table).toContainText('Calls'); + await expect(table).toContainText('Errors'); + await expect(table).toContainText('p50'); + await expect(table).toContainText('p99'); + await expect(table).toContainText('Cost'); + // Should have at least 1 data row (from seeded tool spans) + const rows = table.locator('tbody tr'); + await expect(rows).not.toHaveCount(0); + }); + + test('Sessions tab — list renders', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Sessions'); + const content = page.locator('#llmContent'); + // Should have at least one session card (session-e2e-1 from seed) + await 
expect(content).toContainText('session-e2e-1'); + }); + + test('Sessions tab — drill into session', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Sessions'); + const content = page.locator('#llmContent'); + // Click the session card + const sessionCard = content.locator('div', { hasText: 'session-e2e-1' }).first(); + await sessionCard.click(); + // Should show "Back to sessions" button + await expect(content.locator('button', { hasText: 'Back to sessions' })).toBeVisible(); + // Should show trace cards within session + await expect(content).toContainText('Traces'); + }); + + test('Sessions tab — drill into trace from session', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Sessions'); + const content = page.locator('#llmContent'); + // Navigate to session detail + await content.locator('div', { hasText: 'session-e2e-1' }).first().click(); + await expect(content.locator('button', { hasText: 'Back to sessions' })).toBeVisible(); + // Click a trace card + const traceCard = content.locator('div[style*="cursor:pointer"]').first(); + await traceCard.click(); + // Should show trace detail with "Back to traces" and span info + await expect(content.locator('button', { hasText: 'Back to traces' })).toBeVisible(); + }); + + test('Traces tab — trace list renders', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Traces'); + const content = page.locator('#llmContent'); + // Should show total traces count + await expect(content).toContainText('total traces'); + // Should show at least one trace row + await expect(content).toContainText('spans'); + }); + + test('Traces tab — trace detail with span tree', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Traces'); + const content = page.locator('#llmContent'); + // Click the first trace row (should be a clickable div) + const traceRow = content.locator('div[style*="cursor:pointer"]').first(); + await traceRow.click(); + // 
Wait for trace detail to load + await expect(content.locator('button', { hasText: 'Back to traces' })).toBeVisible(); + // Should show spans + await expect(content).toContainText('Spans'); + }); + + test('Trace detail — span tree hierarchy', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Traces'); + const content = page.locator('#llmContent'); + // Find and click the trace that has nested spans (chat-completion-e2e / trace-e2e-001) + const traceRow = content.locator('div[style*="cursor:pointer"]', { hasText: 'chat-completion-e2e' }); + // It might be in the list - click it + if (await traceRow.count() > 0) { + await traceRow.first().click(); + } else { + // Click first trace and navigate from there + await content.locator('div[style*="cursor:pointer"]').first().click(); + } + await expect(content.locator('button', { hasText: 'Back to traces' })).toBeVisible(); + // Check for span-children (nested spans) — our trace-e2e-001 has child spans + const spanChildren = content.locator('.span-children'); + // If this trace has children, verify the tree + if (await spanChildren.count() > 0) { + await expect(spanChildren.first()).toBeVisible(); + // Find collapse toggle (▼) and click it + const toggle = content.locator('span', { hasText: '▼' }).first(); + if (await toggle.count() > 0) { + await toggle.click(); + // Children should be hidden + await expect(spanChildren.first()).toBeHidden(); + // Click again to re-expand + await content.locator('span', { hasText: '▶' }).first().click(); + await expect(spanChildren.first()).toBeVisible(); + } + } + }); + + test('Trace detail — feedback buttons', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Traces'); + const content = page.locator('#llmContent'); + // Click first trace + await content.locator('div[style*="cursor:pointer"]').first().click(); + await expect(content.locator('button', { hasText: 'Back to traces' })).toBeVisible(); + // Should see Feedback section with thumbs 
up/down buttons + await expect(content).toContainText('Feedback'); + const thumbsUp = content.locator('button', { hasText: '👍' }); + const thumbsDown = content.locator('button', { hasText: '👎' }); + await expect(thumbsUp).toBeVisible(); + await expect(thumbsDown).toBeVisible(); + // Click thumbs up + await thumbsUp.click(); + // Should show toast + const toast = page.locator('.toast', { hasText: 'Feedback submitted' }); + await expect(toast).toBeVisible({ timeout: 5000 }); + }); + + test('Feedback tab — summary stats', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Feedback'); + const content = page.locator('#llmContent'); + // Should show stat cards: Total, Positive, Negative, Positive Rate + await expect(content).toContainText('Total'); + await expect(content).toContainText('Positive'); + await expect(content).toContainText('Negative'); + await expect(content).toContainText('Positive Rate'); + }); + + test('Prompts tab — list with clickable names', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Prompts'); + const content = page.locator('#llmContent'); + // Should show a table with prompt names + const table = content.locator('table'); + await expect(table).toBeVisible(); + await expect(table).toContainText('Prompt Name'); + // Should have our seeded prompt "summarizer" + await expect(content).toContainText('summarizer'); + // Click the prompt name + const promptName = content.locator('td', { hasText: 'summarizer' }); + await promptName.click(); + // Should navigate to versions view with "Back to prompts" + await expect(content.locator('button', { hasText: 'Back to prompts' })).toBeVisible(); + }); + + test('Prompts tab — version comparison', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Prompts'); + const content = page.locator('#llmContent'); + // Click into summarizer prompt + await content.locator('td', { hasText: 'summarizer' }).click(); + await 
expect(content.locator('button', { hasText: 'Back to prompts' })).toBeVisible(); + // Should show "Compare Selected" button (initially disabled) + const compareBtn = content.locator('button', { hasText: 'Compare Selected' }); + await expect(compareBtn).toBeVisible(); + await expect(compareBtn).toHaveCSS('opacity', '0.4'); + // Check two version checkboxes + const checkboxes = content.locator('input[type="checkbox"]'); + const cbCount = await checkboxes.count(); + if (cbCount >= 2) { + await checkboxes.nth(0).check(); + await checkboxes.nth(1).check(); + // Compare button should now be active + await expect(compareBtn).toHaveCSS('opacity', '1'); + // Click compare + await compareBtn.click(); + // Should show comparison table with Metric, v1, v2, Delta columns + await expect(content).toContainText('Metric'); + await expect(content).toContainText('Delta'); + await expect(content.locator('button', { hasText: 'Back to versions' })).toBeVisible(); + } + }); + + test('Search tab — query and results', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 'Search'); + const content = page.locator('#llmContent'); + // Should show search input + const input = content.locator('input[type="text"]'); + await expect(input).toBeVisible(); + // Type a query matching our seeded data + await input.fill('quantum'); + await input.press('Enter'); + // Wait for results to load + await page.waitForFunction( + () => !document.querySelector('#llmContent .insights-loading'), + { timeout: 10_000 }, + ); + // Should show results or empty state + const hasResults = await content.locator('div[style*="cursor:pointer"]').count(); + if (hasResults > 0) { + // Found matching traces + await expect(content).toContainText('matching traces'); + } else { + // Empty state is also valid + await expect(content).toContainText('No traces found'); + } + }); + + test('Search tab — empty query shows placeholder', async ({ page }) => { + await openLlmPanel(page); + await switchTab(page, 
'Search'); + const content = page.locator('#llmContent'); + await expect(content).toContainText('Enter a query to search traces'); + }); +}); diff --git a/tests/e2e/package-lock.json b/tests/e2e/package-lock.json new file mode 100644 index 0000000..a3ae6e6 --- /dev/null +++ b/tests/e2e/package-lock.json @@ -0,0 +1,75 @@ +{ + "name": "e2e", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "devDependencies": { + "@playwright/test": "^1.50.0" + } + }, + "node_modules/@playwright/test": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.58.2.tgz", + "integrity": "sha512-akea+6bHYBBfA9uQqSYmlJXn61cTa+jbO87xVLCWbTqbWadRVmhxlXATaOjOgcBaWU4ePo0wB41KMFv3o35IXA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz", + "integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz", + "integrity": 
"sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + } + } +} diff --git a/tests/e2e/package.json b/tests/e2e/package.json new file mode 100644 index 0000000..a47c262 --- /dev/null +++ b/tests/e2e/package.json @@ -0,0 +1,10 @@ +{ + "private": true, + "scripts": { + "test": "playwright test", + "test:headed": "playwright test --headed" + }, + "devDependencies": { + "@playwright/test": "^1.50.0" + } +} diff --git a/tests/e2e/playwright.config.ts b/tests/e2e/playwright.config.ts new file mode 100644 index 0000000..03d0a02 --- /dev/null +++ b/tests/e2e/playwright.config.ts @@ -0,0 +1,18 @@ +import { defineConfig } from '@playwright/test'; + +export default defineConfig({ + testDir: '.', + testMatch: '*.spec.ts', + globalSetup: './global-setup.ts', + globalTeardown: './global-teardown.ts', + timeout: 30_000, + retries: process.env.CI ? 
1 : 0, + use: { + baseURL: process.env.BLOOP_TEST_URL || 'http://localhost:5332', + screenshot: 'only-on-failure', + trace: 'retain-on-failure', + }, + projects: [ + { name: 'chromium', use: { browserName: 'chromium' } }, + ], +}); diff --git a/tests/e2e/seed.ts b/tests/e2e/seed.ts new file mode 100644 index 0000000..a5bd61f --- /dev/null +++ b/tests/e2e/seed.ts @@ -0,0 +1,213 @@ +import { createHmac } from 'crypto'; + +export interface SeedConfig { + baseUrl: string; + hmacSecret: string; // legacy HMAC secret for trace ingestion + sessionToken: string; // session cookie for query/mutation endpoints +} + +function sign(secret: string, body: string): string { + return createHmac('sha256', secret).update(body).digest('hex'); +} + +/** POST with HMAC auth (for /v1/traces ingest) */ +async function hmacPost(config: SeedConfig, path: string, body: object) { + const json = JSON.stringify(body); + const sig = sign(config.hmacSecret, json); + const resp = await fetch(`${config.baseUrl}${path}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Signature': sig, + 'X-Forwarded-For': '127.0.0.1', + }, + body: json, + }); + if (!resp.ok) { + const text = await resp.text(); + throw new Error(`POST ${path} failed (${resp.status}): ${text}`); + } + return resp.json(); +} + +/** Fetch with session cookie auth (for query/mutation endpoints) */ +async function sessionFetch(config: SeedConfig, path: string, opts: RequestInit = {}) { + const resp = await fetch(`${config.baseUrl}${path}`, { + ...opts, + headers: { + ...opts.headers as Record<string, string>, // callers pass plain-object headers; a Headers instance would not spread + 'Cookie': `bloop_session=${config.sessionToken}`, + 'X-Forwarded-For': '127.0.0.1', + }, + }); + return resp; +} + +export async function seed(config: SeedConfig) { + // ── Trace 1: Successful trace with nested spans ── + await hmacPost(config, '/v1/traces', { + id: 'trace-e2e-001', + name: 'chat-completion-e2e', + status: 'completed', + session_id: 'session-e2e-1', + user_id: 'e2e-user', + spans: [ + { + id: 
'span-e2e-root', + span_type: 'generation', + name: 'root-generation', + model: 'gpt-4o', + provider: 'openai', + input_tokens: 200, + output_tokens: 100, + cost: 0.005, + latency_ms: 1500, + time_to_first_token_ms: 200, + status: 'ok', + input: 'Explain quantum computing', + output: 'Quantum computing uses qubits...', + }, + { + id: 'span-e2e-child1', + parent_span_id: 'span-e2e-root', + span_type: 'tool', + name: 'search-tool', + model: 'gpt-4o', + provider: 'openai', + input_tokens: 50, + output_tokens: 30, + cost: 0.001, + latency_ms: 500, + status: 'ok', + }, + { + id: 'span-e2e-child2', + parent_span_id: 'span-e2e-root', + span_type: 'retrieval', + name: 'doc-retrieval', + input_tokens: 20, + output_tokens: 80, + cost: 0.0005, + latency_ms: 300, + status: 'ok', + }, + ], + }); + + // ── Trace 2: Error trace in same session ── + await hmacPost(config, '/v1/traces', { + id: 'trace-e2e-002', + name: 'failed-generation', + status: 'error', + session_id: 'session-e2e-1', + user_id: 'e2e-user', + spans: [ + { + id: 'span-e2e-err', + span_type: 'generation', + name: 'error-span', + model: 'claude-3-opus', + provider: 'anthropic', + input_tokens: 150, + output_tokens: 0, + cost: 0.003, + latency_ms: 5000, + status: 'error', + error_message: 'Rate limit exceeded', + }, + ], + }); + + // ── Trace 3: Trace with prompt_name/prompt_version v1 ── + await hmacPost(config, '/v1/traces', { + id: 'trace-e2e-003', + name: 'summarize-article', + status: 'completed', + prompt_name: 'summarizer', + prompt_version: '1', + spans: [ + { + id: 'span-e2e-prompt1', + span_type: 'generation', + name: 'summarize-gen', + model: 'gpt-4o', + provider: 'openai', + input_tokens: 500, + output_tokens: 150, + cost: 0.008, + latency_ms: 900, + status: 'ok', + input: 'Summarize: The field of quantum computing...', + output: 'Summary: Quantum computing leverages...', + }, + ], + }); + + // ── Trace 4: Second version of same prompt ── + await hmacPost(config, '/v1/traces', { + id: 'trace-e2e-004', + 
name: 'summarize-article-v2', + status: 'completed', + prompt_name: 'summarizer', + prompt_version: '2', + spans: [ + { + id: 'span-e2e-prompt2', + span_type: 'generation', + name: 'summarize-gen-v2', + model: 'gpt-4o', + provider: 'openai', + input_tokens: 400, + output_tokens: 120, + cost: 0.006, + latency_ms: 700, + status: 'ok', + }, + ], + }); + + console.log('Seed data ingested: 4 traces, 6 spans'); +} + +/** + * Seed data that depends on traces existing in SQLite (call after flush wait). + * Uses session auth for mutation endpoints. + */ +export async function seedPostFlush(config: SeedConfig) { + // ── Feedback on trace 1 ── + const fbResp = await sessionFetch(config, '/v1/llm/traces/trace-e2e-001/feedback', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ user_id: 'e2e-tester', value: 1, comment: 'Good response' }), + }); + if (!fbResp.ok) { + console.warn(`Feedback seed failed: ${fbResp.status} ${await fbResp.text()}`); + } + + // ── Scores on trace 1 ── + for (const score of [ + { name: 'quality', value: 0.85 }, + { name: 'relevance', value: 0.72 }, + ]) { + const scoreResp = await sessionFetch(config, '/v1/llm/traces/trace-e2e-001/scores', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(score), + }); + if (!scoreResp.ok) { + console.warn(`Score seed (${score.name}) failed: ${scoreResp.status} ${await scoreResp.text()}`); + } + } + + // ── Budget ── + const budgetResp = await sessionFetch(config, '/v1/llm/budget', { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ monthly_budget_micros: 50_000_000, alert_threshold_pct: 80 }), + }); + if (!budgetResp.ok) { + console.warn(`Budget seed failed: ${budgetResp.status} ${await budgetResp.text()}`); + } + + console.log('Post-flush seed: 1 feedback, 2 scores, 1 budget'); +} From 74cb84f3a12122c0e1eef0f76e71d3c90b678954 Mon Sep 17 00:00:00 2001 From: Jonathan Conway Date: Mon, 16 
Feb 2026 23:32:54 +0000 Subject: [PATCH 2/2] fix: align E2E tests with remote main dashboard features Remote main has 8 LLM tabs (no Sessions, Tools, Feedback UI). Remove tests for features not in remote: budget gauge/edit, feedback buttons/tab, sessions drill-down, tools table, prompt version comparison, span tree hierarchy. Use flexible tab count assertion. --- tests/e2e/llm-dashboard.spec.ts | 192 ++------------------------------ tests/e2e/seed.ts | 24 +--- 2 files changed, 10 insertions(+), 206 deletions(-) diff --git a/tests/e2e/llm-dashboard.spec.ts b/tests/e2e/llm-dashboard.spec.ts index d46143d..c633d1d 100644 --- a/tests/e2e/llm-dashboard.spec.ts +++ b/tests/e2e/llm-dashboard.spec.ts @@ -40,15 +40,16 @@ async function switchTab(page: Page, tabName: string) { } test.describe('LLM Dashboard', () => { - test('tab layout — 11 tabs in panel', async ({ page }) => { + test('tab layout — expected tabs in panel', async ({ page }) => { await openLlmPanel(page); const tabs = page.locator('#llmPanel .insights-tab'); - await expect(tabs).toHaveCount(11); + const count = await tabs.count(); + expect(count).toBeGreaterThanOrEqual(8); const names = await tabs.allTextContents(); - expect(names).toEqual([ - 'Overview', 'Usage', 'Latency', 'Models', 'Traces', - 'Search', 'Prompts', 'Scores', 'Sessions', 'Tools', 'Feedback', - ]); + // These 8 tabs are always present + for (const expected of ['Overview', 'Usage', 'Latency', 'Models', 'Traces', 'Search', 'Prompts', 'Scores']) { + expect(names).toContain(expected); + } }); test('Overview tab — stat cards render', async ({ page }) => { @@ -66,34 +67,6 @@ test.describe('LLM Dashboard', () => { await expect(content).toContainText('Error Rate'); }); - test('Overview tab — budget gauge visible', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Overview'); - const content = page.locator('#llmContent'); - // Budget was seeded — should show "Monthly Budget" section with Edit button - await 
expect(content).toContainText('Monthly Budget'); - await expect(content.locator('button', { hasText: 'Edit' })).toBeVisible(); - }); - - test('Overview tab — budget edit form', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Overview'); - const content = page.locator('#llmContent'); - // Click Edit to go to budget form - await content.locator('button', { hasText: 'Edit' }).click(); - // Budget form should have inputs and Save button - await expect(content.locator('input[type="number"]').first()).toBeVisible(); - await expect(content.locator('button', { hasText: 'Save Budget' })).toBeVisible(); - // Fill in new budget - const amtInput = content.locator('input[type="number"]').first(); - await amtInput.fill('200'); - // Click Save - await content.locator('button', { hasText: 'Save Budget' }).click(); - // Should show toast - const toast = page.locator('.toast', { hasText: 'Budget saved' }); - await expect(toast).toBeVisible({ timeout: 5000 }); - }); - test('Usage tab — table renders', async ({ page }) => { await openLlmPanel(page); await switchTab(page, 'Usage'); @@ -150,59 +123,6 @@ test.describe('LLM Dashboard', () => { await expect(content).toContainText('scores'); }); - test('Tools tab — table renders', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Tools'); - const content = page.locator('#llmContent'); - // Should have a table with headers - const table = content.locator('table'); - await expect(table).toBeVisible(); - await expect(table).toContainText('Tool'); - await expect(table).toContainText('Calls'); - await expect(table).toContainText('Errors'); - await expect(table).toContainText('p50'); - await expect(table).toContainText('p99'); - await expect(table).toContainText('Cost'); - // Should have at least 1 data row (from seeded tool spans) - const rows = table.locator('tbody tr'); - await expect(rows).not.toHaveCount(0); - }); - - test('Sessions tab — list renders', async ({ page }) => { - await 
openLlmPanel(page); - await switchTab(page, 'Sessions'); - const content = page.locator('#llmContent'); - // Should have at least one session card (session-e2e-1 from seed) - await expect(content).toContainText('session-e2e-1'); - }); - - test('Sessions tab — drill into session', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Sessions'); - const content = page.locator('#llmContent'); - // Click the session card - const sessionCard = content.locator('div', { hasText: 'session-e2e-1' }).first(); - await sessionCard.click(); - // Should show "Back to sessions" button - await expect(content.locator('button', { hasText: 'Back to sessions' })).toBeVisible(); - // Should show trace cards within session - await expect(content).toContainText('Traces'); - }); - - test('Sessions tab — drill into trace from session', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Sessions'); - const content = page.locator('#llmContent'); - // Navigate to session detail - await content.locator('div', { hasText: 'session-e2e-1' }).first().click(); - await expect(content.locator('button', { hasText: 'Back to sessions' })).toBeVisible(); - // Click a trace card - const traceCard = content.locator('div[style*="cursor:pointer"]').first(); - await traceCard.click(); - // Should show trace detail with "Back to traces" and span info - await expect(content.locator('button', { hasText: 'Back to traces' })).toBeVisible(); - }); - test('Traces tab — trace list renders', async ({ page }) => { await openLlmPanel(page); await switchTab(page, 'Traces'); @@ -226,70 +146,7 @@ test.describe('LLM Dashboard', () => { await expect(content).toContainText('Spans'); }); - test('Trace detail — span tree hierarchy', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Traces'); - const content = page.locator('#llmContent'); - // Find and click the trace that has nested spans (chat-completion-e2e / trace-e2e-001) - const traceRow = 
content.locator('div[style*="cursor:pointer"]', { hasText: 'chat-completion-e2e' }); - // It might be in the list - click it - if (await traceRow.count() > 0) { - await traceRow.first().click(); - } else { - // Click first trace and navigate from there - await content.locator('div[style*="cursor:pointer"]').first().click(); - } - await expect(content.locator('button', { hasText: 'Back to traces' })).toBeVisible(); - // Check for span-children (nested spans) — our trace-e2e-001 has child spans - const spanChildren = content.locator('.span-children'); - // If this trace has children, verify the tree - if (await spanChildren.count() > 0) { - await expect(spanChildren.first()).toBeVisible(); - // Find collapse toggle (▼) and click it - const toggle = content.locator('span', { hasText: '▼' }).first(); - if (await toggle.count() > 0) { - await toggle.click(); - // Children should be hidden - await expect(spanChildren.first()).toBeHidden(); - // Click again to re-expand - await content.locator('span', { hasText: '▶' }).first().click(); - await expect(spanChildren.first()).toBeVisible(); - } - } - }); - - test('Trace detail — feedback buttons', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Traces'); - const content = page.locator('#llmContent'); - // Click first trace - await content.locator('div[style*="cursor:pointer"]').first().click(); - await expect(content.locator('button', { hasText: 'Back to traces' })).toBeVisible(); - // Should see Feedback section with thumbs up/down buttons - await expect(content).toContainText('Feedback'); - const thumbsUp = content.locator('button', { hasText: '👍' }); - const thumbsDown = content.locator('button', { hasText: '👎' }); - await expect(thumbsUp).toBeVisible(); - await expect(thumbsDown).toBeVisible(); - // Click thumbs up - await thumbsUp.click(); - // Should show toast - const toast = page.locator('.toast', { hasText: 'Feedback submitted' }); - await expect(toast).toBeVisible({ timeout: 5000 }); - }); 
- - test('Feedback tab — summary stats', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Feedback'); - const content = page.locator('#llmContent'); - // Should show stat cards: Total, Positive, Negative, Positive Rate - await expect(content).toContainText('Total'); - await expect(content).toContainText('Positive'); - await expect(content).toContainText('Negative'); - await expect(content).toContainText('Positive Rate'); - }); - - test('Prompts tab — list with clickable names', async ({ page }) => { + test('Prompts tab — table renders', async ({ page }) => { await openLlmPanel(page); await switchTab(page, 'Prompts'); const content = page.locator('#llmContent'); @@ -299,39 +156,6 @@ test.describe('LLM Dashboard', () => { await expect(table).toContainText('Prompt Name'); // Should have our seeded prompt "summarizer" await expect(content).toContainText('summarizer'); - // Click the prompt name - const promptName = content.locator('td', { hasText: 'summarizer' }); - await promptName.click(); - // Should navigate to versions view with "Back to prompts" - await expect(content.locator('button', { hasText: 'Back to prompts' })).toBeVisible(); - }); - - test('Prompts tab — version comparison', async ({ page }) => { - await openLlmPanel(page); - await switchTab(page, 'Prompts'); - const content = page.locator('#llmContent'); - // Click into summarizer prompt - await content.locator('td', { hasText: 'summarizer' }).click(); - await expect(content.locator('button', { hasText: 'Back to prompts' })).toBeVisible(); - // Should show "Compare Selected" button (initially disabled) - const compareBtn = content.locator('button', { hasText: 'Compare Selected' }); - await expect(compareBtn).toBeVisible(); - await expect(compareBtn).toHaveCSS('opacity', '0.4'); - // Check two version checkboxes - const checkboxes = content.locator('input[type="checkbox"]'); - const cbCount = await checkboxes.count(); - if (cbCount >= 2) { - await checkboxes.nth(0).check(); - 
await checkboxes.nth(1).check(); - // Compare button should now be active - await expect(compareBtn).toHaveCSS('opacity', '1'); - // Click compare - await compareBtn.click(); - // Should show comparison table with Metric, v1, v2, Delta columns - await expect(content).toContainText('Metric'); - await expect(content).toContainText('Delta'); - await expect(content.locator('button', { hasText: 'Back to versions' })).toBeVisible(); - } }); test('Search tab — query and results', async ({ page }) => { diff --git a/tests/e2e/seed.ts b/tests/e2e/seed.ts index a5bd61f..40bb5b8 100644 --- a/tests/e2e/seed.ts +++ b/tests/e2e/seed.ts @@ -174,17 +174,7 @@ export async function seed(config: SeedConfig) { * Uses session auth for mutation endpoints. */ export async function seedPostFlush(config: SeedConfig) { - // ── Feedback on trace 1 ── - const fbResp = await sessionFetch(config, '/v1/llm/traces/trace-e2e-001/feedback', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ user_id: 'e2e-tester', value: 1, comment: 'Good response' }), - }); - if (!fbResp.ok) { - console.warn(`Feedback seed failed: ${fbResp.status} ${await fbResp.text()}`); - } - - // ── Scores on trace 1 ── + // ── Scores on trace 1 (requires trace to exist in SQLite) ── for (const score of [ { name: 'quality', value: 0.85 }, { name: 'relevance', value: 0.72 }, @@ -199,15 +189,5 @@ export async function seedPostFlush(config: SeedConfig) { } } - // ── Budget ── - const budgetResp = await sessionFetch(config, '/v1/llm/budget', { - method: 'PUT', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ monthly_budget_micros: 50_000_000, alert_threshold_pct: 80 }), - }); - if (!budgetResp.ok) { - console.warn(`Budget seed failed: ${budgetResp.status} ${await budgetResp.text()}`); - } - - console.log('Post-flush seed: 1 feedback, 2 scores, 1 budget'); + console.log('Post-flush seed: 2 scores'); }