From 184daa71d066ce19ff6b79c643dadee813e4b45a Mon Sep 17 00:00:00 2001 From: hshum Date: Wed, 20 May 2026 16:33:13 -0700 Subject: [PATCH] fix(redesign): R34 CI fix + gen-over-gen QA pipeline + inbox stagger animations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the `record` CI check failure (pr-demo-video workflow needed npm install instead of npm ci since package-lock.json is gitignored). Upgrade the Gemini QA pipeline with generation-over-generation milestone tracking, semantic P1 fingerprinting, and dimension trend analysis. Add stagger fade-in animations to inbox list items and smoother surface content transitions. - CI: Remove `cache: npm` from setup-node, switch npm ci → npm install - Pipeline: Milestone diff engine (baseline, gen boundaries, midpoint, previous) - Pipeline: Semantic P1 fingerprinting for stochastic variance filtering - Pipeline: Dimension trend arrows with velocity, scenario stability badges - UI: rd-stagger-fade-in keyframe for inbox rows (40ms cascade, 400ms cap) - UI: Transition smoothness on inbox-list, v3-card, inbox-row elements - UI: prefers-reduced-motion respected for new animations Co-Authored-By: Claude Opus 4.6 --- .github/workflows/pr-demo-video.yml | 5 +- scripts/ui/interactiveGeminiQaPipeline.mjs | 605 +++++++++++++++--- src/features/redesign/primitives.css | 18 + .../redesign/surfaces/InboxSurface.tsx | 6 +- 4 files changed, 550 insertions(+), 84 deletions(-) diff --git a/.github/workflows/pr-demo-video.yml b/.github/workflows/pr-demo-video.yml index 98e205f2..32d02e1e 100644 --- a/.github/workflows/pr-demo-video.yml +++ b/.github/workflows/pr-demo-video.yml @@ -50,10 +50,11 @@ jobs: uses: actions/setup-node@v4 with: node-version: 20 - cache: npm + # NOTE: `cache: npm` removed — package-lock.json is gitignored + # in this repo, so the action fails with "lock file not found". - name: Install dependencies - run: npm ci + run: npm install --legacy-peer-deps - name: Install Playwright browsers run: npx playwright install --with-deps chromium diff --git a/scripts/ui/interactiveGeminiQaPipeline.mjs b/scripts/ui/interactiveGeminiQaPipeline.mjs index 7ddfb928..43bd7216 100644 --- a/scripts/ui/interactiveGeminiQaPipeline.mjs +++ b/scripts/ui/interactiveGeminiQaPipeline.mjs @@ -525,9 +525,18 @@ async function evaluateVideos(videos) { } /* ═══════════════════════════════════════════════════════════════ - PHASE 5: Cross-Round Comparison (new) + PHASE 5: Cross-Round Comparison — Gen-over-Gen Milestone Engine + ═══════════════════════════════════════════════════════════════ + Upgraded from simple prev-round diff to full generational analysis: + - Milestone comparisons (vs baseline, midpoints, every 5th round) + - Semantic P1 fingerprinting (dedup stochastic re-flags by description) + - Video dimension trends across ALL rounds (not just score) + - All-time dimension stats with trend direction + velocity + - Generation segmentation (Gen 1 = R11-R15, Gen 2 = R16-R20, etc.) ═══════════════════════════════════════════════════════════════ */ +const GEN_SIZE = 5; // Rounds per generation + function extractAllP1Ids(screenshotEval) { if (!screenshotEval?.surfaceScores) return []; const ids = []; @@ -550,199 +559,632 @@ function extractAllP1Details(screenshotEval) { return details; } +/** + * Semantic P1 fingerprint — normalize issue text to catch stochastic + * re-flags that get different issueId each round but describe the same bug. + * E.g. "p1-home-3" and "p1-home-2" with text "Chat now button low contrast" + * should be recognized as the same issue. + */ +function p1Fingerprint(p1) { + const text = (p1.issue || p1.issueId || "").toLowerCase() + .replace(/[^a-z0-9\s]/g, "") // strip punctuation + .replace(/\s+/g, " ") // normalize whitespace + .trim(); + const surface = (p1.surface || "").toLowerCase(); + // Use first 60 chars of normalized issue text + surface as fingerprint + return `${surface}::${text.slice(0, 60)}`; +} + +/** + * Group rounds into generations. Gen 1 = first GEN_SIZE rounds, etc. + */ +function assignGeneration(roundNumber, baselineRound) { + return Math.floor((roundNumber - baselineRound) / GEN_SIZE) + 1; +} + +/** + * Select milestone rounds for comparison from history: + * - Baseline (first round) + * - Midpoint (50% through history) + * - Every GEN_SIZE-th round (generation boundaries) + * - Previous round + */ +function selectMilestoneRounds(history, currentRound) { + if (history.length === 0) return []; + const milestones = []; + const seen = new Set(); + + // Baseline (first round) + const baseline = history[0]; + milestones.push({ tag: "baseline", round: baseline }); + seen.add(baseline.roundNumber); + + // Generation boundaries (every GEN_SIZE rounds) + const baseRound = history[0].roundNumber; + for (const r of history) { + const gen = assignGeneration(r.roundNumber, baseRound); + const genStart = baseRound + (gen - 1) * GEN_SIZE; + // Pick the first round of each generation as boundary marker + if (r.roundNumber === genStart && !seen.has(r.roundNumber)) { + milestones.push({ tag: `gen-${gen}-start`, round: r }); + seen.add(r.roundNumber); + } + } + + // Midpoint + const midIdx = Math.floor(history.length / 2); + if (!seen.has(history[midIdx].roundNumber)) { + milestones.push({ tag: "midpoint", round: history[midIdx] }); + seen.add(history[midIdx].roundNumber); + } + + // Previous round (most recent in history) + const prev = history[history.length - 1]; + if (!seen.has(prev.roundNumber)) { + milestones.push({ tag: "previous", round: prev }); + seen.add(prev.roundNumber); + } + + return milestones.sort((a, b) => a.round.roundNumber - b.round.roundNumber); +} + +/** + * Compute a milestone diff: current vs reference round. + */ +function computeMilestoneDiff(current, reference, tag) { + const currScore = current.screenshotEval?.overallScore || 0; + const refScore = reference.screenshotEval?.overallScore || 0; + const currP1s = extractAllP1Details(current.screenshotEval); + const refP1s = extractAllP1Details(reference.screenshotEval); + + // Semantic fingerprint diff + const currFingerprints = new Set(currP1s.map(p1Fingerprint)); + const refFingerprints = new Set(refP1s.map(p1Fingerprint)); + const resolvedFingerprints = [...refFingerprints].filter(f => !currFingerprints.has(f)); + const newFingerprints = [...currFingerprints].filter(f => !refFingerprints.has(f)); + + // Scenario diff + const currScenarios = current.screenshotEval?.scenarioResults || {}; + const refScenarios = reference.screenshotEval?.scenarioResults || {}; + const currPass = Object.values(currScenarios).filter(r => r.pass).length; + const refPass = Object.values(refScenarios).filter(r => r.pass).length; + + // Video diff + const currVideoScore = current.videoEval?.overallInteractionScore || null; + const refVideoScore = reference.videoEval?.overallInteractionScore || null; + + // Dimension diff (screenshot) + const dimNames = ["visualHierarchy", "touchTargets", "contentDensity", "typography", "firstImpression", + "glassDesignSystem", "navigation", "emptyLoadingStates", "responsiveParity", "interactionQuality"]; + const dimDiffs = {}; + for (const dim of dimNames) { + const currAvg = avgDimension(current.screenshotEval, dim); + const refAvg = avgDimension(reference.screenshotEval, dim); + if (currAvg != null && refAvg != null) { + dimDiffs[dim] = { current: currAvg, reference: refAvg, delta: +(currAvg - refAvg).toFixed(1) }; + } + } + + // Video dimension diff + const videoDimDiffs = {}; + const vidDimNames = ["transitionSmoothness", "loadingTiming", "statePersistence", "dataLoading", + "interactionResponsiveness", "errorHandling", "navigationFlow", "animationQuality"]; + for (const dim of vidDimNames) { + const curr = current.videoEval?.dimensions?.[dim] ?? null; + const ref = reference.videoEval?.dimensions?.[dim] ?? null; + if (curr != null && ref != null) { + videoDimDiffs[dim] = { current: curr, reference: ref, delta: curr - ref }; + } + } + + return { + tag, + referenceRound: reference.roundNumber, + referenceLabel: reference.label || "", + roundsApart: current.roundNumber - reference.roundNumber, + scoreDiff: { current: currScore, reference: refScore, delta: currScore - refScore }, + p1Diff: { + current: currP1s.length, reference: refP1s.length, delta: currP1s.length - refP1s.length, + resolvedSemanticCount: resolvedFingerprints.length, + newSemanticCount: newFingerprints.length, + }, + scenarioDiff: { current: currPass, reference: refPass, delta: currPass - refPass }, + videoDiff: currVideoScore != null && refVideoScore != null + ? { current: currVideoScore, reference: refVideoScore, delta: currVideoScore - refVideoScore } + : null, + dimensionDiffs: dimDiffs, + videoDimensionDiffs: videoDimDiffs, + }; +} + +function avgDimension(screenshotEval, dim) { + if (!screenshotEval?.surfaceScores) return null; + const scores = Object.values(screenshotEval.surfaceScores) + .map(s => s.dimensions?.[dim]).filter(v => v != null); + return scores.length > 0 ? +(scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(1) : null; +} + +/** + * Compute trend direction and velocity from a numeric series. + * Returns: { direction: "up"|"down"|"flat", velocity: number, start, end, range } + */ +function computeTrend(series) { + const valid = series.filter(v => v != null); + if (valid.length < 2) return { direction: "flat", velocity: 0, start: valid[0] ?? null, end: valid[0] ?? null, range: 0 }; + const start = valid[0]; + const end = valid[valid.length - 1]; + const delta = end - start; + const velocity = +(delta / valid.length).toFixed(2); + const range = Math.max(...valid) - Math.min(...valid); + const direction = Math.abs(delta) < 0.5 ? "flat" : delta > 0 ? "up" : "down"; + return { direction, velocity, start, end, range: +range.toFixed(1) }; +} + function generateCrossRoundComparison(currentRound, history) { - console.log("\n[Phase 5] Generating cross-round comparison ...\n"); + console.log("\n[Phase 5] Generating gen-over-gen cross-round comparison ...\n"); + + const allRounds = [...history, currentRound]; + const baselineRound = allRounds[0]?.roundNumber || currentRound.roundNumber; const comparison = { currentRound: currentRound.roundNumber, - totalRoundsCompleted: history.length + 1, + currentGeneration: assignGeneration(currentRound.roundNumber, baselineRound), + totalRoundsCompleted: allRounds.length, + totalGenerations: assignGeneration(currentRound.roundNumber, baselineRound), timestamp: currentRound.timestamp, - // Score trajectory + // === Full score trajectory (all rounds) === scoreTrajectory: [], - // P1 tracking - p1Tracking: { resolved: [], regressed: [], persistent: [], new: [] }, + // === Milestone diffs: current vs baseline, midpoint, gen boundaries, previous === + milestoneDiffs: [], - // Dimension trends (averaged across surfaces) + // === Generation summary: aggregated stats per generation === + generationSummary: [], + + // === P1 tracking with semantic fingerprinting === + p1Tracking: { resolved: [], regressed: [], persistent: [], new: [], stochastic: [] }, + + // === ALL-TIME dimension trends (every round, not just last 5) === dimensionTrends: {}, + dimensionStats: {}, - // Scenario pass rate over time - scenarioTrends: {}, + // === Video dimension trends (all rounds) === + videoDimensionTrends: {}, + videoDimensionStats: {}, - // Video score trajectory (if available) + // === Video score trajectory === videoScoreTrajectory: [], - // Summary + // === Scenario trends (all rounds) === + scenarioTrends: {}, + scenarioStats: {}, + + // === Summary + actions === summary: "", + generationNarrative: "", nextActions: [], }; - // Build score trajectory from history + current - const allRounds = [...history, currentRound]; + // ── Build full score trajectory ── for (const round of allRounds) { const ss = round.screenshotEval; if (!ss) continue; + const gen = assignGeneration(round.roundNumber, baselineRound); comparison.scoreTrajectory.push({ round: round.roundNumber, + generation: gen, score: ss.overallScore || 0, p1Count: extractAllP1Details(ss).length, p2Count: Object.values(ss.surfaceScores || {}).reduce((sum, s) => sum + (s.p2Issues?.length || 0), 0), scenarioPassRate: ss.scenarioResults - ? Object.values(ss.scenarioResults).filter((r) => r.pass).length / Object.values(ss.scenarioResults).length + ? +(Object.values(ss.scenarioResults).filter(r => r.pass).length / Math.max(Object.values(ss.scenarioResults).length, 1)).toFixed(2) : null, label: round.label || "", }); - // Video trajectory + // Video trajectory — now includes dimensions if (round.videoEval?.overallInteractionScore) { comparison.videoScoreTrajectory.push({ round: round.roundNumber, + generation: gen, score: round.videoEval.overallInteractionScore, + dimensions: round.videoEval.dimensions || {}, }); } } - // P1 tracking: compare current vs previous round + // ── Milestone diffs ── + const milestones = selectMilestoneRounds(history, currentRound); + console.log(` Milestone comparisons: ${milestones.map(m => `${m.tag}(R${m.round.roundNumber})`).join(", ") || "none (first round)"}`); + for (const m of milestones) { + comparison.milestoneDiffs.push(computeMilestoneDiff(currentRound, m.round, m.tag)); + } + + // ── Generation summary: aggregate stats per generation ── + const genMap = new Map(); + for (const round of allRounds) { + const gen = assignGeneration(round.roundNumber, baselineRound); + if (!genMap.has(gen)) genMap.set(gen, []); + genMap.get(gen).push(round); + } + for (const [gen, rounds] of [...genMap.entries()].sort((a, b) => a[0] - b[0])) { + const scores = rounds.map(r => r.screenshotEval?.overallScore).filter(v => v != null); + const p1Counts = rounds.map(r => extractAllP1Details(r.screenshotEval).length); + const videoScores = rounds.map(r => r.videoEval?.overallInteractionScore).filter(v => v != null); + const scenarioPassRates = rounds.map(r => { + const sr = r.screenshotEval?.scenarioResults; + return sr ? Object.values(sr).filter(s => s.pass).length / Math.max(Object.values(sr).length, 1) : null; + }).filter(v => v != null); + + comparison.generationSummary.push({ + generation: gen, + roundRange: `R${rounds[0].roundNumber}-R${rounds[rounds.length - 1].roundNumber}`, + roundCount: rounds.length, + avgScore: scores.length > 0 ? +(scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(1) : null, + bestScore: scores.length > 0 ? Math.max(...scores) : null, + avgP1Count: +(p1Counts.reduce((a, b) => a + b, 0) / p1Counts.length).toFixed(1), + avgVideoScore: videoScores.length > 0 ? +(videoScores.reduce((a, b) => a + b, 0) / videoScores.length).toFixed(1) : null, + avgScenarioPassRate: scenarioPassRates.length > 0 + ? +(scenarioPassRates.reduce((a, b) => a + b, 0) / scenarioPassRates.length).toFixed(2) : null, + }); + } + + // ── P1 tracking with SEMANTIC FINGERPRINTING ── if (history.length > 0) { const prev = history[history.length - 1]; - const prevP1Ids = new Set(extractAllP1Ids(prev.screenshotEval)); - const currP1Ids = new Set(extractAllP1Ids(currentRound.screenshotEval)); const currP1Details = extractAllP1Details(currentRound.screenshotEval); const prevP1Details = extractAllP1Details(prev.screenshotEval); - // Resolved: was in prev, not in current - for (const id of prevP1Ids) { - if (!currP1Ids.has(id)) { - const detail = prevP1Details.find((p) => p.issueId === id); - comparison.p1Tracking.resolved.push({ issueId: id, issue: detail?.issue || id, resolvedInRound: currentRound.roundNumber }); - } + // Build fingerprint maps + const currFpMap = new Map(); // fingerprint -> p1 detail + for (const p of currP1Details) currFpMap.set(p1Fingerprint(p), p); + const prevFpMap = new Map(); + for (const p of prevP1Details) prevFpMap.set(p1Fingerprint(p), p); + + const currFingerprints = new Set(currFpMap.keys()); + const prevFingerprints = new Set(prevFpMap.keys()); + + // Also build all-history fingerprint map for regression detection + const allHistoryFpSets = history.slice(0, -1).map(r => { + const details = extractAllP1Details(r.screenshotEval); + return new Set(details.map(p1Fingerprint)); + }); + const allHistoryFingerprints = new Set(); + for (const fpSet of allHistoryFpSets) { + for (const fp of fpSet) allHistoryFingerprints.add(fp); } - // New: in current, not in prev - for (const id of currP1Ids) { - if (!prevP1Ids.has(id)) { - const detail = currP1Details.find((p) => p.issueId === id); - comparison.p1Tracking.new.push({ issueId: id, issue: detail?.issue || id, surface: detail?.surface }); + // Resolved: was in prev, not in current (by fingerprint) + for (const fp of prevFingerprints) { + if (!currFingerprints.has(fp)) { + const detail = prevFpMap.get(fp); + comparison.p1Tracking.resolved.push({ + fingerprint: fp, + issueId: detail?.issueId || fp, + issue: detail?.issue || fp, + surface: detail?.surface, + resolvedInRound: currentRound.roundNumber, + }); } } - // Persistent: in both - for (const id of currP1Ids) { - if (prevP1Ids.has(id)) { - const detail = currP1Details.find((p) => p.issueId === id); - comparison.p1Tracking.persistent.push({ issueId: id, issue: detail?.issue || id, surface: detail?.surface }); + // New: in current, not in prev (by fingerprint) + for (const fp of currFingerprints) { + if (!prevFingerprints.has(fp)) { + const detail = currFpMap.get(fp); + // Check if this is actually a stochastic re-flag (appeared in older rounds, disappeared, reappeared) + const isStochastic = allHistoryFingerprints.has(fp) && !prevFingerprints.has(fp); + if (isStochastic) { + comparison.p1Tracking.stochastic.push({ + fingerprint: fp, + issueId: detail?.issueId || fp, + issue: detail?.issue || fp, + surface: detail?.surface, + note: "Stochastic re-flag: appeared before, disappeared, reappeared (likely Gemini variance, not a real regression)", + }); + } else { + comparison.p1Tracking.new.push({ + fingerprint: fp, + issueId: detail?.issueId || fp, + issue: detail?.issue || fp, + surface: detail?.surface, + }); + } } } - // Regressed: check if any issue that was resolved in an earlier round reappeared - const allHistoricalP1Ids = new Set(); - for (const round of history.slice(0, -1)) { - for (const id of extractAllP1Ids(round.screenshotEval)) allHistoricalP1Ids.add(id); + // Persistent: in both (by fingerprint) + for (const fp of currFingerprints) { + if (prevFingerprints.has(fp)) { + const detail = currFpMap.get(fp); + // Count how many consecutive rounds this has persisted + let streak = 0; + for (let i = history.length - 1; i >= 0; i--) { + const rFps = new Set(extractAllP1Details(history[i].screenshotEval).map(p1Fingerprint)); + if (rFps.has(fp)) streak++; + else break; + } + comparison.p1Tracking.persistent.push({ + fingerprint: fp, + issueId: detail?.issueId || fp, + issue: detail?.issue || fp, + surface: detail?.surface, + persistedRounds: streak + 1, // +1 for current round + }); + } } - for (const id of currP1Ids) { - if (allHistoricalP1Ids.has(id) && !prevP1Ids.has(id)) { - const detail = currP1Details.find((p) => p.issueId === id); - comparison.p1Tracking.regressed.push({ issueId: id, issue: detail?.issue || id, surface: detail?.surface }); + + // Regressed: appeared in history, resolved, now back (excluding stochastic) + for (const fp of currFingerprints) { + if (allHistoryFingerprints.has(fp) && !prevFingerprints.has(fp)) { + // Already captured in stochastic above — only flag as regression if + // it was resolved for 3+ rounds (not just 1-round Gemini variance) + let gapRounds = 0; + for (let i = history.length - 1; i >= 0; i--) { + const rFps = new Set(extractAllP1Details(history[i].screenshotEval).map(p1Fingerprint)); + if (!rFps.has(fp)) gapRounds++; + else break; + } + if (gapRounds >= 3) { + const detail = currFpMap.get(fp); + comparison.p1Tracking.regressed.push({ + fingerprint: fp, + issueId: detail?.issueId || fp, + issue: detail?.issue || fp, + surface: detail?.surface, + gapRounds, + }); + } } } } - // Dimension trends: track averages over last 5 rounds - const recentRounds = allRounds.slice(-5); + // ── ALL-TIME screenshot dimension trends (every round) ── const dimNames = ["visualHierarchy", "touchTargets", "contentDensity", "typography", "firstImpression", "glassDesignSystem", "navigation", "emptyLoadingStates", "responsiveParity", "interactionQuality"]; for (const dim of dimNames) { - comparison.dimensionTrends[dim] = recentRounds.map((r) => { - if (!r.screenshotEval?.surfaceScores) return { round: r.roundNumber, avg: null }; - const scores = Object.values(r.screenshotEval.surfaceScores) - .map((s) => s.dimensions?.[dim]).filter((v) => v != null); - return { round: r.roundNumber, avg: scores.length > 0 ? (scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(1) : null }; - }); + const series = allRounds.map(r => ({ + round: r.roundNumber, + generation: assignGeneration(r.roundNumber, baselineRound), + avg: avgDimension(r.screenshotEval, dim), + })); + comparison.dimensionTrends[dim] = series; + comparison.dimensionStats[dim] = computeTrend(series.map(s => s.avg)); } - // Scenario trends + // ── ALL-TIME video dimension trends ── + const vidDimNames = ["transitionSmoothness", "loadingTiming", "statePersistence", "dataLoading", + "interactionResponsiveness", "errorHandling", "navigationFlow", "animationQuality"]; + for (const dim of vidDimNames) { + const series = allRounds.map(r => ({ + round: r.roundNumber, + generation: assignGeneration(r.roundNumber, baselineRound), + value: r.videoEval?.dimensions?.[dim] ?? null, + })).filter(s => s.value != null); + comparison.videoDimensionTrends[dim] = series; + comparison.videoDimensionStats[dim] = computeTrend(series.map(s => s.value)); + } + + // ── ALL-TIME scenario trends ── for (const sId of ["S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8"]) { - comparison.scenarioTrends[sId] = recentRounds.map((r) => ({ + const series = allRounds.map(r => ({ round: r.roundNumber, + generation: assignGeneration(r.roundNumber, baselineRound), pass: r.screenshotEval?.scenarioResults?.[sId]?.pass ?? null, })); + comparison.scenarioTrends[sId] = series; + // Compute pass rate and stability + const valid = series.filter(s => s.pass != null); + const passCount = valid.filter(s => s.pass).length; + comparison.scenarioStats[sId] = { + passRate: valid.length > 0 ? +(passCount / valid.length).toFixed(2) : null, + totalPasses: passCount, + totalFails: valid.length - passCount, + lastFlip: (() => { + for (let i = valid.length - 1; i > 0; i--) { + if (valid[i].pass !== valid[i - 1].pass) return valid[i].round; + } + return null; + })(), + }; } - // Generate summary + // ── Generate summary ── const curr = currentRound.screenshotEval; const prevScore = history.length > 0 ? history[history.length - 1].screenshotEval?.overallScore : null; const scoreDelta = prevScore != null && curr?.overallScore != null ? curr.overallScore - prevScore : null; + const baselineScore = history.length > 0 ? history[0].screenshotEval?.overallScore : null; + const baselineDelta = baselineScore != null && curr?.overallScore != null ? curr.overallScore - baselineScore : null; const resolvedCount = comparison.p1Tracking.resolved.length; const newCount = comparison.p1Tracking.new.length; const persistentCount = comparison.p1Tracking.persistent.length; + const stochasticCount = comparison.p1Tracking.stochastic.length; comparison.summary = [ - `Round ${currentRound.roundNumber}: score ${curr?.overallScore || "?"}` + - (scoreDelta != null ? ` (${scoreDelta >= 0 ? "+" : ""}${scoreDelta} vs prev)` : ""), + `Round ${currentRound.roundNumber} (Gen ${comparison.currentGeneration}): score ${curr?.overallScore || "?"}`, + scoreDelta != null ? `${scoreDelta >= 0 ? "+" : ""}${scoreDelta} vs prev` : null, + baselineDelta != null ? `${baselineDelta >= 0 ? "+" : ""}${baselineDelta} vs baseline` : null, resolvedCount > 0 ? `${resolvedCount} P1s resolved` : null, - newCount > 0 ? `${newCount} new P1s introduced` : null, + newCount > 0 ? `${newCount} genuinely new P1s` : null, + stochasticCount > 0 ? `${stochasticCount} stochastic re-flags (Gemini variance)` : null, persistentCount > 0 ? `${persistentCount} P1s persistent` : null, - comparison.p1Tracking.regressed.length > 0 ? `WARNING: ${comparison.p1Tracking.regressed.length} P1s regressed` : null, + comparison.p1Tracking.regressed.length > 0 ? `WARNING: ${comparison.p1Tracking.regressed.length} P1s regressed after 3+ round absence` : null, ].filter(Boolean).join(". ") + "."; - // Next actions based on trends - if (comparison.p1Tracking.persistent.length > 0) { - comparison.nextActions.push(`Fix persistent P1s: ${comparison.p1Tracking.persistent.map((p) => p.issueId).join(", ")}`); + // ── Generation narrative ── + if (comparison.generationSummary.length >= 2) { + const gens = comparison.generationSummary; + const first = gens[0]; + const last = gens[gens.length - 1]; + const scoreDelta = last.avgScore != null && first.avgScore != null ? +(last.avgScore - first.avgScore).toFixed(1) : null; + const p1Delta = +(last.avgP1Count - first.avgP1Count).toFixed(1); + comparison.generationNarrative = [ + `${gens.length} generations completed (${first.roundRange} -> ${last.roundRange}).`, + scoreDelta != null ? `Score trajectory: Gen 1 avg ${first.avgScore} -> Gen ${gens.length} avg ${last.avgScore} (${scoreDelta >= 0 ? "+" : ""}${scoreDelta}).` : null, + `P1 trend: Gen 1 avg ${first.avgP1Count} -> Gen ${gens.length} avg ${last.avgP1Count} (${p1Delta >= 0 ? "+" : ""}${p1Delta}).`, + last.avgVideoScore != null && first.avgVideoScore != null + ? `Video: Gen 1 avg ${first.avgVideoScore} -> Gen ${gens.length} avg ${last.avgVideoScore} (${(last.avgVideoScore - first.avgVideoScore).toFixed(1)}).` + : null, + ].filter(Boolean).join(" "); + } + + // ── Next actions ── + // Persistent P1s with long streaks are highest priority + const longPersistent = comparison.p1Tracking.persistent.filter(p => p.persistedRounds >= 3); + if (longPersistent.length > 0) { + comparison.nextActions.push(`PRIORITY: Fix ${longPersistent.length} P1s persisting 3+ rounds: ${longPersistent.map(p => `${p.issueId}(${p.persistedRounds}R)`).join(", ")}`); } if (comparison.p1Tracking.regressed.length > 0) { - comparison.nextActions.push(`URGENT: Investigate regressions: ${comparison.p1Tracking.regressed.map((p) => p.issueId).join(", ")}`); + comparison.nextActions.push(`URGENT: Investigate ${comparison.p1Tracking.regressed.length} regressions after 3+ round absence`); } - // Check for plateau (3+ rounds same score) - const last3 = comparison.scoreTrajectory.slice(-3); - if (last3.length >= 3) { - const scores = last3.map((r) => r.score); + // Check dimension trends for declining areas + for (const [dim, stats] of Object.entries(comparison.dimensionStats)) { + if (stats.direction === "down" && stats.velocity < -0.3) { + comparison.nextActions.push(`DECLINING: ${dim} trending down (${stats.start} -> ${stats.end}, velocity ${stats.velocity}/round)`); + } + } + + // Score plateau detection (3+ rounds within 2 points) + const last5 = comparison.scoreTrajectory.slice(-5); + if (last5.length >= 3) { + const scores = last5.map(r => r.score); const range = Math.max(...scores) - Math.min(...scores); if (range <= 2) { - comparison.nextActions.push("Score plateau detected (last 3 rounds within 2 points). Consider structural changes or model rotation."); + comparison.nextActions.push(`Score plateau (last ${last5.length} rounds within ${range} points). Consider: model rotation, new scenario types, structural CSS overhaul.`); } } + // Scenario stability + const unstableScenarios = Object.entries(comparison.scenarioStats) + .filter(([, stats]) => stats.passRate != null && stats.passRate < 1.0 && stats.passRate > 0) + .map(([sId, stats]) => `${sId}(${Math.round(stats.passRate * 100)}%)`); + if (unstableScenarios.length > 0) { + comparison.nextActions.push(`Unstable scenarios: ${unstableScenarios.join(", ")} — investigate root cause of flakiness`); + } + return comparison; } /* ═══════════════════════════════════════════════════════════════ - PHASE 6: Report Printing + PHASE 6: Report Printing — Gen-over-Gen Upgrade ═══════════════════════════════════════════════════════════════ */ function printReport(roundData) { const { screenshotEval, videoEval, comparison } = roundData; console.log("\n" + "=".repeat(72)); - console.log(" INTERACTIVE GEMINI QA — ROUND " + roundData.roundNumber); + console.log(` INTERACTIVE GEMINI QA — ROUND ${roundData.roundNumber} (Gen ${comparison?.currentGeneration || "?"})`); console.log("=".repeat(72)); - // Cross-round comparison header + // ── Cross-round summary ── if (comparison) { console.log(`\n${comparison.summary}`); - console.log(`\nScore trajectory: ${comparison.scoreTrajectory.map((r) => `R${r.round}:${r.score}`).join(" -> ")}`); - console.log(`P1 trend: ${comparison.scoreTrajectory.map((r) => `R${r.round}:${r.p1Count}p1`).join(" -> ")}`); + // Generation narrative + if (comparison.generationNarrative) { + console.log(`\nGeneration narrative: ${comparison.generationNarrative}`); + } + + // Compact score trajectory (grouped by generation) + const grouped = new Map(); + for (const r of comparison.scoreTrajectory) { + const key = `Gen${r.generation}`; + if (!grouped.has(key)) grouped.set(key, []); + grouped.get(key).push(`R${r.round}:${r.score}`); + } + console.log("\nScore trajectory by generation:"); + for (const [gen, rounds] of grouped) { + console.log(` ${gen.padEnd(6)} ${rounds.join(" -> ")}`); + } + + // ── Milestone diffs (the gen-over-gen comparisons) ── + if (comparison.milestoneDiffs?.length > 0) { + console.log("\nMilestone comparisons (current vs historical reference):"); + console.log(" " + "Tag".padEnd(18) + "Ref".padEnd(6) + "Score".padEnd(10) + "P1s".padEnd(10) + "Scenarios".padEnd(12) + "Video"); + console.log(" " + "-".repeat(66)); + for (const m of comparison.milestoneDiffs) { + const scoreStr = `${m.scoreDiff.delta >= 0 ? "+" : ""}${m.scoreDiff.delta}`; + const p1Str = `${m.p1Diff.delta >= 0 ? "+" : ""}${m.p1Diff.delta}`; + const scenStr = `${m.scenarioDiff.delta >= 0 ? "+" : ""}${m.scenarioDiff.delta}`; + const vidStr = m.videoDiff ? `${m.videoDiff.delta >= 0 ? "+" : ""}${m.videoDiff.delta}` : "n/a"; + console.log(` ${m.tag.padEnd(18)}R${String(m.referenceRound).padEnd(5)}${scoreStr.padEnd(10)}${p1Str.padEnd(10)}${scenStr.padEnd(12)}${vidStr}`); + } + } + + // ── Generation summary table ── + if (comparison.generationSummary?.length > 1) { + console.log("\nGeneration summary:"); + console.log(" " + "Gen".padEnd(6) + "Rounds".padEnd(12) + "AvgScore".padEnd(10) + "BestScore".padEnd(10) + "AvgP1s".padEnd(8) + "AvgVideo".padEnd(10) + "Scenarios"); + console.log(" " + "-".repeat(72)); + for (const g of comparison.generationSummary) { + console.log(` ${String(g.generation).padEnd(6)}${g.roundRange.padEnd(12)}${String(g.avgScore ?? "?").padEnd(10)}${String(g.bestScore ?? "?").padEnd(10)}${String(g.avgP1Count).padEnd(8)}${String(g.avgVideoScore ?? "?").padEnd(10)}${g.avgScenarioPassRate != null ? Math.round(g.avgScenarioPassRate * 100) + "%" : "?"}`); + } + } + + // ── P1 tracking (semantic fingerprinted) ── if (comparison.p1Tracking.resolved.length > 0) { console.log(`\nRESOLVED P1s (${comparison.p1Tracking.resolved.length}):`); comparison.p1Tracking.resolved.forEach((p) => console.log(` [RESOLVED] ${p.issueId}: ${p.issue?.slice(0, 80)}`)); } if (comparison.p1Tracking.new.length > 0) { - console.log(`\nNEW P1s (${comparison.p1Tracking.new.length}):`); + console.log(`\nGENUINELY NEW P1s (${comparison.p1Tracking.new.length}):`); comparison.p1Tracking.new.forEach((p) => console.log(` [NEW] ${p.issueId}: ${p.issue?.slice(0, 80)}`)); } + if (comparison.p1Tracking.stochastic.length > 0) { + console.log(`\nSTOCHASTIC RE-FLAGS (${comparison.p1Tracking.stochastic.length}) — Gemini variance, not real regressions:`); + comparison.p1Tracking.stochastic.forEach((p) => console.log(` [STOCHASTIC] ${p.issueId}: ${p.issue?.slice(0, 80)}`)); + } if (comparison.p1Tracking.persistent.length > 0) { console.log(`\nPERSISTENT P1s (${comparison.p1Tracking.persistent.length}):`); - comparison.p1Tracking.persistent.forEach((p) => console.log(` [PERSISTENT] ${p.issueId}: ${p.issue?.slice(0, 80)}`)); + comparison.p1Tracking.persistent.forEach((p) => + console.log(` [PERSISTENT ${p.persistedRounds}R] ${p.issueId}: ${p.issue?.slice(0, 80)}`)); } if (comparison.p1Tracking.regressed.length > 0) { - console.log(`\n*** REGRESSIONS (${comparison.p1Tracking.regressed.length}):`); - comparison.p1Tracking.regressed.forEach((p) => console.log(` [REGRESSED] ${p.issueId}: ${p.issue?.slice(0, 80)}`)); + console.log(`\n*** TRUE REGRESSIONS (${comparison.p1Tracking.regressed.length}) — absent ${">"}= 3 rounds then reappeared:`); + comparison.p1Tracking.regressed.forEach((p) => + console.log(` [REGRESSED gap=${p.gapRounds}R] ${p.issueId}: ${p.issue?.slice(0, 80)}`)); + } + + // ── Dimension trend summary (all-time, compact) ── + const trendingDims = Object.entries(comparison.dimensionStats || {}) + .filter(([, s]) => s.direction !== "flat") + .sort((a, b) => Math.abs(b[1].velocity) - Math.abs(a[1].velocity)); + if (trendingDims.length > 0) { + console.log("\nDimension trends (all-time):"); + for (const [dim, stats] of trendingDims) { + const arrow = stats.direction === "up" ? "^" : "v"; + console.log(` ${arrow} ${dim.padEnd(24)} ${stats.start} -> ${stats.end} (${stats.velocity >= 0 ? "+" : ""}${stats.velocity}/round)`); + } + } + + // ── Video dimension trends ── + const vidTrends = Object.entries(comparison.videoDimensionStats || {}) + .filter(([, s]) => s.direction !== "flat" && s.start != null) + .sort((a, b) => Math.abs(b[1].velocity) - Math.abs(a[1].velocity)); + if (vidTrends.length > 0) { + console.log("\nVideo dimension trends (all-time):"); + for (const [dim, stats] of vidTrends) { + const arrow = stats.direction === "up" ? "^" : "v"; + console.log(` ${arrow} ${dim.padEnd(30)} ${stats.start} -> ${stats.end} (${stats.velocity >= 0 ? "+" : ""}${stats.velocity}/round)`); + } + } + + // ── Scenario stability ── + const scenarioEntries = Object.entries(comparison.scenarioStats || {}).filter(([, s]) => s.passRate != null); + if (scenarioEntries.length > 0) { + console.log("\nScenario stability (all-time):"); + for (const [sId, stats] of scenarioEntries) { + const rate = Math.round(stats.passRate * 100); + const badge = rate === 100 ? "STABLE" : rate >= 75 ? "FLAKY" : rate >= 50 ? "UNSTABLE" : "FAILING"; + const lastFlip = stats.lastFlip ? ` last-flip=R${stats.lastFlip}` : ""; + console.log(` ${sId} ${String(rate).padStart(3)}% ${badge.padEnd(8)} (${stats.totalPasses}p/${stats.totalFails}f)${lastFlip}`); + } } } - // Screenshot eval summary + // ── Screenshot eval summary ── if (screenshotEval) { console.log(`\nScreenshot Score: ${screenshotEval.overallScore}/100`); if (screenshotEval.surfaceScores) { @@ -760,13 +1202,16 @@ function printReport(roundData) { } } - // Video eval summary + // ── Video eval summary ── if (videoEval) { console.log(`\nVideo Interaction Score: ${videoEval.overallInteractionScore}/100`); if (videoEval.dimensions) { for (const [dim, score] of Object.entries(videoEval.dimensions)) { const bar = score >= 8 ? "OK" : score >= 6 ? "WARN" : "FAIL"; - console.log(` ${dim.padEnd(30)} ${score}/10 ${bar}`); + // Show trend from comparison if available + const trend = comparison?.videoDimensionStats?.[dim]; + const trendStr = trend && trend.direction !== "flat" ? ` (${trend.direction} ${trend.velocity >= 0 ? "+" : ""}${trend.velocity}/R)` : ""; + console.log(` ${dim.padEnd(30)} ${score}/10 ${bar}${trendStr}`); } } if (videoEval.temporalIssues?.length) { @@ -779,10 +1224,10 @@ function printReport(roundData) { } } - // Next actions + // ── Next actions ── if (comparison?.nextActions?.length) { console.log("\nNext actions:"); - comparison.nextActions.forEach((a) => console.log(` -> ${a}`)); + comparison.nextActions.forEach((a, i) => console.log(` ${i + 1}. ${a}`)); } console.log("\n" + "=".repeat(72)); diff --git a/src/features/redesign/primitives.css b/src/features/redesign/primitives.css index 05b415a5..bed4f3e3 100644 --- a/src/features/redesign/primitives.css +++ b/src/features/redesign/primitives.css @@ -546,6 +546,24 @@ } } +/* ── R34 fix: stagger fade-in for list items (inbox, reports) ── */ +@keyframes rd-stagger-fade-in { + from { opacity: 0; transform: translateY(6px); } + to { opacity: 1; transform: translateY(0); } +} +@media (prefers-reduced-motion: reduce) { + [data-redesign] [style*="rd-stagger-fade-in"] { + animation: none !important; + opacity: 1 !important; + } +} + +/* ── R34 fix: smoother surface content transitions ── */ +[data-redesign] .rd-inbox-list, +[data-redesign] .rd-v3-card, +[data-redesign] .rd-inbox-row { + transition: opacity 0.18s ease-out, transform 0.18s ease-out; +} [data-redesign] .rd-pane--right { border-right: none; border-left: 1px solid var(--rd-line-soft); diff --git a/src/features/redesign/surfaces/InboxSurface.tsx b/src/features/redesign/surfaces/InboxSurface.tsx index 157ad052..0bb66d8f 100644 --- a/src/features/redesign/surfaces/InboxSurface.tsx +++ b/src/features/redesign/surfaces/InboxSurface.tsx @@ -444,7 +444,7 @@ export function InboxSurface() {

No items in this lane.

) : ( - items.map((it) => ( + items.map((it, idx) => ( setActiveId(it.id)} onToggleCheck={() => toggleChecked(it.id)} + style={{ animation: "rd-stagger-fade-in 0.22s ease-out both", animationDelay: `${Math.min(idx * 40, 400)}ms` }} /> )) )} @@ -481,7 +482,7 @@ export function InboxSurface() { ); } -function InboxRow({ item, isActive, isChecked, onActivate, onToggleCheck }: { item: InboxItem; isActive: boolean; isChecked?: boolean; onActivate: () => void; onToggleCheck?: () => void }) { +function InboxRow({ item, isActive, isChecked, onActivate, onToggleCheck, style }: { item: InboxItem; isActive: boolean; isChecked?: boolean; onActivate: () => void; onToggleCheck?: () => void; style?: React.CSSProperties }) { const tone = item.whyTone ?? "amber"; const ref = useRef(null); useEffect(() => { @@ -495,6 +496,7 @@ function InboxRow({ item, isActive, isChecked, onActivate, onToggleCheck }: { it aria-selected={isActive} data-checked={isChecked || undefined} onClick={onActivate} + style={style} > {onToggleCheck && (