Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions src/__tests__/qualityScore.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import { describe, expect, it } from "vitest";
import {
computeQualityScore,
scoreToGrade,
gradeColor,
formatScoreTooltip,
} from "../lib/qualityScore.js";
import { theme } from "../lib/theme.js";

describe("qualityScore", function () {
describe("computeQualityScore", function () {
  // Shared check: the overall score must fall inside the closed range [0, 1].
  function expectUnitRangeScore(outcome) {
    expect(outcome.score).toBeGreaterThanOrEqual(0);
    expect(outcome.score).toBeLessThanOrEqual(1);
  }

  it("returns a score between 0 and 1", function () {
    var stats = { totalEvents: 100, totalTurns: 10, totalToolCalls: 50, errorCount: 2, uniqueToolCount: 8 };
    var autonomyMetrics = { efficiency: 0.7 };

    expectUnitRangeScore(computeQualityScore(stats, autonomyMetrics));
  });

  it("gives A grade for a perfect session", function () {
    // Error-free session with high efficiency.
    var stats = { totalEvents: 200, totalTurns: 10, totalToolCalls: 80, errorCount: 0, uniqueToolCount: 12 };
    var outcome = computeQualityScore(stats, { efficiency: 0.95 });

    expect(outcome.grade).toBe("A");
    expect(outcome.score).toBeGreaterThanOrEqual(0.9);
  });

  it("gives D or F for high error rate", function () {
    // Every tool call in this session is counted as an error (5 errors / 5 calls).
    var stats = { totalEvents: 10, totalTurns: 2, totalToolCalls: 5, errorCount: 5, uniqueToolCount: 2 };
    var outcome = computeQualityScore(stats, { efficiency: 0.3 });

    expect(["D", "F"]).toContain(outcome.grade);
  });

  it("handles missing stats gracefully", function () {
    var outcome = computeQualityScore(null, null);

    expect(outcome).toBeDefined();
    expectUnitRangeScore(outcome);
    expect(outcome.grade).toBeDefined();
    expect(outcome.components).toBeDefined();
  });

  it("handles undefined inputs without crashing", function () {
    // Each pair is [stats, autonomyMetrics] passed positionally.
    var argumentPairs = [
      [undefined, undefined],
      [{}, {}],
      [{}, undefined],
      [undefined, {}],
    ];

    argumentPairs.forEach(function (pair) {
      expect(function () { computeQualityScore(pair[0], pair[1]); }).not.toThrow();
    });
  });

  it("handles empty stats object", function () {
    expectUnitRangeScore(computeQualityScore({}, {}));
  });

  it("returns all five component scores", function () {
    var stats = { totalEvents: 50, totalTurns: 5, totalToolCalls: 20, errorCount: 1, uniqueToolCount: 5 };
    var outcome = computeQualityScore(stats, { efficiency: 0.6 });
    var componentNames = ["errorRate", "autonomy", "toolDiversity", "completion", "efficiency"];

    componentNames.forEach(function (name) {
      expect(outcome.components[name]).toBeDefined();
    });
  });
});

describe("scoreToGrade", function () {
  // Asserts that every score in `scores` maps to the same letter grade.
  function expectGrade(scores, letter) {
    scores.forEach(function (value) {
      expect(scoreToGrade(value)).toBe(letter);
    });
  }

  it("returns A for score >= 0.9", function () {
    expectGrade([0.9, 1.0, 0.95], "A");
  });

  it("returns B for score >= 0.8", function () {
    expectGrade([0.8, 0.89], "B");
  });

  it("returns C for score >= 0.65", function () {
    expectGrade([0.65, 0.79], "C");
  });

  it("returns D for score >= 0.5", function () {
    expectGrade([0.5, 0.64], "D");
  });

  it("returns F for score < 0.5", function () {
    expectGrade([0.49, 0.0], "F");
  });
});

describe("gradeColor", function () {
  // Asserts that each listed grade resolves to the given theme color.
  function expectColor(grades, expectedColor) {
    grades.forEach(function (letter) {
      expect(gradeColor(letter)).toBe(expectedColor);
    });
  }

  it("returns success color for A and B", function () {
    expectColor(["A", "B"], theme.semantic.success);
  });

  it("returns warning color for C", function () {
    expectColor(["C"], theme.semantic.warning);
  });

  it("returns error color for D and F", function () {
    expectColor(["D", "F"], theme.semantic.error);
  });
});

describe("formatScoreTooltip", function () {
  it("formats tooltip with grade and percentages", function () {
    var stats = { totalEvents: 100, totalTurns: 5, totalToolCalls: 30, errorCount: 0, uniqueToolCount: 8 };
    var outcome = computeQualityScore(stats, { efficiency: 0.8 });
    var text = formatScoreTooltip(outcome);

    expect(text).toContain("Quality:");
    // NOTE(review): for grade "A" or "C" this substring check is also satisfied
    // by the "Autonomy:" / "Completion:" labels asserted below, so it does not
    // by itself prove the grade letter is rendered next to "Quality:".
    expect(text).toContain(outcome.grade);

    // One labelled line is expected per score component.
    var componentLabels = ["Errors:", "Autonomy:", "Tool diversity:", "Completion:", "Efficiency:"];
    componentLabels.forEach(function (label) {
      expect(text).toContain(label);
    });
  });

  it("returns empty string for null input", function () {
    [null, undefined].forEach(function (missing) {
      expect(formatScoreTooltip(missing)).toBe("");
    });
  });
});

describe("tool diversity scoring edge cases", function () {
  // Runs the scorer with a fixed efficiency of 0.8 and returns only the
  // tool-diversity component.
  function toolDiversityFor(stats) {
    return computeQualityScore(stats, { efficiency: 0.8 }).components.toolDiversity;
  }

  it("returns 0.5 for zero tool calls", function () {
    // uniqueToolCount is deliberately omitted here.
    var stats = { totalEvents: 10, totalTurns: 2, totalToolCalls: 0, errorCount: 0 };

    expect(toolDiversityFor(stats)).toBe(0.5);
  });

  it("handles sessions with only 1 unique tool", function () {
    // A single distinct tool spread over 100 calls; 0.3 is the expected score.
    // NOTE(review): presumably this lands in the lowest diversity bucket -
    // confirm the ratio formula and thresholds in qualityScore.js.
    var stats = { totalEvents: 100, totalTurns: 5, totalToolCalls: 100, errorCount: 0, uniqueToolCount: 1 };

    expect(toolDiversityFor(stats)).toBe(0.3);
  });

  it("handles sessions with many unique tools", function () {
    // Five distinct tools over five calls; the expected score is 0.7, not 1.
    // NOTE(review): presumably very high diversity is scored below the
    // maximum on purpose - confirm against qualityScore.js.
    var stats = { totalEvents: 10, totalTurns: 2, totalToolCalls: 5, errorCount: 0, uniqueToolCount: 5 };

    expect(toolDiversityFor(stats)).toBe(0.7);
  });
});

describe("score clamping", function () {
  // Asserts that a single component value lies within [0, 1].
  function expectWithinUnitRange(value) {
    expect(value).toBeGreaterThanOrEqual(0);
    expect(value).toBeLessThanOrEqual(1);
  }

  it("never produces component scores outside 0-1", function () {
    // Two extreme sessions: far more errors than events, and a very large
    // error-free session. Both are scored with an out-of-range efficiency (1.5).
    var errorHeavy = { totalEvents: 1, totalTurns: 0, totalToolCalls: 0, errorCount: 100 };
    var oversized = { totalEvents: 1000, totalTurns: 100, totalToolCalls: 5000, errorCount: 0, uniqueToolCount: 50 };
    var extremes = [errorHeavy, oversized];

    for (var i = 0; i < extremes.length; i += 1) {
      var components = computeQualityScore(extremes[i], { efficiency: 1.5 }).components;
      var names = Object.keys(components);

      for (var j = 0; j < names.length; j += 1) {
        expectWithinUnitRange(components[names[j]]);
      }
    }
  });
});
});
7 changes: 7 additions & 0 deletions src/components/DashboardView.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import {
sortDiscoveredLandingEntries,
sortLandingEntries,
} from "../lib/landingSessions.js";
import { computeQualityScore } from "../lib/qualityScore.js";
import QualityBadge from "./QualityBadge.jsx";
import Icon from "./Icon.jsx";
import usePersistentState from "../hooks/usePersistentState.js";
import ToolbarButton from "./ui/ToolbarButton.jsx";
Expand Down Expand Up @@ -105,6 +107,7 @@ function SessionCard({ entry, onClick }) {
var summary = isDiscovered ? null : getCardSummary(entry, title);
var meta = buildCardMeta(entry, title);
var updatedLabel = formatRelativeTime(entry.updatedAt || entry.importedAt);
var quality = isDiscovered ? null : computeQualityScore(entry.stats, entry.autonomyMetrics);
var chips = [
entry.reviewScore != null ? { label: "Needs review", value: entry.reviewScore.toFixed(1) } : null,
autonomy.autonomyEfficiency != null ? { label: "Autonomy", value: formatAutonomyEfficiency(autonomy.autonomyEfficiency) } : null,
Expand Down Expand Up @@ -164,8 +167,12 @@ function SessionCard({ entry, onClick }) {
overflow: "hidden",
textOverflow: "ellipsis",
whiteSpace: "nowrap",
display: "flex",
alignItems: "center",
gap: 6,
}}>
{title}
{quality && <QualityBadge grade={quality.grade} score={quality.score} />}
</span>
<span style={{ fontSize: theme.fontSize.xs, color: theme.text.ghost, flexShrink: 0, marginTop: 1 }}>
{updatedLabel}
Expand Down
15 changes: 14 additions & 1 deletion src/components/InboxView.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import {
import Icon from "./Icon.jsx";
import ToolbarButton from "./ui/ToolbarButton.jsx";
import ToolbarSelect from "./ui/ToolbarSelect.jsx";
import { computeQualityScore, formatScoreTooltip } from "../lib/qualityScore.js";
import QualityBadge from "./QualityBadge.jsx";
import usePersistentState from "../hooks/usePersistentState.js";

var SORT_OPTIONS = [
Expand All @@ -27,6 +29,7 @@ var SORT_OPTIONS = [
{ id: "most-expensive", label: LANDING_SORT_LABELS["most-expensive"] },
{ id: "highest-babysitting", label: "Most human response time" },
{ id: "highest-idle", label: "Highest idle" },
{ id: "highest-quality", label: "Highest quality" },
{ id: "most-recent", label: LANDING_SORT_LABELS["most-recent"] },
];

Expand All @@ -43,6 +46,14 @@ function sortEntries(entries, sortMode) {
});
}

if (sortMode === "highest-quality") {
return (entries || []).slice().sort(function (left, right) {
var leftScore = computeQualityScore(left.stats, left.autonomyMetrics).score;
var rightScore = computeQualityScore(right.stats, right.autonomyMetrics).score;
return rightScore - leftScore;
});
}

return sortLandingEntries(entries, sortMode);
}

Expand Down Expand Up @@ -474,6 +485,7 @@ export default function InboxView({ entries, onOpenSession, onImport, onLoadSamp
var canOpen = Boolean(entry.hasContent || entry.discoveredPath);
var title = getLandingEntryDisplayTitle(entry);
var secondaryText = getLandingEntrySecondaryText(entry, title);
var quality = computeQualityScore(entry.stats, entry.autonomyMetrics);

return (
<div
Expand All @@ -489,9 +501,10 @@ export default function InboxView({ entries, onOpenSession, onImport, onLoadSamp
<div style={{ minWidth: 0 }}>
<div
title={buildEntryTooltip(entry)}
style={{ fontSize: theme.fontSize.base, color: theme.text.primary, fontFamily: theme.font.mono }}
style={{ display: "flex", alignItems: "center", gap: 6, fontSize: theme.fontSize.base, color: theme.text.primary, fontFamily: theme.font.mono }}
>
{title}
<QualityBadge grade={quality.grade} score={quality.score} />
</div>
<div style={{ fontSize: theme.fontSize.sm, color: theme.text.muted, marginTop: 4, lineHeight: 1.5 }}>
{renderMeta(entry)}
Expand Down
32 changes: 32 additions & 0 deletions src/components/QualityBadge.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { theme } from "../lib/theme.js";
import { gradeColor } from "../lib/qualityScore.js";

export default function QualityBadge({ grade, score, style }) {
if (!grade) return null;

var color = gradeColor(grade);

return (
<span
title={"Quality: " + grade + " (" + Math.round((score || 0) * 100) + "%)"}
style={Object.assign({
display: "inline-flex",
alignItems: "center",
justifyContent: "center",
width: 22,
height: 18,
borderRadius: theme.radius.sm,
fontSize: theme.fontSize.xs,
fontFamily: theme.font.mono,
fontWeight: 600,
color: color,
background: color + "18",
border: "1px solid " + color + "30",
flexShrink: 0,
lineHeight: 1,
}, style || {})}
>
{grade}
</span>
);
}
Loading
Loading