wimi321
diff --git a/README.md b/README.md
Lines changed: 18 additions & 3 deletions
diff --git a/README.zh-CN.md b/README.zh-CN.md
Lines changed: 18 additions & 3 deletions
diff --git a/ROADMAP.md b/ROADMAP.md
Lines changed: 1 addition & 0 deletions
diff --git a/ROADMAP.zh-CN.md b/ROADMAP.zh-CN.md
Lines changed: 1 addition & 0 deletions
diff --git a/src/cli/commands/report.ts b/src/cli/commands/report.ts
Lines changed: 57 additions & 0 deletions
diff --git a/src/cli/index.ts b/src/cli/index.ts
Lines changed: 2 additions & 0 deletions
diff --git a/src/core/bundle.ts b/src/core/bundle.ts
Lines changed: 1 addition & 0 deletions
@@ -77,19 +77,25 @@ npm run dev -- validate ./examples/hello-world-bundle
 npm run dev -- scan ./examples
 ```
 
-6. Generate starter inputs:
+6. Generate a benchmark report:
+
+```bash
+npm run dev -- report ./examples --out ./dist/benchmark-report.md
+```
+
+7. Generate starter inputs:
 
 ```bash
 npm run dev -- init --out ./starter
 ```
 
-7. Pack from the generated config:
+8. Pack from the generated config:
 
 ```bash
 npm run dev -- pack --config ./starter/taskbundle.config.json
 ```
 
-8. Archive the result:
+9. Archive the result:
 
 ```bash
 npm run dev -- archive ./starter/bundle-output --out ./starter/bundle-output.tar.gz
@@ -201,6 +207,13 @@ Scan a directory for bundle folders:
 npm run dev -- scan ./examples
 ```
 
+### `taskbundle report`
+Print a benchmark-style ranking and leaderboard for a directory of bundles, and optionally write a Markdown report with `--out`:
+
+```bash
+npm run dev -- report ./examples --out ./dist/benchmark-report.md
+```
+
 ## Example Bundles
 
 The repository includes two real examples:
@@ -209,6 +222,8 @@ The repository includes two real examples:
 
 They represent the same task captured from different tool/model combinations so `compare` has something meaningful to show.
 
+You can also point `taskbundle report` at the same directory to generate a small benchmark-style leaderboard.
+
 ## Bundle Format At A Glance
 
 - `bundle.json`: top-level metadata and artifact pointers
 
@@ -77,19 +77,25 @@ npm run dev -- validate ./examples/hello-world-bundle
 npm run dev -- scan ./examples
 ```
 
-6. 生成 starter 输入目录：
+6. 生成 benchmark 风格报告：
+
+```bash
+npm run dev -- report ./examples --out ./dist/benchmark-report.md
+```
+
+7. 生成 starter 输入目录：
 
 ```bash
 npm run dev -- init --out ./starter
 ```
 
-7. 直接从配置文件打包：
+8. 直接从配置文件打包：
 
 ```bash
 npm run dev -- pack --config ./starter/taskbundle.config.json
 ```
 
-8. 把 bundle 归档成 `.tar.gz`：
+9. 把 bundle 归档成 `.tar.gz`：
 
 ```bash
 npm run dev -- archive ./starter/bundle-output --out ./starter/bundle-output.tar.gz
@@ -201,6 +207,13 @@ npm run dev -- validate ./examples/hello-world-bundle
 npm run dev -- scan ./examples
 ```
 
+### `taskbundle report`
+输出 benchmark 风格的排名和排行榜，并可通过 `--out` 导出 Markdown 报告：
+
+```bash
+npm run dev -- report ./examples --out ./dist/benchmark-report.md
+```
+
 ## 示例 Bundle
 
 仓库里现在有两个示例：
@@ -209,6 +222,8 @@ npm run dev -- scan ./examples
 
 它们表达的是同一个任务，但来自不同的工具 / 模型组合，所以 `compare` 命令有真实可看的结果。
 
+你也可以直接把这个目录交给 `taskbundle report`，生成一份小型 benchmark 排行榜。
+
 ## Bundle 格式一眼看懂
 
 - `bundle.json`：顶层元数据和 artifact 指针
 
@@ -14,6 +14,7 @@ Task Bundle started as a small CLI MVP. This roadmap turns it into a practical f
 - Done: `validate` and `scan` commands for replay checks and bundle collections
 - Done: artifact hashes and sizes in `bundle.json`
 - Done: benchmark-style outcome fields in bundle metadata
+- Done: benchmark report generation with ranking, leaderboard, and Markdown export
 - Done: CLI smoke tests and GitHub Actions CI
 - Done: Chinese and English documentation
 
 
@@ -14,6 +14,7 @@ Task Bundle 目前已经从一个小型 CLI MVP，走到了“可实际使用的
 - 已完成：`validate` 与 `scan` 命令，用于 replay 校验和 bundle 集合扫描
 - 已完成：artifact 哈希和大小写入 `bundle.json`
 - 已完成：bundle metadata 中的 benchmark / judge 结果字段
+- 已完成：benchmark report 生成，支持排名（ranking）、排行榜（leaderboard）和 Markdown 导出
 - 已完成：CLI smoke tests 和 GitHub Actions CI
 - 已完成：中英文文档
 
 
@@ -0,0 +1,57 @@
+import path from "node:path";
+import { Command } from "commander";
+import { generateBenchmarkReport, renderBenchmarkReportMarkdown } from "../../core/report";
+import { writeTextFile } from "../../utils/fs";
+import { printKeyValue } from "../../utils/output";
+
+/**
+ * Formats an optional score for terminal output.
+ *
+ * The value is rounded to at most four decimal places and trailing zeros are
+ * dropped (e.g. `0.5` rather than `0.5000`). A missing score renders as "n/a".
+ */
+function formatScore(score: number | undefined): string {
+  return score !== undefined ? Number(score.toFixed(4)).toString() : "n/a";
+}
+
+/**
+ * Registers the `report` subcommand on the given CLI program.
+ *
+ * The command builds a benchmark report for `<rootDir>` and then:
+ * - with `--out <file>`, writes the Markdown rendering of the report to that file
+ *   (this happens even when `--json` is also given);
+ * - with `--json`, prints the report object as indented JSON and stops;
+ * - otherwise prints a text summary, the ranking, and the leaderboard.
+ *
+ * @param program Commander program the subcommand is attached to.
+ */
+export function registerReportCommand(program: Command): void {
+  program
+    .command("report")
+    .description("Generate a benchmark-style report for a directory of bundles.")
+    .option("--json", "Print machine-readable JSON instead of text")
+    .option("--out <file>", "Write a Markdown report to a file")
+    .argument("<rootDir>", "Directory that contains bundle folders")
+    .action(async (rootDir: string, options: { json?: boolean; out?: string }) => {
+      const report = await generateBenchmarkReport(path.resolve(rootDir));
+
+      // Resolve the output path once so the file write and the final log line agree.
+      const outPath = options.out ? path.resolve(options.out) : undefined;
+
+      if (outPath !== undefined) {
+        await writeTextFile(outPath, renderBenchmarkReportMarkdown(report));
+      }
+
+      if (options.json) {
+        // JSON mode prints only the report object so stdout stays machine-readable.
+        console.log(JSON.stringify(report, null, 2));
+        return;
+      }
+
+      console.log("Task Bundle Benchmark Report");
+      console.log("----------------------------");
+      printKeyValue("Root", report.rootDir);
+      printKeyValue("Bundles", String(report.bundleCount));
+      printKeyValue("Scored bundles", String(report.scoredBundleCount));
+      printKeyValue("Average score", formatScore(report.averageScore));
+      console.log("");
+      console.log("Ranking");
+      for (const entry of report.ranking) {
+        console.log(
+          `${entry.rank}. ${entry.title} | ${entry.tool ?? "unknown"} / ${entry.model ?? "unknown"} | ${
+            entry.status ?? "unknown"
+          } | score ${formatScore(entry.score)}`
+        );
+      }
+      console.log("");
+      console.log("Leaderboard");
+      for (const entry of report.leaderboard) {
+        console.log(
+          `- ${entry.tool ?? "unknown"} / ${entry.model ?? "unknown"} | runs ${entry.runs} | avg ${formatScore(
+            entry.averageScore
+          )} | best ${formatScore(entry.bestScore)}`
+        );
+      }
+
+      if (outPath !== undefined) {
+        console.log("");
+        console.log(`Markdown report: ${outPath}`);
+      }
+    });
+}
@@ -5,6 +5,7 @@ import { registerCompareCommand } from "./commands/compare";
 import { registerInitCommand } from "./commands/init";
 import { registerInspectCommand } from "./commands/inspect";
 import { registerPackCommand } from "./commands/pack";
+import { registerReportCommand } from "./commands/report";
 import { registerScanCommand } from "./commands/scan";
 import { registerValidateCommand } from "./commands/validate";
 
@@ -23,6 +24,7 @@ async function main(): Promise<void> {
   registerArchiveCommands(program);
   registerValidateCommand(program);
   registerScanCommand(program);
+  registerReportCommand(program);
 
   await program.parseAsync(process.argv);
 }
 
@@ -246,6 +246,7 @@ export async function inspectBundle(bundleDir: string): Promise<BundleInspection
   const artifacts = await detectArtifacts(resolvedBundleDir);
 
   return {
+    bundleDir: resolvedBundleDir,
     title: bundle.metadata.title,
     schemaVersion: bundle.metadata.schemaVersion,
     createdAt: bundle.metadata.createdAt,