From 81db38584b5b942ada4776af39429dbb97f5bd8b Mon Sep 17 00:00:00 2001
From: notdarking <devanshp2007@gmail.com>
Date: Sun, 12 Apr 2026 02:43:27 +0530
Subject: [PATCH 1/2] fix: increase data size in retrieval_core and
 graph_analytics (fixes #2)

---
 chuck/tasks/graph_analytics/task.py | 2 +-
 chuck/tasks/retrieval_core/task.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/chuck/tasks/graph_analytics/task.py b/chuck/tasks/graph_analytics/task.py
index 79cb5d3..ecd24a6 100644
--- a/chuck/tasks/graph_analytics/task.py
+++ b/chuck/tasks/graph_analytics/task.py
@@ -44,4 +44,4 @@ def solve(graph: dict[str, list[str]], iterations: int = 16, damping: float = 0.
     }
 
 
-TASK_SPEC = TaskSpec("graph_analytics", generate, solve, 48, 1_000)
+TASK_SPEC = TaskSpec("graph_analytics", generate, solve, 48, 200_000)
diff --git a/chuck/tasks/retrieval_core/task.py b/chuck/tasks/retrieval_core/task.py
index 67c9a04..1b48afc 100644
--- a/chuck/tasks/retrieval_core/task.py
+++ b/chuck/tasks/retrieval_core/task.py
@@ -72,7 +72,7 @@ def solve(payload: dict[str, Any]) -> dict[str, Any]:
     generate,
     solve,
     48,
-    2_000,
+    2000_000,
     algorithm_style="probabilistic",
     reliability_floor=0.88,
 )

From 403c704dcc65aba2837599f01d9cf4fb0ddbf597 Mon Sep 17 00:00:00 2001
From: notdarking <devanshp2007@gmail.com>
Date: Sun, 12 Apr 2026 16:09:03 +0530
Subject: [PATCH 2/2] feat: make benchmark size configurable

---
 chuck/__main__.py                              | 8 +++++++-
 chuck/benchmark.py                             | 8 ++++----
 chuck/benchmarks/compute_core/__init__.py      | 4 ++--
 chuck/benchmarks/data_encoding/__init__.py     | 4 ++--
 chuck/benchmarks/graph_analytics/__init__.py   | 4 ++--
 chuck/benchmarks/io_pipeline/__init__.py       | 4 ++--
 chuck/benchmarks/memory_index/__init__.py      | 4 ++--
 chuck/benchmarks/memory_tier/__init__.py       | 4 ++--
 chuck/benchmarks/ordering_core/__init__.py     | 4 ++--
 chuck/benchmarks/prime_analytics/__init__.py   | 4 ++--
 chuck/benchmarks/relational_fusion/__init__.py | 4 ++--
 chuck/benchmarks/retrieval_core/__init__.py    | 4 ++--
 12 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/chuck/__main__.py b/chuck/__main__.py
index 8599ab9..49c7215 100644
--- a/chuck/__main__.py
+++ b/chuck/__main__.py
@@ -37,6 +37,12 @@ def main() -> int:
         ],
         help="Run a single capability benchmark",
     )
+    bench_parser.add_argument(
+    "--size",
+    type=int,
+    default=None,
+    help="Override the default benchmark size"
+    )
     subparsers.add_parser("regress", help="Run regression checks")
     subparsers.add_parser("generate-baselines", help="Generate regression baselines")
     snapshot_parser = subparsers.add_parser("snapshot", help="Create a performance/reliability snapshot")
@@ -81,7 +87,7 @@ def main() -> int:
     command = args.command or "bench"
 
     if command == "bench":
-        print(format_benchmarks(run_benchmarks(task=args.task)))
+        print(format_benchmarks(run_benchmarks(task=args.task,size=args.size)))
         return 0
     if command == "regress":
         print(format_regression(run_regression()))
diff --git a/chuck/benchmark.py b/chuck/benchmark.py
index 769c4d7..7b25e5d 100644
--- a/chuck/benchmark.py
+++ b/chuck/benchmark.py
@@ -43,12 +43,12 @@
 }
 
 
-def run_benchmarks(task: str | None = None) -> list[dict[str, Any]]:
+def run_benchmarks(task: str | None = None, size: int | None = None) -> list[dict[str, Any]]:
     if task is None:
-        return [runner() for runner in RUNNERS]
-    runner = RUNNER_BY_NAME[task]
-    return [runner()]
+        return [runner(size=size) for runner in RUNNERS]
 
+    runner = RUNNER_BY_NAME[task]
+    return [runner(size=size)]
 
 def _compact_output(output: Any) -> str:
     if not isinstance(output, dict):
diff --git a/chuck/benchmarks/compute_core/__init__.py b/chuck/benchmarks/compute_core/__init__.py
index 280109a..dda0782 100644
--- a/chuck/benchmarks/compute_core/__init__.py
+++ b/chuck/benchmarks/compute_core/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.compute_core import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_009)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_009,size=size)
diff --git a/chuck/benchmarks/data_encoding/__init__.py b/chuck/benchmarks/data_encoding/__init__.py
index 772a249..ba794d7 100644
--- a/chuck/benchmarks/data_encoding/__init__.py
+++ b/chuck/benchmarks/data_encoding/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.data_encoding import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_004)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_004,size=size)
diff --git a/chuck/benchmarks/graph_analytics/__init__.py b/chuck/benchmarks/graph_analytics/__init__.py
index 569b268..4024aec 100644
--- a/chuck/benchmarks/graph_analytics/__init__.py
+++ b/chuck/benchmarks/graph_analytics/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.graph_analytics import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_005)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_005,size=size)
diff --git a/chuck/benchmarks/io_pipeline/__init__.py b/chuck/benchmarks/io_pipeline/__init__.py
index fb0858d..e320465 100644
--- a/chuck/benchmarks/io_pipeline/__init__.py
+++ b/chuck/benchmarks/io_pipeline/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.io_pipeline import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_001)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_001, size=size)
diff --git a/chuck/benchmarks/memory_index/__init__.py b/chuck/benchmarks/memory_index/__init__.py
index 35d6d07..40f99b6 100644
--- a/chuck/benchmarks/memory_index/__init__.py
+++ b/chuck/benchmarks/memory_index/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.memory_index import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_008)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_008,size=size)
diff --git a/chuck/benchmarks/memory_tier/__init__.py b/chuck/benchmarks/memory_tier/__init__.py
index 7127add..516f6e5 100644
--- a/chuck/benchmarks/memory_tier/__init__.py
+++ b/chuck/benchmarks/memory_tier/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.memory_tier import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_007)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_007,size=size)
diff --git a/chuck/benchmarks/ordering_core/__init__.py b/chuck/benchmarks/ordering_core/__init__.py
index 53032ad..9b6ce1e 100644
--- a/chuck/benchmarks/ordering_core/__init__.py
+++ b/chuck/benchmarks/ordering_core/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.ordering_core import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_002)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_002,size=size)
diff --git a/chuck/benchmarks/prime_analytics/__init__.py b/chuck/benchmarks/prime_analytics/__init__.py
index a68787a..00974d6 100644
--- a/chuck/benchmarks/prime_analytics/__init__.py
+++ b/chuck/benchmarks/prime_analytics/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.prime_analytics import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_006)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_006,size=size)
diff --git a/chuck/benchmarks/relational_fusion/__init__.py b/chuck/benchmarks/relational_fusion/__init__.py
index 6fae1ff..8943f66 100644
--- a/chuck/benchmarks/relational_fusion/__init__.py
+++ b/chuck/benchmarks/relational_fusion/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.relational_fusion import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_010)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_010,size=size)
diff --git a/chuck/benchmarks/retrieval_core/__init__.py b/chuck/benchmarks/retrieval_core/__init__.py
index 5eefb30..d89fc9e 100644
--- a/chuck/benchmarks/retrieval_core/__init__.py
+++ b/chuck/benchmarks/retrieval_core/__init__.py
@@ -6,5 +6,5 @@
 from ...tasks.retrieval_core import TASK_SPEC
 
 
-def run() -> dict[str, Any]:
-    return benchmark_task(TASK_SPEC, seed=1_003)
+def run(size: int | None = None) -> dict[str, Any]:
+    return benchmark_task(TASK_SPEC, seed=1_003,size=size)