From 81db38584b5b942ada4776af39429dbb97f5bd8b Mon Sep 17 00:00:00 2001 From: notdarking Date: Sun, 12 Apr 2026 02:43:27 +0530 Subject: [PATCH 1/2] fix : changed data size in retrieval_core and graph_analytics (fixes #2) --- chuck/tasks/graph_analytics/task.py | 2 +- chuck/tasks/retrieval_core/task.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chuck/tasks/graph_analytics/task.py b/chuck/tasks/graph_analytics/task.py index 79cb5d3..ecd24a6 100644 --- a/chuck/tasks/graph_analytics/task.py +++ b/chuck/tasks/graph_analytics/task.py @@ -44,4 +44,4 @@ def solve(graph: dict[str, list[str]], iterations: int = 16, damping: float = 0. } -TASK_SPEC = TaskSpec("graph_analytics", generate, solve, 48, 1_000) +TASK_SPEC = TaskSpec("graph_analytics", generate, solve, 48, 200_000) diff --git a/chuck/tasks/retrieval_core/task.py b/chuck/tasks/retrieval_core/task.py index 67c9a04..1b48afc 100644 --- a/chuck/tasks/retrieval_core/task.py +++ b/chuck/tasks/retrieval_core/task.py @@ -72,7 +72,7 @@ def solve(payload: dict[str, Any]) -> dict[str, Any]: generate, solve, 48, - 2_000, + 2000_000, algorithm_style="probabilistic", reliability_floor=0.88, ) From 403c704dcc65aba2837599f01d9cf4fb0ddbf597 Mon Sep 17 00:00:00 2001 From: notdarking Date: Sun, 12 Apr 2026 16:09:03 +0530 Subject: [PATCH 2/2] feat: make benchmark size configurable --- chuck/__main__.py | 8 +++++++- chuck/benchmark.py | 8 ++++---- chuck/benchmarks/compute_core/__init__.py | 4 ++-- chuck/benchmarks/data_encoding/__init__.py | 4 ++-- chuck/benchmarks/graph_analytics/__init__.py | 4 ++-- chuck/benchmarks/io_pipeline/__init__.py | 4 ++-- chuck/benchmarks/memory_index/__init__.py | 4 ++-- chuck/benchmarks/memory_tier/__init__.py | 4 ++-- chuck/benchmarks/ordering_core/__init__.py | 4 ++-- chuck/benchmarks/prime_analytics/__init__.py | 4 ++-- chuck/benchmarks/relational_fusion/__init__.py | 4 ++-- chuck/benchmarks/retrieval_core/__init__.py | 4 ++-- 12 files changed, 31 insertions(+), 25 deletions(-) diff --git a/chuck/__main__.py b/chuck/__main__.py index 8599ab9..49c7215 100644 --- a/chuck/__main__.py +++ b/chuck/__main__.py @@ -37,6 +37,12 @@ def main() -> int: ], help="Run a single capability benchmark", ) + bench_parser.add_argument( + "--size", + type=int, + default=None, + help="Override the default benchmark size" + ) subparsers.add_parser("regress", help="Run regression checks") subparsers.add_parser("generate-baselines", help="Generate regression baselines") snapshot_parser = subparsers.add_parser("snapshot", help="Create a performance/reliability snapshot") @@ -81,7 +87,7 @@ def main() -> int: command = args.command or "bench" if command == "bench": - print(format_benchmarks(run_benchmarks(task=args.task))) + print(format_benchmarks(run_benchmarks(task=args.task,size=args.size))) return 0 if command == "regress": print(format_regression(run_regression())) diff --git a/chuck/benchmark.py b/chuck/benchmark.py index 769c4d7..7b25e5d 100644 --- a/chuck/benchmark.py +++ b/chuck/benchmark.py @@ -43,12 +43,12 @@ } -def run_benchmarks(task: str | None = None) -> list[dict[str, Any]]: +def run_benchmarks(task: str | None = None, size: int | None = None) -> list[dict[str, Any]]: if task is None: - return [runner() for runner in RUNNERS] - runner = RUNNER_BY_NAME[task] - return [runner()] + return [runner(size=size) for runner in RUNNERS] + runner = RUNNER_BY_NAME[task] + return [runner(size=size)] def _compact_output(output: Any) -> str: if not isinstance(output, dict): diff --git a/chuck/benchmarks/compute_core/__init__.py b/chuck/benchmarks/compute_core/__init__.py index 280109a..dda0782 100644 --- a/chuck/benchmarks/compute_core/__init__.py +++ b/chuck/benchmarks/compute_core/__init__.py @@ -6,5 +6,5 @@ from ...tasks.compute_core import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_009) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_009,size=size) diff --git a/chuck/benchmarks/data_encoding/__init__.py b/chuck/benchmarks/data_encoding/__init__.py index 772a249..ba794d7 100644 --- a/chuck/benchmarks/data_encoding/__init__.py +++ b/chuck/benchmarks/data_encoding/__init__.py @@ -6,5 +6,5 @@ from ...tasks.data_encoding import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_004) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_004,size=size) diff --git a/chuck/benchmarks/graph_analytics/__init__.py b/chuck/benchmarks/graph_analytics/__init__.py index 569b268..4024aec 100644 --- a/chuck/benchmarks/graph_analytics/__init__.py +++ b/chuck/benchmarks/graph_analytics/__init__.py @@ -6,5 +6,5 @@ from ...tasks.graph_analytics import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_005) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_005,size=size) diff --git a/chuck/benchmarks/io_pipeline/__init__.py b/chuck/benchmarks/io_pipeline/__init__.py index fb0858d..e320465 100644 --- a/chuck/benchmarks/io_pipeline/__init__.py +++ b/chuck/benchmarks/io_pipeline/__init__.py @@ -6,5 +6,5 @@ from ...tasks.io_pipeline import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_001) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_001, size=size) diff --git a/chuck/benchmarks/memory_index/__init__.py b/chuck/benchmarks/memory_index/__init__.py index 35d6d07..40f99b6 100644 --- a/chuck/benchmarks/memory_index/__init__.py +++ b/chuck/benchmarks/memory_index/__init__.py @@ -6,5 +6,5 @@ from ...tasks.memory_index import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_008) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_008,size=size) diff --git a/chuck/benchmarks/memory_tier/__init__.py b/chuck/benchmarks/memory_tier/__init__.py index 7127add..516f6e5 100644 --- a/chuck/benchmarks/memory_tier/__init__.py +++ b/chuck/benchmarks/memory_tier/__init__.py @@ -6,5 +6,5 @@ from ...tasks.memory_tier import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_007) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_007,size=size) diff --git a/chuck/benchmarks/ordering_core/__init__.py b/chuck/benchmarks/ordering_core/__init__.py index 53032ad..9b6ce1e 100644 --- a/chuck/benchmarks/ordering_core/__init__.py +++ b/chuck/benchmarks/ordering_core/__init__.py @@ -6,5 +6,5 @@ from ...tasks.ordering_core import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_002) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_002,size=size) diff --git a/chuck/benchmarks/prime_analytics/__init__.py b/chuck/benchmarks/prime_analytics/__init__.py index a68787a..00974d6 100644 --- a/chuck/benchmarks/prime_analytics/__init__.py +++ b/chuck/benchmarks/prime_analytics/__init__.py @@ -6,5 +6,5 @@ from ...tasks.prime_analytics import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_006) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_006,size=size) diff --git a/chuck/benchmarks/relational_fusion/__init__.py b/chuck/benchmarks/relational_fusion/__init__.py index 6fae1ff..8943f66 100644 --- a/chuck/benchmarks/relational_fusion/__init__.py +++ b/chuck/benchmarks/relational_fusion/__init__.py @@ -6,5 +6,5 @@ from ...tasks.relational_fusion import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_010) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_010,size=size) diff --git a/chuck/benchmarks/retrieval_core/__init__.py b/chuck/benchmarks/retrieval_core/__init__.py index 5eefb30..d89fc9e 100644 --- a/chuck/benchmarks/retrieval_core/__init__.py +++ b/chuck/benchmarks/retrieval_core/__init__.py @@ -6,5 +6,5 @@ from ...tasks.retrieval_core import TASK_SPEC -def run() -> dict[str, Any]: - return benchmark_task(TASK_SPEC, seed=1_003) +def run(size: int | None = None) -> dict[str, Any]: + return benchmark_task(TASK_SPEC, seed=1_003,size=size)