Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions chuck/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,22 @@ def _baseline_path(task_name: str) -> Path:

def _expected_cases() -> list[dict[str, Any]]:
cases = []
SEEDS_PER_TASK = 5

for index, task in enumerate(TASKS, start=1):
payload = task.generator(task.regression_size, index)
cases.append(
{
"task": task.name,
"seed": index,
"size": task.regression_size,
"expected": solve_with_backend(task=task, payload=payload, backend="python"),
}
)
for offset in range(SEEDS_PER_TASK):

seed = (index * 10) + offset

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please explain the reasoning behind these changes?

payload = task.generator(task.regression_size, seed)
cases.append(
{
"task": task.name,
"seed": seed,
"size": task.regression_size,
"expected": solve_with_backend(task=task, payload=payload, backend="python"),
}
)
return cases


Expand Down
6 changes: 3 additions & 3 deletions data/compute_core/regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
{
"expected": {
"backend": "python",
"checksum": 76643,
"checksum": 85094,
"size": 16,
"trace": 4787
"trace": 5409
},
"seed": 9,
"seed": 94,
"size": 16,
"task": "compute_core"
}
Expand Down
8 changes: 4 additions & 4 deletions data/data_encoding/regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
{
"expected": {
"backend": "python",
"compressed_bytes": 5388,
"compressed_bytes": 5439,
"input_bytes": 8192,
"ratio": 0.6577,
"ratio": 0.6639,
"roundtrip": true,
"sha256": "9937957b8c18ae7965f7ecf6fd355064db5a8d6ea251719f602f49d2817f0631"
"sha256": "fd1c38e6389403240f49d439a8cb65c9c90dfee38e5a50f027263984e8b00cdd"
},
"seed": 4,
"seed": 44,
"size": 8192,
"task": "data_encoding"
}
Expand Down
8 changes: 4 additions & 4 deletions data/graph_analytics/regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
{
"expected": {
"backend": "python",
"checksum": 23.048914,
"checksum": 24.231153,
"node_count": 48,
"top_node": "n0010",
"top_score": 0.051786
"top_node": "n0028",
"top_score": 0.063059
},
"seed": 5,
"seed": 54,
"size": 48,
"task": "graph_analytics"
}
Expand Down
10 changes: 5 additions & 5 deletions data/io_pipeline/regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
"expected": {
"backend": "python",
"records": 64,
"top_pair": "acct_003|bucket_03",
"top_value": 3220,
"total_value": 33802,
"unique_pairs": 33
"top_pair": "acct_005|bucket_02",
"top_value": 2227,
"total_value": 33077,
"unique_pairs": 30
},
"seed": 1,
"seed": 14,
"size": 64,
"task": "io_pipeline"
}
Expand Down
2 changes: 1 addition & 1 deletion data/memory_index/regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"probes": 192,
"true_positives": 64
},
"seed": 8,
"seed": 84,
"size": 128,
"task": "memory_index"
}
Expand Down
24 changes: 12 additions & 12 deletions data/memory_tier/regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,25 @@
"expected": {
"backend": "python",
"final_keys": [
5,
25,
6,
2,
12,
7,
31,
3,
10,
13,
18,
1,
26,
9,
31,
8,
15,
11,
25
28
],
"hit_rate": 0.3672,
"hits": 47,
"misses": 81,
"hit_rate": 0.4453,
"hits": 57,
"misses": 71,
"requests": 128
},
"seed": 7,
"seed": 74,
"size": 128,
"task": "memory_tier"
}
Expand Down
10 changes: 5 additions & 5 deletions data/ordering_core/regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
{
"expected": {
"backend": "python",
"checksum": 795494423,
"checksum": 585457486,
"count": 128,
"max": 991026,
"median": 102583,
"min": -949966
"max": 998651,
"median": 30351,
"min": -973342
},
"seed": 2,
"seed": 24,
"size": 128,
"task": "ordering_core"
}
Expand Down
8 changes: 4 additions & 4 deletions data/prime_analytics/regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
"expected": {
"backend": "python",
"candidates": 128,
"checksum": 28615758,
"checksum": 30628377,
"confidence": 0.9961,
"prime_density_estimate": 0.125,
"prime_density_estimate": 0.1016,
"probabilistic": true,
"probable_primes": 16
"probable_primes": 13
},
"seed": 6,
"seed": 64,
"size": 128,
"task": "prime_analytics"
}
Expand Down
180 changes: 170 additions & 10 deletions data/regression.json
Original file line number Diff line number Diff line change
@@ -1,42 +1,202 @@
[
{
"path": "data/io_pipeline/regression.json",
"path": "data\\io_pipeline\\regression.json",
"task": "io_pipeline"
},
{
"path": "data/ordering_core/regression.json",
"path": "data\\io_pipeline\\regression.json",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

"task": "io_pipeline"
},
{
"path": "data\\io_pipeline\\regression.json",
"task": "io_pipeline"
},
{
"path": "data\\io_pipeline\\regression.json",
"task": "io_pipeline"
},
{
"path": "data\\io_pipeline\\regression.json",
"task": "io_pipeline"
},
Comment on lines 1 to +21
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify if the baseline files can be resolved with current paths
cd data
echo "=== Checking if paths resolve correctly ==="
# List actual directory structure
find . -name "regression.json" -type f

echo ""
echo "=== Testing path resolution ==="
# On POSIX, this literal path should NOT exist
if [ -e "data\\io_pipeline\\regression.json" ]; then
    echo "UNEXPECTED: Backslash path exists (likely Windows)"
else
    echo "EXPECTED: Backslash path does not exist on POSIX"
fi

Repository: iiitl/chuck

Length of output: 521


🏁 Script executed:

cat data/regression.json

Repository: iiitl/chuck

Length of output: 4537


🏁 Script executed:

find . -name "*.py" -type f | head -20

Repository: iiitl/chuck

Length of output: 753


🏁 Script executed:

grep -r "load_regression_file" --include="*.py" -B 2 -A 10

Repository: iiitl/chuck

Length of output: 2945


🏁 Script executed:

grep -r "test_regression\|_load_expected_by_task" --include="*.py" -B 2 -A 10 | head -100

Repository: iiitl/chuck

Length of output: 1490


🏁 Script executed:

grep -r "DEFAULT_REGRESSION_PATH\|test.*regression" --include="*.py" -B 2 -A 5

Repository: iiitl/chuck

Length of output: 5156


🏁 Script executed:

grep -r "def test_" --include="*.py" | grep -i regression

Repository: iiitl/chuck

Length of output: 268


🏁 Script executed:

grep -A 5 "DEFAULT_REGRESSION_PATH" chuck/regression.py

Repository: iiitl/chuck

Length of output: 819


🏁 Script executed:

cat tests/test_regression.py

Repository: iiitl/chuck

Length of output: 2334


Windows-style path separators will break on POSIX systems.

Paths like "data\\io_pipeline\\regression.json" use backslashes which are literal characters (not separators) on Linux/Mac. When load_regression_file() executes ROOT / item["path"], it will attempt to read a file literally named data\io_pipeline\regression.json instead of data/io_pipeline/regression.json, causing FileNotFoundError on POSIX.

Additionally, the manifest should contain 10 entries (one per task), not 50. The current 5 duplicate entries per task are redundant, and when _load_expected_by_task() builds a dict keyed by task name, the duplicates silently collapse to a single entry per task via key overwriting.

Regenerate this file on a POSIX system or use forward slashes (data/io_pipeline/regression.json) which work cross-platform.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@data/regression.json` around lines 1 - 21, The manifest uses Windows
backslashes and duplicated entries causing POSIX FileNotFoundError and silent
task-key overwrites; update the JSON in data/regression.json to use POSIX-safe
forward slashes (e.g., "data/io_pipeline/regression.json") and regenerate so
there are exactly 10 unique task entries (one per task) instead of repeated
duplicates so that load_regression_file() when doing ROOT / item["path"] finds
the file and _load_expected_by_task() does not lose entries by key overwrites.

Comment on lines 2 to +21
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Manifest has 50 duplicate entries; should have 10 (one per task).

The manifest contains 5 identical entries per task, but each points to the same baseline file. Per the relevant code snippet from tests/test_regression.py:48-52, the test expects exactly 10 entries:

self.assertEqual(len(entries), 10)

Additionally, _load_expected_by_task() in comparison.py builds a dict keyed by task name, so duplicates are silently discarded anyway.

After fixing the overwrite bug in generate_regression_file(), the manifest should have one entry per task (pointing to a file containing all 5 seeds).

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@data/regression.json` around lines 2 - 21, The manifest file contains 5
duplicate entries per task because generate_regression_file() is overwriting or
appending the same path repeatedly instead of creating a single entry per task
that references a file containing all 5 seeds; update generate_regression_file()
so it creates one manifest entry per task (not one per seed) by aggregating the
seeds into the task's baseline file and writing only a single { "path": "...",
"task": "<task>" } entry per task, ensuring the manifest length matches the test
expectation in tests/test_regression.py (len(entries) == 10) and that
_load_expected_by_task() in comparison.py receives unique task keys (no
duplicates to be silently dropped).

{
"path": "data\\ordering_core\\regression.json",
"task": "ordering_core"
},
{
"path": "data\\ordering_core\\regression.json",
"task": "ordering_core"
},
{
"path": "data/retrieval_core/regression.json",
"path": "data\\ordering_core\\regression.json",
"task": "ordering_core"
},
{
"path": "data\\ordering_core\\regression.json",
"task": "ordering_core"
},
{
"path": "data\\ordering_core\\regression.json",
"task": "ordering_core"
},
{
"path": "data\\retrieval_core\\regression.json",
"task": "retrieval_core"
},
{
"path": "data\\retrieval_core\\regression.json",
"task": "retrieval_core"
},
{
"path": "data\\retrieval_core\\regression.json",
"task": "retrieval_core"
},
{
"path": "data/data_encoding/regression.json",
"path": "data\\retrieval_core\\regression.json",
"task": "retrieval_core"
},
{
"path": "data\\retrieval_core\\regression.json",
"task": "retrieval_core"
},
{
"path": "data\\data_encoding\\regression.json",
"task": "data_encoding"
},
{
"path": "data\\data_encoding\\regression.json",
"task": "data_encoding"
},
{
"path": "data\\data_encoding\\regression.json",
"task": "data_encoding"
},
{
"path": "data\\data_encoding\\regression.json",
"task": "data_encoding"
},
{
"path": "data/graph_analytics/regression.json",
"path": "data\\data_encoding\\regression.json",
"task": "data_encoding"
},
{
"path": "data\\graph_analytics\\regression.json",
"task": "graph_analytics"
},
{
"path": "data\\graph_analytics\\regression.json",
"task": "graph_analytics"
},
{
"path": "data\\graph_analytics\\regression.json",
"task": "graph_analytics"
},
{
"path": "data\\graph_analytics\\regression.json",
"task": "graph_analytics"
},
{
"path": "data\\graph_analytics\\regression.json",
"task": "graph_analytics"
},
{
"path": "data/prime_analytics/regression.json",
"path": "data\\prime_analytics\\regression.json",
"task": "prime_analytics"
},
{
"path": "data/memory_tier/regression.json",
"path": "data\\prime_analytics\\regression.json",
"task": "prime_analytics"
},
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you realise why each entry is occurring 5 times?

{
"path": "data\\prime_analytics\\regression.json",
"task": "prime_analytics"
},
{
"path": "data\\prime_analytics\\regression.json",
"task": "prime_analytics"
},
{
"path": "data\\prime_analytics\\regression.json",
"task": "prime_analytics"
},
{
"path": "data\\memory_tier\\regression.json",
"task": "memory_tier"
},
{
"path": "data\\memory_tier\\regression.json",
"task": "memory_tier"
},
{
"path": "data/memory_index/regression.json",
"path": "data\\memory_tier\\regression.json",
"task": "memory_tier"
},
{
"path": "data\\memory_tier\\regression.json",
"task": "memory_tier"
},
{
"path": "data\\memory_tier\\regression.json",
"task": "memory_tier"
},
{
"path": "data\\memory_index\\regression.json",
"task": "memory_index"
},
{
"path": "data\\memory_index\\regression.json",
"task": "memory_index"
},
{
"path": "data\\memory_index\\regression.json",
"task": "memory_index"
},
{
"path": "data/compute_core/regression.json",
"path": "data\\memory_index\\regression.json",
"task": "memory_index"
},
{
"path": "data\\memory_index\\regression.json",
"task": "memory_index"
},
{
"path": "data\\compute_core\\regression.json",
"task": "compute_core"
},
{
"path": "data\\compute_core\\regression.json",
"task": "compute_core"
},
{
"path": "data\\compute_core\\regression.json",
"task": "compute_core"
},
{
"path": "data\\compute_core\\regression.json",
"task": "compute_core"
},
{
"path": "data/relational_fusion/regression.json",
"path": "data\\compute_core\\regression.json",
"task": "compute_core"
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

chuck currently supports Linux, macOS, and WSL. These Windows-style paths must be reverted.

},
{
"path": "data\\relational_fusion\\regression.json",
"task": "relational_fusion"
},
{
"path": "data\\relational_fusion\\regression.json",
"task": "relational_fusion"
},
{
"path": "data\\relational_fusion\\regression.json",
"task": "relational_fusion"
},
{
"path": "data\\relational_fusion\\regression.json",
"task": "relational_fusion"
},
{
"path": "data\\relational_fusion\\regression.json",
"task": "relational_fusion"
}
]
Loading
Loading