From b21e0e62dfb6f228a792e9a23dd5c20c3662cd85 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:01:00 -0400 Subject: [PATCH 1/9] ci(checks): extract setup into composite action and bump nox shards to 4 Extract the shared checkout, mise setup, and dependency install steps from the smoke and nox jobs into a reusable composite action at .github/actions/setup-python-env. Increase nox test shards from 2 to 4 for faster parallel execution. --- .github/actions/setup-python-env/action.yml | 22 ++++++++++++++++ .github/workflows/checks.yaml | 28 ++++++--------------- 2 files changed, 30 insertions(+), 20 deletions(-) create mode 100644 .github/actions/setup-python-env/action.yml diff --git a/.github/actions/setup-python-env/action.yml b/.github/actions/setup-python-env/action.yml new file mode 100644 index 00000000..81b07596 --- /dev/null +++ b/.github/actions/setup-python-env/action.yml @@ -0,0 +1,22 @@ +name: "Setup Python environment" +description: "Checkout, configure mise, and install dev dependencies for a given Python version" + +inputs: + python-version: + description: "Python version to install (e.g. 3.12)" + required: true + +runs: + using: "composite" + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + - name: Set up mise + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 + with: + cache: true + experimental: true + install_args: python@${{ inputs.python-version }} + - name: Install dependencies + shell: bash + run: | + mise exec python@${{ inputs.python-version }} -- make -C py install-dev diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index ca7844bf..21664d8e 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -44,16 +44,10 @@ jobs: os: [ubuntu-latest, windows-latest] steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - - name: Set up mise - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 + - name: Setup Python environment + uses: ./.github/actions/setup-python-env with: - cache: true - experimental: true - install_args: python@${{ matrix.python-version }} - - name: Install dependencies - run: | - mise exec python@${{ matrix.python-version }} -- make -C py install-dev + python-version: ${{ matrix.python-version }} - name: Test whether the Python SDK can be installed run: | # This is already done by make install-dev, but we're keeping this as a separate step @@ -72,23 +66,17 @@ jobs: matrix: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] os: [ubuntu-latest, windows-latest] - shard: [0, 1] + shard: [0, 1, 2, 3] steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - - name: Set up mise - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 + - name: Setup Python environment + uses: ./.github/actions/setup-python-env with: - cache: true - experimental: true - install_args: python@${{ matrix.python-version }} - - name: Install dependencies - run: | - mise exec python@${{ matrix.python-version }} -- make -C py install-dev + python-version: ${{ matrix.python-version }} - name: Run nox tests (shard ${{ matrix.shard }}/2) shell: bash run: | - mise exec python@${{ matrix.python-version }} -- bash ./py/scripts/nox-matrix.sh ${{ matrix.shard }} 2 + mise exec python@${{ matrix.python-version }} -- bash ./py/scripts/nox-matrix.sh ${{ matrix.shard }} 4 adk-py: uses: ./.github/workflows/adk-py-test.yaml From 58137b03f768f8c2166276b363827076317c560f Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:04:19 -0400 Subject: [PATCH 2/9] fix(ci): add checkout step before local composite action Local composite actions require the repo to be checked out first. Move actions/checkout back into each job, before the composite action reference. --- .github/actions/setup-python-env/action.yml | 1 - .github/workflows/checks.yaml | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-python-env/action.yml b/.github/actions/setup-python-env/action.yml index 81b07596..e876771f 100644 --- a/.github/actions/setup-python-env/action.yml +++ b/.github/actions/setup-python-env/action.yml @@ -9,7 +9,6 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up mise uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 21664d8e..23600d79 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -44,6 +44,7 @@ jobs: os: [ubuntu-latest, windows-latest] steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Setup Python environment uses: ./.github/actions/setup-python-env with: @@ -69,6 +70,7 @@ jobs: shard: [0, 1, 2, 3] steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Setup Python environment uses: ./.github/actions/setup-python-env with: From 5c0559834ae30c0dca6c6fb92a644cfcf8d35435 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:14:38 -0400 Subject: [PATCH 3/9] ci(nox-matrix): balance shards by session weight using greedy LPT Replace the round-robin (NR % TOTAL) shard assignment with a greedy longest-processing-time-first bin-packing algorithm. Session weights are read from py/scripts/session-weights.json (measured from CI). This brings the 4-shard split from a potential ~60s imbalance to within ~3s. --- py/scripts/nox-matrix.sh | 55 ++++++++++++++++++++++++++----- py/scripts/session-weights.json | 57 +++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 8 deletions(-) create mode 100644 py/scripts/session-weights.json diff --git a/py/scripts/nox-matrix.sh b/py/scripts/nox-matrix.sh index 337297ea..d81de1fa 100755 --- a/py/scripts/nox-matrix.sh +++ b/py/scripts/nox-matrix.sh @@ -1,14 +1,15 @@ #!/bin/bash # -# This is a very crude script to parallelize nox sessions into groups. -# It's used to run the nox tests in parallel on GitHub Actions. -# +# Distributes nox sessions across shards using greedy bin-packing (LPT) so +# that total estimated runtime per shard is balanced. Session weights come from +# a companion file (session-weights.json). Unknown sessions get a default weight. # set -euo pipefail ROOT_DIR=$(git rev-parse --show-toplevel) NOXFILE=$ROOT_DIR/py/noxfile.py +WEIGHTS_FILE=$ROOT_DIR/py/scripts/session-weights.json # Parse command line arguments if [ $# -lt 2 ]; then @@ -43,11 +44,49 @@ fi # * test_foo # * test_bar -> Optional description # We need to strip the description part after " -> " -all_sessions=$(nox -l -f $NOXFILE | grep "^\* " | cut -c 3- | sed 's/ ->.*$//' | sort) -matches=$(echo "$all_sessions" | awk "NR % $TOTAL == $INDEX") -misses=$(echo "$all_sessions" | awk "NR % $TOTAL != $INDEX") -n_matches=$(echo "$matches" | wc -l | xargs) -n_all=$(echo "$all_sessions" | wc -l | xargs) +all_sessions=$(nox -l -f "$NOXFILE" | grep "^\* " | cut -c 3- | sed 's/ ->.*$//' | sort) + +# Use Python for the greedy LPT assignment — it's already available. +matches=$(python3 -c " +import json, sys + +sessions = '''$all_sessions'''.strip().split('\n') +total_shards = $TOTAL +my_shard = $INDEX + +# Load weights; fall back to default for unknown sessions +try: + with open('$WEIGHTS_FILE') as f: + weights = json.load(f) +except FileNotFoundError: + weights = {} +default_weight = weights.get('_default', 15) + +# Sort sessions by weight descending (LPT) +sessions_weighted = [(s, weights.get(s, default_weight)) for s in sessions] +sessions_weighted.sort(key=lambda x: -x[1]) + +# Greedy assignment: always put next session into the lightest shard +shard_totals = [0] * total_shards +shard_assignments = [[] for _ in range(total_shards)] +for name, weight in sessions_weighted: + lightest = min(range(total_shards), key=lambda i: shard_totals[i]) + shard_assignments[lightest].append(name) + shard_totals[lightest] += weight + +# Print summary to stderr +for i in range(total_shards): + marker = ' <-- this shard' if i == my_shard else '' + print(f' shard {i}: {len(shard_assignments[i])} sessions, ~{shard_totals[i]}s{marker}', file=sys.stderr) + +# Print this shard's sessions to stdout +for s in sorted(shard_assignments[my_shard]): + print(s) +") + +misses=$(comm -23 <(echo "$all_sessions") <(echo "$matches")) +n_matches=$(echo "$matches" | grep -c . || true) +n_all=$(echo "$all_sessions" | grep -c . || true) printf "nox matrix idx:%d shards:%d running %d/%d sessions\n" "$INDEX" "$TOTAL" "$n_matches" "$n_all" diff --git a/py/scripts/session-weights.json b/py/scripts/session-weights.json new file mode 100644 index 00000000..5897835d --- /dev/null +++ b/py/scripts/session-weights.json @@ -0,0 +1,57 @@ +{ + "_comment": "Approximate session durations in seconds from CI (ubuntu, Python 3.13). Used by nox-matrix.sh for balanced shard assignment. Re-measure periodically and update.", + "_default": 15, + "pylint": 55, + "test_agentscope(1.0.0)": 20, + "test_agentscope(latest)": 22, + "test_agno(2.1.0)": 18, + "test_agno(2.4.0)": 19, + "test_agno(latest)": 19, + "test_anthropic(0.48.0)": 15, + "test_anthropic(0.49.0)": 15, + "test_anthropic(0.50.0)": 15, + "test_anthropic(latest)": 17, + "test_autoevals(0.0.129)": 10, + "test_autoevals(latest)": 10, + "test_braintrust_core": 10, + "test_claude_agent_sdk(0.1.10)": 14, + "test_claude_agent_sdk(latest)": 14, + "test_cli": 4, + "test_core": 10, + "test_dspy(2.6.0)": 25, + "test_dspy(latest)": 25, + "test_google_adk(1.14.1)": 39, + "test_google_adk(latest)": 24, + "test_google_genai(1.30.0)": 15, + "test_google_genai(latest)": 15, + "test_langchain(0.3.28)": 23, + "test_langchain(latest)": 22, + "test_latest_wrappers_novcr": 15, + "test_litellm(1.74.0)": 23, + "test_litellm(1.83.0)": 25, + "test_mistral(1.12.4)": 14, + "test_mistral(latest)": 14, + "test_openai(1.71)": 17, + "test_openai(1.77.0)": 18, + "test_openai(1.91)": 18, + "test_openai(1.92)": 19, + "test_openai(latest)": 19, + "test_openai_agents(0.0.19)": 17, + "test_openai_agents(latest)": 17, + "test_openai_http2_streaming": 5, + "test_openrouter(0.6.0)": 5, + "test_openrouter(latest)": 5, + "test_otel": 4, + "test_otel_not_installed": 3, + "test_pydantic_ai_integration(1.10.0)": 23, + "test_pydantic_ai_integration(latest)": 25, + "test_pydantic_ai_logfire": 8, + "test_pydantic_ai_wrap_openai(0.1.9)": 16, + "test_pydantic_ai_wrap_openai(1.0.1)": 19, + "test_pydantic_ai_wrap_openai(latest)": 20, + "test_pytest_plugin(8.4.2)": 11, + "test_pytest_plugin(latest)": 11, + "test_temporal(1.19.0)": 6, + "test_temporal(1.20.0)": 5, + "test_temporal(latest)": 5 +} From f491f0881ac719ab7c92e0d12d0f35c5f8d57844 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:16:04 -0400 Subject: [PATCH 4/9] ref(nox-matrix): rewrite shell script as Python Replace py/scripts/nox-matrix.sh with py/scripts/nox-matrix.py. The Python version is cleaner and avoids the inline Python heredoc that the shell script was already using for the LPT bin-packing logic. Update the workflow to call the new script. --- .github/workflows/checks.yaml | 2 +- py/scripts/nox-matrix.py | 133 ++++++++++++++++++++++++++++++++ py/scripts/nox-matrix.sh | 107 ------------------------- py/scripts/session-weights.json | 2 +- 4 files changed, 135 insertions(+), 109 deletions(-) create mode 100644 py/scripts/nox-matrix.py delete mode 100755 py/scripts/nox-matrix.sh diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 23600d79..97f7e1c7 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -78,7 +78,7 @@ jobs: - name: Run nox tests (shard ${{ matrix.shard }}/2) shell: bash run: | - mise exec python@${{ matrix.python-version }} -- bash ./py/scripts/nox-matrix.sh ${{ matrix.shard }} 4 + mise exec python@${{ matrix.python-version }} -- python ./py/scripts/nox-matrix.py ${{ matrix.shard }} 4 adk-py: uses: ./.github/workflows/adk-py-test.yaml diff --git a/py/scripts/nox-matrix.py b/py/scripts/nox-matrix.py new file mode 100644 index 00000000..8f429db9 --- /dev/null +++ b/py/scripts/nox-matrix.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +"""Distribute nox sessions across shards using greedy bin-packing (LPT). + +Session weights are read from session-weights.json (co-located with this +script). Unknown sessions get a default weight. The algorithm sorts sessions +by weight descending and greedily assigns each to the lightest shard. + +Usage: + python nox-matrix.py [--dry-run] +""" + +import argparse +import json +import subprocess +import sys +from pathlib import Path + + +def get_nox_sessions(noxfile: Path) -> list[str]: + """List available nox sessions by running ``nox -l``.""" + result = subprocess.run( + ["nox", "-l", "-f", str(noxfile)], + capture_output=True, + text=True, + check=True, + ) + sessions: list[str] = [] + for line in result.stdout.splitlines(): + if line.startswith("* "): + # Strip the leading "* " and any " -> description" suffix + name = line[2:].split(" -> ")[0].strip() + sessions.append(name) + return sorted(sessions) + + +def load_weights(weights_file: Path) -> tuple[dict[str, int], int]: + """Return (weights_map, default_weight) from the JSON file.""" + try: + with open(weights_file) as f: + data = json.load(f) + except FileNotFoundError: + data = {} + default = data.get("_default", 15) + return data, default + + +def assign_shards( + sessions: list[str], + total_shards: int, + weights: dict[str, int], + default_weight: int, +) -> list[list[str]]: + """Assign sessions to shards using greedy LPT bin-packing.""" + # Sort by weight descending + weighted = [(s, weights.get(s, default_weight)) for s in sessions] + weighted.sort(key=lambda x: -x[1]) + + shard_totals = [0] * total_shards + shard_assignments: list[list[str]] = [[] for _ in range(total_shards)] + + for name, weight in weighted: + lightest = min(range(total_shards), key=lambda i: shard_totals[i]) + shard_assignments[lightest].append(name) + shard_totals[lightest] += weight + + # Print summary to stderr + for i in range(total_shards): + count = len(shard_assignments[i]) + total = shard_totals[i] + print(f" shard {i}: {count} sessions, ~{total}s", file=sys.stderr) + + # Sort each shard's sessions for deterministic output + for assignments in shard_assignments: + assignments.sort() + + return shard_assignments + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("shard_index", type=int, help="Zero-based shard index") + parser.add_argument("num_shards", type=int, help="Total number of shards") + parser.add_argument("--dry-run", action="store_true", help="Print assignment without running nox") + args = parser.parse_args() + + if args.shard_index >= args.num_shards: + print( + f"Error: shard_index ({args.shard_index}) must be less than num_shards ({args.num_shards})", + file=sys.stderr, + ) + sys.exit(1) + + root_dir = Path( + subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + check=True, + ).stdout.strip() + ) + + noxfile = root_dir / "py" / "noxfile.py" + weights_file = root_dir / "py" / "scripts" / "session-weights.json" + + all_sessions = get_nox_sessions(noxfile) + weights, default_weight = load_weights(weights_file) + shard_assignments = assign_shards(all_sessions, args.num_shards, weights, default_weight) + + my_sessions = shard_assignments[args.shard_index] + other_sessions = sorted(set(all_sessions) - set(my_sessions)) + + print( + f"nox matrix idx:{args.shard_index} shards:{args.num_shards} " + f"running {len(my_sessions)}/{len(all_sessions)} sessions" + ) + + if args.dry_run: + print("--------------------------------") + print("Would run the following sessions:") + print("\n".join(my_sessions)) + print() + print("--------------------------------") + print("Would skip the following sessions:") + print("\n".join(other_sessions)) + return + + # Run nox with the assigned sessions + cmd = ["nox", "-f", str(noxfile), "-s", *my_sessions] + sys.exit(subprocess.run(cmd).returncode) + + +if __name__ == "__main__": + main() diff --git a/py/scripts/nox-matrix.sh b/py/scripts/nox-matrix.sh deleted file mode 100755 index d81de1fa..00000000 --- a/py/scripts/nox-matrix.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash -# -# Distributes nox sessions across shards using greedy bin-packing (LPT) so -# that total estimated runtime per shard is balanced. Session weights come from -# a companion file (session-weights.json). Unknown sessions get a default weight. -# - -set -euo pipefail - -ROOT_DIR=$(git rev-parse --show-toplevel) -NOXFILE=$ROOT_DIR/py/noxfile.py -WEIGHTS_FILE=$ROOT_DIR/py/scripts/session-weights.json - -# Parse command line arguments -if [ $# -lt 2 ]; then - echo "Usage: $0 [--dry-run]" - exit 1 -fi - -INDEX=$1 -TOTAL=$2 -DRY_RUN=false -shift 2 -while [[ $# -gt 0 ]]; do - case "$1" in - --dry-run) - DRY_RUN=true - shift - ;; - *) - echo "Unknown option: $1" - echo "Usage: $0 [--dry-run]" - exit 1 - ;; - esac -done - -if [ "$INDEX" -ge "$TOTAL" ]; then - echo "Error: shard_index ($INDEX) must be less than number_of_shards ($TOTAL)" - exit 1 -fi - -# Nox formats the sessions like: -# * test_foo -# * test_bar -> Optional description -# We need to strip the description part after " -> " -all_sessions=$(nox -l -f "$NOXFILE" | grep "^\* " | cut -c 3- | sed 's/ ->.*$//' | sort) - -# Use Python for the greedy LPT assignment — it's already available. -matches=$(python3 -c " -import json, sys - -sessions = '''$all_sessions'''.strip().split('\n') -total_shards = $TOTAL -my_shard = $INDEX - -# Load weights; fall back to default for unknown sessions -try: - with open('$WEIGHTS_FILE') as f: - weights = json.load(f) -except FileNotFoundError: - weights = {} -default_weight = weights.get('_default', 15) - -# Sort sessions by weight descending (LPT) -sessions_weighted = [(s, weights.get(s, default_weight)) for s in sessions] -sessions_weighted.sort(key=lambda x: -x[1]) - -# Greedy assignment: always put next session into the lightest shard -shard_totals = [0] * total_shards -shard_assignments = [[] for _ in range(total_shards)] -for name, weight in sessions_weighted: - lightest = min(range(total_shards), key=lambda i: shard_totals[i]) - shard_assignments[lightest].append(name) - shard_totals[lightest] += weight - -# Print summary to stderr -for i in range(total_shards): - marker = ' <-- this shard' if i == my_shard else '' - print(f' shard {i}: {len(shard_assignments[i])} sessions, ~{shard_totals[i]}s{marker}', file=sys.stderr) - -# Print this shard's sessions to stdout -for s in sorted(shard_assignments[my_shard]): - print(s) -") - -misses=$(comm -23 <(echo "$all_sessions") <(echo "$matches")) -n_matches=$(echo "$matches" | grep -c . || true) -n_all=$(echo "$all_sessions" | grep -c . || true) - -printf "nox matrix idx:%d shards:%d running %d/%d sessions\n" "$INDEX" "$TOTAL" "$n_matches" "$n_all" - -if [ "$DRY_RUN" = true ]; then - echo "--------------------------------" - echo "Would run the following sessions:" - echo "$matches" - echo "" - echo "--------------------------------" - echo "Would skip the following sessions:" - echo "$misses" - exit 0 -fi - -# Build session list and run nox once -# Quote each session name to handle parentheses in names like test_openai(latest) -session_list=$(echo "$matches" | sed 's/.*/"&"/' | tr '\n' ' ') -eval "nox -f $NOXFILE -s $session_list" diff --git a/py/scripts/session-weights.json b/py/scripts/session-weights.json index 5897835d..edf007ea 100644 --- a/py/scripts/session-weights.json +++ b/py/scripts/session-weights.json @@ -1,5 +1,5 @@ { - "_comment": "Approximate session durations in seconds from CI (ubuntu, Python 3.13). Used by nox-matrix.sh for balanced shard assignment. Re-measure periodically and update.", + "_comment": "Approximate session durations in seconds from CI (ubuntu, Python 3.13). Used by nox-matrix.py for balanced shard assignment. Re-measure periodically and update.", "_default": 15, "pylint": 55, "test_agentscope(1.0.0)": 20, From 136997b2d3a4866a993e9e2c5f531ee85841ff22 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:19:20 -0400 Subject: [PATCH 5/9] ci: add session weight drift detection Add --output-durations flag to nox-matrix.py to capture actual session runtimes from nox output. A new non-blocking check-session-weights job runs after nox, compares measured durations against session-weights.json, and warns when any session drifts beyond 50%. The job is deliberately excluded from checks-passed so it never blocks PRs. --- .github/workflows/checks.yaml | 27 ++++++- py/scripts/check-session-weights.py | 116 ++++++++++++++++++++++++++++ py/scripts/nox-matrix.py | 32 +++++++- 3 files changed, 172 insertions(+), 3 deletions(-) create mode 100644 py/scripts/check-session-weights.py diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 97f7e1c7..17168eb2 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -75,10 +75,33 @@ jobs: uses: ./.github/actions/setup-python-env with: python-version: ${{ matrix.python-version }} - - name: Run nox tests (shard ${{ matrix.shard }}/2) + - name: Run nox tests (shard ${{ matrix.shard }}/4) shell: bash run: | - mise exec python@${{ matrix.python-version }} -- python ./py/scripts/nox-matrix.py ${{ matrix.shard }} 4 + mise exec python@${{ matrix.python-version }} -- python ./py/scripts/nox-matrix.py ${{ matrix.shard }} 4 \ + ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' && format('--output-durations measured-durations-{0}.json', matrix.shard) || '' }} + - name: Upload measured durations + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: session-durations-shard-${{ matrix.shard }} + path: measured-durations-${{ matrix.shard }}.json + retention-days: 5 + + check-session-weights: + needs: [nox] + if: always() && needs.nox.result == 'success' + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + - name: Download measured durations + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + pattern: session-durations-shard-* + merge-multiple: true + - name: Check session weight drift + run: python ./py/scripts/check-session-weights.py measured-durations-*.json adk-py: uses: ./.github/workflows/adk-py-test.yaml diff --git a/py/scripts/check-session-weights.py b/py/scripts/check-session-weights.py new file mode 100644 index 00000000..6af1fb8d --- /dev/null +++ b/py/scripts/check-session-weights.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +"""Compare measured nox session durations against session-weights.json. + +Reads one or more measured-duration JSON files (produced by +``nox-matrix.py --output-durations``), merges them, and reports sessions +whose actual duration drifted significantly from the recorded weight. + +Exit codes: + 0 — all weights are within tolerance + 1 — at least one weight drifted beyond the threshold, or a new/missing + session was detected + +Usage: + python check-session-weights.py measured-shard-0.json measured-shard-1.json ... +""" + +import argparse +import json +import sys +from pathlib import Path + + +# A session is flagged when its measured duration differs from the recorded +# weight by more than this fraction (0.5 = 50%). +DRIFT_THRESHOLD = 0.5 + +# Ignore drift for sessions shorter than this (seconds). Short sessions +# have high relative variance and aren't worth chasing. +MIN_DURATION_FOR_DRIFT = 8 + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("measured_files", nargs="+", type=Path, help="JSON files with measured durations") + parser.add_argument( + "--weights", + type=Path, + default=Path(__file__).parent / "session-weights.json", + help="Path to session-weights.json (default: co-located with this script)", + ) + args = parser.parse_args() + + # Merge all measured durations (later files overwrite earlier for the same session) + measured: dict[str, int] = {} + for path in args.measured_files: + with open(path) as f: + measured.update(json.load(f)) + + if not measured: + print("⚠️ No measured durations found — nothing to check.") + sys.exit(0) + + with open(args.weights) as f: + weights_data: dict[str, int] = json.load(f) + + default_weight = weights_data.get("_default", 15) + meta_keys = {k for k in weights_data if k.startswith("_")} + + drifted: list[str] = [] + new_sessions: list[str] = [] + + print(f"Comparing {len(measured)} measured sessions against session-weights.json\n") + print(f"{'Session':<50} {'Expected':>8} {'Actual':>8} {'Drift':>8}") + print("-" * 78) + + for session in sorted(measured): + actual = measured[session] + expected = weights_data.get(session) + + if expected is None: + new_sessions.append(session) + print(f"{session:<50} {'(new)':>8} {actual:>7}s {'':>8}") + continue + + if expected == 0: + drift_pct = 0.0 + else: + drift_pct = (actual - expected) / expected + + flag = "" + if abs(drift_pct) > DRIFT_THRESHOLD and max(actual, expected) >= MIN_DURATION_FOR_DRIFT: + flag = " ⚠️" + drifted.append(session) + + print(f"{session:<50} {expected:>7}s {actual:>7}s {drift_pct:>+7.0%}{flag}") + + # Check for sessions in weights but not measured (may have been removed) + known_sessions = {k for k in weights_data if k not in meta_keys} + missing = sorted(known_sessions - set(measured)) + + print() + + if new_sessions: + print(f"🆕 {len(new_sessions)} new session(s) not in session-weights.json:") + for s in new_sessions: + print(f" {s}: {measured[s]}s") + print() + + if missing: + print(f"❓ {len(missing)} session(s) in session-weights.json but not measured") + print(" (may be in another shard — only a concern if missing from ALL shards):") + for s in missing: + print(f" {s}") + print() + + if drifted: + print(f"⚠️ {len(drifted)} session(s) drifted beyond {DRIFT_THRESHOLD:.0%} threshold.") + print(" Consider updating py/scripts/session-weights.json") + sys.exit(1) + else: + print("✅ All session weights are within tolerance.") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/py/scripts/nox-matrix.py b/py/scripts/nox-matrix.py index 8f429db9..2cb53a93 100644 --- a/py/scripts/nox-matrix.py +++ b/py/scripts/nox-matrix.py @@ -11,6 +11,7 @@ import argparse import json +import re import subprocess import sys from pathlib import Path @@ -81,6 +82,12 @@ def main() -> None: parser.add_argument("shard_index", type=int, help="Zero-based shard index") parser.add_argument("num_shards", type=int, help="Total number of shards") parser.add_argument("--dry-run", action="store_true", help="Print assignment without running nox") + parser.add_argument( + "--output-durations", + type=Path, + default=None, + help="Write measured session durations (seconds) to a JSON file", + ) args = parser.parse_args() if args.shard_index >= args.num_shards: @@ -126,7 +133,30 @@ def main() -> None: # Run nox with the assigned sessions cmd = ["nox", "-f", str(noxfile), "-s", *my_sessions] - sys.exit(subprocess.run(cmd).returncode) + + if args.output_durations is None: + sys.exit(subprocess.run(cmd).returncode) + + # Stream output while capturing session durations + durations: dict[str, int] = {} + duration_re = re.compile(r"nox > Session (\S+) was successful in (\d+) seconds\.") + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1) + assert process.stdout is not None + for line in process.stdout: + sys.stdout.write(line) + sys.stdout.flush() + m = duration_re.search(line) + if m: + durations[m.group(1)] = int(m.group(2)) + process.wait() + + args.output_durations.parent.mkdir(parents=True, exist_ok=True) + with open(args.output_durations, "w") as f: + json.dump(durations, f, indent=2, sort_keys=True) + f.write("\n") + print(f"Wrote {len(durations)} session durations to {args.output_durations}", file=sys.stderr) + + sys.exit(process.returncode) if __name__ == "__main__": From 216b09c80949f06707c9d116aab3101f1e44d5b5 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:20:30 -0400 Subject: [PATCH 6/9] ci: add --update flag to check-session-weights.py Adds a simple workflow for updating stale weights: download the measured-durations artifacts from a CI run, then run: python py/scripts/check-session-weights.py --update measured-*.json This overwrites session-weights.json with the measured values, adds new sessions, and preserves sessions that were not measured in this run. --- py/scripts/check-session-weights.py | 36 +++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/py/scripts/check-session-weights.py b/py/scripts/check-session-weights.py index 6af1fb8d..6aa7839b 100644 --- a/py/scripts/check-session-weights.py +++ b/py/scripts/check-session-weights.py @@ -12,6 +12,9 @@ Usage: python check-session-weights.py measured-shard-0.json measured-shard-1.json ... + +To update weights after downloading the measured-durations artifacts from CI: + python check-session-weights.py --update measured-shard-0.json measured-shard-1.json ... """ import argparse @@ -29,6 +32,30 @@ MIN_DURATION_FOR_DRIFT = 8 +def update_weights(weights_path: Path, weights_data: dict, measured: dict[str, int]) -> None: + """Overwrite session-weights.json with measured durations.""" + meta_keys = {k for k in weights_data if k.startswith("_")} + # Start with metadata keys + updated = {k: weights_data[k] for k in sorted(meta_keys)} + # Merge: keep measured values, drop sessions that no longer exist + all_sessions = sorted(set(weights_data.keys() - meta_keys) | set(measured.keys())) + for session in all_sessions: + if session in measured: + updated[session] = measured[session] + else: + # Session wasn't measured — keep the old weight (may be platform-specific + # or skipped; a full run across all shards would cover everything) + updated[session] = weights_data[session] + with open(weights_path, "w") as f: + json.dump(updated, f, indent=2, sort_keys=True) + f.write("\n") + n_changed = sum(1 for s in measured if s not in meta_keys and weights_data.get(s) != measured[s]) + n_new = sum(1 for s in measured if s not in weights_data) + print( + f"✅ Updated {weights_path} ({n_changed} changed, {n_new} new, {len(updated) - len(meta_keys)} total sessions)" + ) + + def main() -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("measured_files", nargs="+", type=Path, help="JSON files with measured durations") @@ -38,6 +65,11 @@ def main() -> None: default=Path(__file__).parent / "session-weights.json", help="Path to session-weights.json (default: co-located with this script)", ) + parser.add_argument( + "--update", + action="store_true", + help="Update session-weights.json with measured durations and exit", + ) args = parser.parse_args() # Merge all measured durations (later files overwrite earlier for the same session) @@ -53,6 +85,10 @@ def main() -> None: with open(args.weights) as f: weights_data: dict[str, int] = json.load(f) + if args.update: + update_weights(args.weights, weights_data, measured) + return + default_weight = weights_data.get("_default", 15) meta_keys = {k for k in weights_data if k.startswith("_")} From e629c55975f0bd54d36a073627e9e9938482bc3a Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:21:30 -0400 Subject: [PATCH 7/9] ci: replace per-PR weight check with weekly auto-update workflow Remove the --output-durations capture and check-session-weights job from the checks workflow. Add a new update-session-weights workflow that runs weekly (Monday 06:00 UTC) or on manual dispatch. It measures all nox session durations on ubuntu/3.13, checks for drift, and opens a PR updating session-weights.json when any session drifts beyond 50%. --- .github/workflows/checks.yaml | 25 +----- .github/workflows/update-session-weights.yaml | 78 +++++++++++++++++++ 2 files changed, 79 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/update-session-weights.yaml diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 17168eb2..4d4b1812 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -78,30 +78,7 @@ jobs: - name: Run nox tests (shard ${{ matrix.shard }}/4) shell: bash run: | - mise exec python@${{ matrix.python-version }} -- python ./py/scripts/nox-matrix.py ${{ matrix.shard }} 4 \ - ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' && format('--output-durations measured-durations-{0}.json', matrix.shard) || '' }} - - name: Upload measured durations - if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: session-durations-shard-${{ matrix.shard }} - path: measured-durations-${{ matrix.shard }}.json - retention-days: 5 - - check-session-weights: - needs: [nox] - if: always() && needs.nox.result == 'success' - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - - name: Download measured durations - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 - with: - pattern: session-durations-shard-* - merge-multiple: true - - name: Check session weight drift - run: python ./py/scripts/check-session-weights.py measured-durations-*.json + mise exec python@${{ matrix.python-version }} -- python ./py/scripts/nox-matrix.py ${{ matrix.shard }} 4 adk-py: uses: ./.github/workflows/adk-py-test.yaml diff --git a/.github/workflows/update-session-weights.yaml b/.github/workflows/update-session-weights.yaml new file mode 100644 index 00000000..61e2d042 --- /dev/null +++ b/.github/workflows/update-session-weights.yaml @@ -0,0 +1,78 @@ +name: update-session-weights + +on: + schedule: + # Every Monday at 06:00 UTC + - cron: "0 6 * * 1" + workflow_dispatch: {} + +permissions: + contents: write + pull-requests: write + +jobs: + measure: + runs-on: ubuntu-latest + timeout-minutes: 30 + + strategy: + fail-fast: false + matrix: + shard: [0, 1, 2, 3] + + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + - name: Setup Python environment + uses: ./.github/actions/setup-python-env + with: + python-version: "3.13" + - name: Run nox tests (shard ${{ matrix.shard }}/4) + run: | + mise exec python@3.13 -- python ./py/scripts/nox-matrix.py ${{ matrix.shard }} 4 \ + --output-durations measured-durations-${{ matrix.shard }}.json + - name: Upload measured durations + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: session-durations-shard-${{ matrix.shard }} + path: measured-durations-${{ matrix.shard }}.json + retention-days: 5 + + update: + needs: [measure] + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + - name: Download measured durations + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + pattern: session-durations-shard-* + merge-multiple: true + - name: Check for drift + id: check + run: | + if python ./py/scripts/check-session-weights.py measured-durations-*.json; then + echo "drifted=false" >> "$GITHUB_OUTPUT" + else + echo "drifted=true" >> "$GITHUB_OUTPUT" + fi + - name: Update weights + if: steps.check.outputs.drifted == 'true' + run: | + python ./py/scripts/check-session-weights.py --update measured-durations-*.json + - name: Create pull request + if: steps.check.outputs.drifted == 'true' + uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 + with: + commit-message: "ci: update nox session weights" + branch: auto/update-session-weights + title: "ci: update nox session weights" + body: | + Automated weekly update of `py/scripts/session-weights.json`. + + Session durations were re-measured on `ubuntu-latest` with Python 3.13 + and at least one session drifted beyond the 50% threshold. + + This keeps nox shard balancing accurate. + labels: ci + delete-branch: true From 33cedaa92b2e89c2ac1aa56ff28ceada17bb94cc Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:37:37 -0400 Subject: [PATCH 8/9] cleanup --- py/scripts/check-session-weights.py | 2 -- py/scripts/nox-matrix.py | 3 --- 2 files changed, 5 deletions(-) diff --git a/py/scripts/check-session-weights.py b/py/scripts/check-session-weights.py index 6aa7839b..e90df606 100644 --- a/py/scripts/check-session-weights.py +++ b/py/scripts/check-session-weights.py @@ -35,7 +35,6 @@ def update_weights(weights_path: Path, weights_data: dict, measured: dict[str, int]) -> None: """Overwrite session-weights.json with measured durations.""" meta_keys = {k for k in weights_data if k.startswith("_")} - # Start with metadata keys updated = {k: weights_data[k] for k in sorted(meta_keys)} # Merge: keep measured values, drop sessions that no longer exist all_sessions = sorted(set(weights_data.keys() - meta_keys) | set(measured.keys())) @@ -89,7 +88,6 @@ def main() -> None: update_weights(args.weights, weights_data, measured) return - default_weight = weights_data.get("_default", 15) meta_keys = {k for k in weights_data if k.startswith("_")} drifted: list[str] = [] diff --git a/py/scripts/nox-matrix.py b/py/scripts/nox-matrix.py index 2cb53a93..4c9ff0c3 100644 --- a/py/scripts/nox-matrix.py +++ b/py/scripts/nox-matrix.py @@ -52,7 +52,6 @@ def assign_shards( default_weight: int, ) -> list[list[str]]: """Assign sessions to shards using greedy LPT bin-packing.""" - # Sort by weight descending weighted = [(s, weights.get(s, default_weight)) for s in sessions] weighted.sort(key=lambda x: -x[1]) @@ -64,7 +63,6 @@ def assign_shards( shard_assignments[lightest].append(name) shard_totals[lightest] += weight - # Print summary to stderr for i in range(total_shards): count = len(shard_assignments[i]) total = shard_totals[i] @@ -131,7 +129,6 @@ def main() -> None: print("\n".join(other_sessions)) return - # Run nox with the assigned sessions cmd = ["nox", "-f", str(noxfile), "-s", *my_sessions] if args.output_durations is None: From 5a0d45ec4966d1a7eb5c77cc04cf3657158b1a76 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 8 Apr 2026 12:41:19 -0400 Subject: [PATCH 9/9] better comment --- py/scripts/check-session-weights.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/py/scripts/check-session-weights.py b/py/scripts/check-session-weights.py index e90df606..e7fea128 100644 --- a/py/scripts/check-session-weights.py +++ b/py/scripts/check-session-weights.py @@ -6,9 +6,9 @@ whose actual duration drifted significantly from the recorded weight. Exit codes: - 0 — all weights are within tolerance - 1 — at least one weight drifted beyond the threshold, or a new/missing - session was detected + 0 — all weights are within tolerance (new/missing sessions are reported + but do not cause a non-zero exit; they receive the default weight) + 1 — at least one weight drifted beyond the threshold Usage: python check-session-weights.py measured-shard-0.json measured-shard-1.json ...