From 3b5e4b691c818ae2897753c6c659a249b6c65300 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Tue, 19 May 2026 13:28:43 -0700 Subject: [PATCH 1/3] feat(benchmarks): add performance benchmarks and CI integration --- .github/workflows/benchmarks.yml | 78 +++++++++++ docs/Contributing.md => CONTRIBUTING.md | 47 +++++-- README.md | 2 +- bench.json | 132 +++++++++++++++++++ benchmarks/baselines.json | 13 ++ benchmarks/compare_to_baseline.py | 96 ++++++++++++++ benchmarks/conftest.py | 20 +++ benchmarks/test_github_commits_throughput.py | 63 +++++++++ benchmarks/test_service_bulk_insert.py | 55 ++++++++ boost_library_tracker/services.py | 2 +- boost_mailing_list_tracker/services.py | 2 +- boost_usage_tracker/services.py | 2 +- conftest.py | 6 + core/_version.py | 2 +- cppa_pinecone_sync/services.py | 2 +- cppa_slack_tracker/services.py | 2 +- cppa_user_tracker/services.py | 2 +- cppa_youtube_script_tracker/services.py | 2 +- docs/How_to_add_a_collector.md | 2 +- docs/Onboarding.md | 4 +- docs/README.md | 2 +- docs/Service_API.md | 2 +- docs/boost_library_docs_tracker.md | 2 +- docs/cross-app-dependencies.md | 6 +- docs/service_api/README.md | 2 +- docs/service_api/boost_usage_tracker.md | 2 +- docs/service_api/clang_github_tracker.md | 2 +- docs/service_api/cppa_pinecone_sync.md | 2 +- docs/service_api/cppa_user_tracker.md | 2 +- docs/service_api/discord_activity_tracker.md | 2 +- docs/service_api/github_activity_tracker.md | 2 +- github_activity_tracker/services.py | 2 +- pyproject.toml | 2 +- pytest.ini | 1 + requirements-dev.in | 1 + requirements-dev.lock | 15 ++- uv.lock | 7 + 37 files changed, 543 insertions(+), 45 deletions(-) create mode 100644 .github/workflows/benchmarks.yml rename docs/Contributing.md => CONTRIBUTING.md (63%) create mode 100644 bench.json create mode 100644 benchmarks/baselines.json create mode 100644 benchmarks/compare_to_baseline.py create mode 100644 benchmarks/conftest.py create mode 100644 benchmarks/test_github_commits_throughput.py create 
mode 100644 benchmarks/test_service_bulk_insert.py create mode 100644 uv.lock diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..f3d3d751 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,78 @@ +name: Benchmarks + +on: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + benchmark: + runs-on: ubuntu-latest + timeout-minutes: 30 + + services: + postgres: + image: postgres:16 + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + ports: ["5432:5432"] + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + --shm-size=256mb + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + python-version: "3.13" + + - name: Cache uv + uses: actions/cache@v4 + with: + path: ~/.cache/uv + key: ${{ runner.os }}-uv-benchmark-${{ hashFiles('requirements-dev.lock') }} + restore-keys: | + ${{ runner.os }}-uv-benchmark- + ${{ runner.os }}-uv- + + - name: Install dependencies + env: + SETUPTOOLS_SCM_WRITE_TO_SOURCE: "1" + run: | + uv venv + uv pip install -r requirements-dev.lock + uv pip install -e . 
+ + - name: Run benchmarks + env: + DATABASE_URL: postgres://postgres:postgres@127.0.0.1:5432/postgres + SECRET_KEY: for-testing-only + DJANGO_SETTINGS_MODULE: config.test_settings + RUN_BENCHMARKS: "1" + run: | + uv run pytest benchmarks/ -m benchmark --benchmark-only \ + --benchmark-json=bench.json -v \ + --benchmark-disable-gc + + - name: Compare to baselines + if: success() + run: | + uv run python benchmarks/compare_to_baseline.py bench.json benchmarks/baselines.json + + - name: Upload benchmark JSON + if: always() + uses: actions/upload-artifact@v4 + with: + name: benchmark-json + path: bench.json + retention-days: 30 diff --git a/docs/Contributing.md b/CONTRIBUTING.md similarity index 63% rename from docs/Contributing.md rename to CONTRIBUTING.md index 03ec2eb6..434e6e18 100644 --- a/docs/Contributing.md +++ b/CONTRIBUTING.md @@ -34,11 +34,11 @@ Each Django app that has **models** provides a **`services.py`** module. This is | `cppa_slack_tracker` | `cppa_slack_tracker/services.py` | Slack teams, channels, messages, membership. | | `wg21_paper_tracker` | `wg21_paper_tracker/services.py` | WG21 papers, authors, mailings. | -For a full list of functions, parameter/return types, and validation (e.g. empty `name` raises `ValueError`), see **[Service_API.md](Service_API.md)** and the per-app docs in **[service_api/](service_api/)** (index: [service_api/README.md](service_api/README.md)). DTO protocols shared across trackers are documented in **[service_api/core_protocols.md](service_api/core_protocols.md)** (generated from `core/protocols.py`). +For a full list of functions, parameter/return types, and validation (e.g. empty `name` raises `ValueError`), see **[docs/Service_API.md](docs/Service_API.md)** and the per-app docs in **[docs/service_api/](docs/service_api/)** (index: [docs/service_api/README.md](docs/service_api/README.md)). DTO protocols shared across trackers are documented in **[docs/service_api/core_protocols.md](docs/service_api/core_protocols.md)** (generated from `core/protocols.py`).
### Regenerating service API docs -Reference tables in `docs/service_api/*.md` are produced by **[`scripts/generate_service_docs.py`](../scripts/generate_service_docs.py)** from each app’s `services.py` and from `core/protocols.py`. +Reference tables in `docs/service_api/*.md` are produced by **[`scripts/generate_service_docs.py`](scripts/generate_service_docs.py)** from each app’s `services.py` and from `core/protocols.py`. - **Markers:** Each file contains `` … ``. The script replaces **only** that region. Put hand-written notes (usage, cross-app warnings, command help) **below** the `END` marker. - **Regenerate locally:** `python scripts/generate_service_docs.py` (optional: `--app ` for one module). @@ -65,22 +65,47 @@ Reference tables in `docs/service_api/*.md` are produced by **[`scripts/generate ### Testing -- **Running tests:** From the project root, install dev deps (`pip install -r requirements-dev.lock` or `uv pip install -r requirements-dev.lock`), start the test database (`docker compose -f docker-compose.test.yml up -d`), set `DATABASE_URL` (and `SECRET_KEY` for the process) as in [README.md](../README.md#running-tests), then run `python -m pytest`. Tests **always use PostgreSQL** (`config.test_settings`); there is no SQLite fallback. -- See [README.md](../README.md#running-tests) and [Development_guideline.md](Development_guideline.md#testing-workflow) for full commands and options. +- **Running tests:** From the project root, install dev deps (`pip install -r requirements-dev.lock` or `uv pip install -r requirements-dev.lock`), start the test database (`docker compose -f docker-compose.test.yml up -d`), set `DATABASE_URL` (and `SECRET_KEY` for the process) as in [README.md](README.md#running-tests), then run `python -m pytest`. Tests **always use PostgreSQL** (`config.test_settings`); there is no SQLite fallback. 
+- See [README.md](README.md#running-tests) and [docs/Development_guideline.md](docs/Development_guideline.md#testing-workflow) for full commands and options. - **Unit tests for `services.py`:** Call the service functions and assert on the database (or mocks) as needed. - **Other tests:** Prefer service functions when setting up data. If you must create models directly for tests, keep it in test code (e.g. fixtures or test helpers) and avoid doing the same in production code. +### Performance benchmarks + +Throughput checks live under [`benchmarks/`](benchmarks/) and use **`pytest-benchmark`**. They are **not** collected during normal `pytest` runs: set **`RUN_BENCHMARKS=1`** so the root [`conftest.py`](conftest.py) stops ignoring that directory (see `collect_ignore`). Tests are marked with **`@pytest.mark.benchmark`**. + +**Prerequisites:** Same as unit tests: PostgreSQL, `DATABASE_URL`, `SECRET_KEY`, `DJANGO_SETTINGS_MODULE=config.test_settings` (see [README.md](README.md#running-tests)). + +**Run locally** (from repo root, with Postgres up): + +```bash +export RUN_BENCHMARKS=1 +export DATABASE_URL=postgres://postgres:postgres@127.0.0.1:5433/postgres +export SECRET_KEY=for-local-only +export DJANGO_SETTINGS_MODULE=config.test_settings +# Optional: batch size (default 50; match benchmarks/baselines.json "n") +export BENCHMARK_COMMIT_N=50 + +uv run pytest benchmarks/ -m benchmark --benchmark-only \ + --benchmark-json=bench.json -v +uv run python benchmarks/compare_to_baseline.py bench.json benchmarks/baselines.json +``` + +**Baselines:** [`benchmarks/baselines.json`](benchmarks/baselines.json) stores maximum acceptable **median** seconds per scenario (for the configured `n`). The compare script fails if any median exceeds `baseline_median × 1.25` (more than 25% slower than the reference). After a deliberate performance change or a CI image upgrade, update `median_seconds` (and `n` if you change `BENCHMARK_COMMIT_N`) using `stats.median` from the generated JSON. 
+ +**CI:** The [`.github/workflows/benchmarks.yml`](.github/workflows/benchmarks.yml) workflow runs on **`workflow_dispatch`** only, uploads `bench.json` as an artifact, and runs the compare step on success. + ## Other guidelines -- **Branching:** Create feature branches from `develop`. Open pull requests against `develop`. See [Development_guideline.md](Development_guideline.md). +- **Branching:** Create feature branches from `develop`. Open pull requests against `develop`. See [docs/Development_guideline.md](docs/Development_guideline.md). - **Code style:** Use Python 3.11+ and follow Django and project conventions. Use the project’s logging (`logging.getLogger(__name__)`). Before pushing, run **`uv run pyright`** (with dev deps) for the paths covered by **`pyrightconfig.json`**, and ensure CI’s **lint** / **pyright** / **test** jobs would pass. - **Database:** Use the Django ORM and migrations. Writes only through the service layer as above. -- **Docs:** Update this doc (and app `services.py` docstrings) when adding new apps or changing the write rules. After changing `services.py` or `core/protocols.py`, run `python scripts/generate_service_docs.py` and commit the updated `docs/service_api/` files. +- **Docs:** Update this file (and app `services.py` docstrings) when adding new apps or changing the write rules. After changing `services.py` or `core/protocols.py`, run `python scripts/generate_service_docs.py` and commit the updated `docs/service_api/` files. ## Related documentation -- [Service_API.md](Service_API.md) – API reference for all service layer functions. -- [Development_guideline.md](Development_guideline.md) – Setup, workflow, adding apps. -- [Workflow.md](Workflow.md) – Execution order and collectors. -- [Schema.md](Schema.md) – Database schema. -- [cross-app-dependencies.md](cross-app-dependencies.md) – Complete map of every cross-app FK, MTI, ORM read, and Python import dependency, plus `import-linter` recommendations. 
+- [docs/Service_API.md](docs/Service_API.md) – API reference for all service layer functions. +- [docs/Development_guideline.md](docs/Development_guideline.md) – Setup, workflow, adding apps. +- [docs/Workflow.md](docs/Workflow.md) – Execution order and collectors. +- [docs/Schema.md](docs/Schema.md) – Database schema. +- [docs/cross-app-dependencies.md](docs/cross-app-dependencies.md) – Complete map of every cross-app FK, MTI, ORM read, and Python import dependency, plus `import-linter` recommendations. diff --git a/README.md b/README.md index 633bc609..818030e4 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ Docs are organized **by topic** (one doc per concern: workflow, workspace, servi - [Workspace.md](docs/Workspace.md) – Workspace layout and usage for file processing. - [Schema.md](docs/Schema.md) – Database schema and table relationships. - [Development_guideline.md](docs/Development_guideline.md) – Development setup, app requirements, and step-by-step workflow. -- [Contributing.md](docs/Contributing.md) – Service layer (single place for writes), **regenerating service API docs** (`scripts/generate_service_docs.py`), and contributor guidelines. +- [CONTRIBUTING.md](CONTRIBUTING.md) – Service layer (single place for writes), **regenerating service API docs** (`scripts/generate_service_docs.py`), and contributor guidelines. - [Service_API.md](docs/Service_API.md) – API reference and index for all service layer functions. - [service_api/](docs/service_api/) – Per-app service API docs (name, description, parameters, return types, validation). 
diff --git a/bench.json b/bench.json new file mode 100644 index 00000000..6342e51b --- /dev/null +++ b/bench.json @@ -0,0 +1,132 @@ +{ + "machine_info": { + "node": "Leos-Mac-mini.local", + "processor": "arm", + "machine": "arm64", + "python_compiler": "Clang 21.1.4 ", + "python_implementation": "CPython", + "python_implementation_version": "3.13.12", + "python_version": "3.13.12", + "python_build": [ + "main", + "Mar 10 2026 18:26:32" + ], + "release": "25.4.0", + "system": "Darwin", + "cpu": { + "python_version": "3.13.12.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "ARM_8", + "bits": 64, + "count": 10, + "arch_string_raw": "arm64", + "brand_raw": "Apple M4" + } + }, + "commit_info": { + "id": "7bf1b7ea6657990eef44fdb362b762abb16e41ba", + "time": "2026-05-18T20:05:08-04:00", + "author_time": "2026-05-18T20:05:08-04:00", + "dirty": true, + "project": "boost-data-collector", + "branch": "develop" + }, + "benchmarks": [ + { + "group": null, + "name": "test_process_commit_data_batch", + "fullname": "benchmarks/test_github_commits_throughput.py::test_process_commit_data_batch", + "params": null, + "param": null, + "extra_info": { + "n": 50 + }, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.13009395799599588, + "max": 0.16657558304723352, + "mean": 0.14227045823354273, + "stddev": 0.01457181655810832, + "rounds": 5, + "median": 0.13689958304166794, + "iqr": 0.01724434396601282, + "q1": 0.1326302083034534, + "q3": 0.14987455226946622, + "iqr_outliers": 0, + "stddev_outliers": 1, + "outliers": "1;0", + "ld15iqr": 0.13009395799599588, + "hd15iqr": 0.16657558304723352, + "ops": 7.0288660936092535, + "total": 0.7113522911677137, + "data": [ + 0.16657558304723352, + 0.1334756250726059, + 0.13009395799599588, + 0.13689958304166794, + 0.14430754201021045 + ], + "iterations": 1 + } + }, + { + 
"group": null, + "name": "test_service_bulk_commits_and_file_changes", + "fullname": "benchmarks/test_service_bulk_insert.py::test_service_bulk_commits_and_file_changes", + "params": null, + "param": null, + "extra_info": { + "n": 50 + }, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.10591337503865361, + "max": 0.1513816670048982, + "mean": 0.13538706267718226, + "stddev": 0.01819949434483927, + "rounds": 6, + "median": 0.14058843749808148, + "iqr": 0.02617037494201213, + "q1": 0.12384004204068333, + "q3": 0.15001041698269546, + "iqr_outliers": 0, + "stddev_outliers": 1, + "outliers": "1;0", + "ld15iqr": 0.10591337503865361, + "hd15iqr": 0.1513816670048982, + "ops": 7.386230118489284, + "total": 0.8123223760630935, + "data": [ + 0.1513816670048982, + 0.15001041698269546, + 0.13251695793587714, + 0.12384004204068333, + 0.1486599170602858, + 0.10591337503865361 + ], + "iterations": 1 + } + } + ], + "datetime": "2026-05-19T18:09:23.360634+00:00", + "version": "5.2.3" +} \ No newline at end of file diff --git a/benchmarks/baselines.json b/benchmarks/baselines.json new file mode 100644 index 00000000..506ef4e9 --- /dev/null +++ b/benchmarks/baselines.json @@ -0,0 +1,13 @@ +{ + "description": "Maximum acceptable median wall time (seconds) per scenario at BENCHMARK_COMMIT_N. 
Update median_seconds after intentional perf work or when CI hardware changes; copy medians from --benchmark-json stats.median.", + "benchmarks": { + "benchmarks/test_github_commits_throughput.py::test_process_commit_data_batch": { + "median_seconds": 45.0, + "n": 50 + }, + "benchmarks/test_service_bulk_insert.py::test_service_bulk_commits_and_file_changes": { + "median_seconds": 35.0, + "n": 50 + } + } +} diff --git a/benchmarks/compare_to_baseline.py b/benchmarks/compare_to_baseline.py new file mode 100644 index 00000000..dc3bd0ef --- /dev/null +++ b/benchmarks/compare_to_baseline.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +""" +Compare pytest-benchmark JSON (--benchmark-json) against benchmarks/baselines.json. + +Exits with status 1 if any baseline median is exceeded by more than 25% (slower). +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +DEFAULT_REGRESSION_RATIO = 1.25 + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Compare benchmark JSON to checked-in median baselines." 
+ ) + parser.add_argument( + "results_json", + type=Path, + help="Path written by pytest-benchmark --benchmark-json", + ) + parser.add_argument( + "baselines_json", + type=Path, + help="Path to benchmarks/baselines.json", + ) + parser.add_argument( + "--regression-ratio", + type=float, + default=DEFAULT_REGRESSION_RATIO, + metavar="R", + help=f"Fail if median > baseline_median * R (default {DEFAULT_REGRESSION_RATIO})", + ) + args = parser.parse_args() + + results = json.loads(args.results_json.read_text(encoding="utf-8")) + baselines_doc = json.loads(args.baselines_json.read_text(encoding="utf-8")) + + bench_by_name = {b["fullname"]: b for b in results.get("benchmarks", [])} + expected: dict[str, dict] = baselines_doc.get("benchmarks", {}) + + failures: list[str] = [] + warnings: list[str] = [] + + for fullname, spec in expected.items(): + if spec.get("skip"): + continue + ref = spec.get("median_seconds") + if ref is None: + warnings.append(f"{fullname}: baseline has no median_seconds; skipping") + continue + + bench = bench_by_name.get(fullname) + if bench is None: + failures.append(f"{fullname}: missing from benchmark results") + continue + + median = float(bench["stats"]["median"]) + exp_n = spec.get("n") + if exp_n is not None: + got_n = bench.get("extra_info", {}).get("n") + if got_n is not None and int(got_n) != int(exp_n): + warnings.append( + f"{fullname}: baseline n={exp_n} but run reported n={got_n} " + "(set BENCHMARK_COMMIT_N to match baselines.json)" + ) + + limit = float(ref) * float(args.regression_ratio) + if median > limit: + failures.append( + f"{fullname}: median {median:.6f}s exceeds limit {limit:.6f}s " + f"(baseline {float(ref):.6f}s × {args.regression_ratio})" + ) + + for line in warnings: + print(f"WARNING: {line}", file=sys.stderr) + for line in failures: + print(f"FAIL: {line}", file=sys.stderr) + + if failures: + print( + f"Benchmark regression check failed ({len(failures)} scenario(s)).", + file=sys.stderr, + ) + return 1 + 
print("Benchmark regression check passed.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py new file mode 100644 index 00000000..3a7a82ff --- /dev/null +++ b/benchmarks/conftest.py @@ -0,0 +1,20 @@ +""" +Benchmark-only fixtures. Default pytest collection skips this directory unless +RUN_BENCHMARKS=1 (see root conftest.py). +""" + +from __future__ import annotations + +import os + +import pytest + + +@pytest.fixture +def benchmark_commit_n() -> int: + """Number of commits / service rows per benchmark iteration (default tuned for CI).""" + raw = os.environ.get("BENCHMARK_COMMIT_N", "50") + n = int(raw) + if n < 1: + raise ValueError("BENCHMARK_COMMIT_N must be >= 1") + return n diff --git a/benchmarks/test_github_commits_throughput.py b/benchmarks/test_github_commits_throughput.py new file mode 100644 index 00000000..0502dcba --- /dev/null +++ b/benchmarks/test_github_commits_throughput.py @@ -0,0 +1,63 @@ +""" +Benchmark: GitHub-shaped commit payloads through sync path `_process_commit_data`. + +Uses the unknown-author branch (no top-level author/committer) for stable +account resolution. Each payload includes one modified file with a unique path. 
+""" + +from __future__ import annotations + +import pytest + +from github_activity_tracker.models import FileChangeStatus +from github_activity_tracker.sync.commits import _process_commit_data + + +def _build_commit_payloads(n: int) -> list[dict]: + """REST-shaped dicts compatible with `_process_commit_data` (no network).""" + payloads: list[dict] = [] + for i in range(n): + sha = f"b{i:039d}" # 40 chars, unique per index + fname = f"benchmarks/path_{i}/file.txt" + payloads.append( + { + "sha": sha, + "commit": { + "message": f"benchmark commit {i}\n", + "author": { + "name": "Bench User", + "email": "bench@example.invalid", + "date": "2024-01-01T12:00:00Z", + }, + }, + "files": [ + { + "filename": fname, + "status": "modified", + "additions": 1, + "deletions": 1, + "patch": f"@@ benchmark {i} @@\n", + } + ], + } + ) + return payloads + + +@pytest.mark.benchmark +@pytest.mark.django_db(transaction=True) +def test_process_commit_data_batch( + benchmark, + github_repository, + benchmark_commit_n, +): + n = benchmark_commit_n + repo = github_repository + payloads = _build_commit_payloads(n) + + def run_batch() -> None: + for data in payloads: + _process_commit_data(repo, data) + + benchmark.extra_info["n"] = n + benchmark(run_batch) diff --git a/benchmarks/test_service_bulk_insert.py b/benchmarks/test_service_bulk_insert.py new file mode 100644 index 00000000..85cf5dd6 --- /dev/null +++ b/benchmarks/test_service_bulk_insert.py @@ -0,0 +1,55 @@ +""" +Benchmark: service-layer writes for N commits plus one file change each, in one transaction. 
+""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from django.db import transaction + +from github_activity_tracker import services +from github_activity_tracker.models import FileChangeStatus + + +@pytest.mark.benchmark +@pytest.mark.django_db(transaction=True) +def test_service_bulk_commits_and_file_changes( + benchmark, + github_repository, + github_account, + benchmark_commit_n, +): + n = benchmark_commit_n + repo = github_repository + account = github_account + commit_at = datetime(2024, 6, 1, tzinfo=timezone.utc) + hashes = [f"svcbulk{i:033d}" for i in range(n)] # 7 + 33 = 40 chars, unique per index + + def run_batch() -> None: + with transaction.atomic(): + for i in range(n): + commit_obj, _ = services.create_or_update_commit( + repo=repo, + account=account, + commit_hash=hashes[i], + comment=f"svc bulk {i}", + commit_at=commit_at, + ) + github_file, _ = services.create_or_update_github_file( + repo, + f"benchmarks/svc_bulk_{i}.txt", + is_deleted=False, + ) + services.add_commit_file_change( + commit_obj, + github_file, + status=FileChangeStatus.MODIFIED, + additions=1, + deletions=0, + patch="", + ) + + benchmark.extra_info["n"] = n + benchmark(run_batch) diff --git a/boost_library_tracker/services.py b/boost_library_tracker/services.py index 41701972..43cafea6 100644 --- a/boost_library_tracker/services.py +++ b/boost_library_tracker/services.py @@ -1,7 +1,7 @@ """ Service layer for boost_library_tracker. All creates/updates/deletes for this app's models must go through functions here. -See docs/Contributing.md. +See CONTRIBUTING.md. """ from __future__ import annotations diff --git a/boost_mailing_list_tracker/services.py b/boost_mailing_list_tracker/services.py index 92b83501..daf31f1d 100644 --- a/boost_mailing_list_tracker/services.py +++ b/boost_mailing_list_tracker/services.py @@ -2,7 +2,7 @@ Service layer for boost_mailing_list_tracker. All creates/updates/deletes for this app's models must go through functions here. 
-See docs/Contributing.md. +See CONTRIBUTING.md. """ from __future__ import annotations diff --git a/boost_usage_tracker/services.py b/boost_usage_tracker/services.py index 936ab6e9..92c8818f 100644 --- a/boost_usage_tracker/services.py +++ b/boost_usage_tracker/services.py @@ -2,7 +2,7 @@ Service layer for boost_usage_tracker. All creates/updates/deletes for this app's models must go through functions here. -See docs/Contributing.md for the project-wide rule. +See CONTRIBUTING.md for the project-wide rule. Includes bulk operations for speed (fewer round-trips): - bulk_create_or_update_boost_usage diff --git a/conftest.py b/conftest.py index 90475934..a63c9e31 100644 --- a/conftest.py +++ b/conftest.py @@ -2,8 +2,14 @@ Root conftest: register app-level fixture modules and optional session/global fixtures. """ +import os + import pytest +# Exclude benchmarks/ from default test collection (keeps CI and local pytest fast). +# Run benchmarks with: RUN_BENCHMARKS=1 uv run pytest benchmarks/ -m benchmark ... +collect_ignore = [] if os.environ.get("RUN_BENCHMARKS") == "1" else ["benchmarks"] + def _patch_django_context_copy_py314(): """Fix Django BaseContext.__copy__ on Python 3.14 (copy(super()) is broken there).""" diff --git a/core/_version.py b/core/_version.py index ccc4c4c3..6892cf81 100644 --- a/core/_version.py +++ b/core/_version.py @@ -1,2 +1,2 @@ # file generated by setuptools-scm; do not edit -version = "0.1.0" +version = "0.1.1.dev549+g7bf1b7ea6.d20260519" diff --git a/cppa_pinecone_sync/services.py b/cppa_pinecone_sync/services.py index 8c014cde..ef62e649 100644 --- a/cppa_pinecone_sync/services.py +++ b/cppa_pinecone_sync/services.py @@ -5,7 +5,7 @@ module. Do not call Model.objects.create(), model.save(), or model.delete() from outside this module (e.g. from management commands, views, or other apps). -See docs/Contributing.md for the project-wide rule. +See CONTRIBUTING.md for the project-wide rule. 
""" from __future__ import annotations diff --git a/cppa_slack_tracker/services.py b/cppa_slack_tracker/services.py index 9eab53b6..e5cbb80e 100644 --- a/cppa_slack_tracker/services.py +++ b/cppa_slack_tracker/services.py @@ -5,7 +5,7 @@ module. Do not call Model.objects.create(), model.save(), or model.delete() from outside this module (e.g. from management commands, views, or other apps). -See docs/Contributing.md for the project-wide rule. +See CONTRIBUTING.md for the project-wide rule. """ from __future__ import annotations diff --git a/cppa_user_tracker/services.py b/cppa_user_tracker/services.py index f2853230..1fbd92be 100644 --- a/cppa_user_tracker/services.py +++ b/cppa_user_tracker/services.py @@ -5,7 +5,7 @@ module. Do not call Model.objects.create(), model.save(), or model.delete() from outside this module (e.g. from management commands, views, or other apps). -See docs/Contributing.md for the project-wide rule. +See CONTRIBUTING.md for the project-wide rule. """ from __future__ import annotations diff --git a/cppa_youtube_script_tracker/services.py b/cppa_youtube_script_tracker/services.py index 7ec7877e..ff626266 100644 --- a/cppa_youtube_script_tracker/services.py +++ b/cppa_youtube_script_tracker/services.py @@ -5,7 +5,7 @@ module. Do not call Model.objects.create(), model.save(), or model.delete() from outside this module. -See docs/Contributing.md for the project-wide rule. +See CONTRIBUTING.md for the project-wide rule. """ from __future__ import annotations diff --git a/docs/How_to_add_a_collector.md b/docs/How_to_add_a_collector.md index 4dd5ef94..7cf6a7f2 100644 --- a/docs/How_to_add_a_collector.md +++ b/docs/How_to_add_a_collector.md @@ -31,7 +31,7 @@ The detailed contracts (abstract methods, lifecycle hooks, error handling, templ ## 4. 
Skeleton collector (minimal copy-paste example) -This section is a **canonical minimal pattern**: the management command is only responsible for parsing options and returning a collector from `get_collector()` (often ~10–15 lines). The **`AbstractCollector` subclass** implements `name`, `validate_config`, and `collect` (orchestration); `BaseCollectorCommand` still calls `run()`, which the base implements as validate-then-collect. The **service layer** (`services.py`) is the main place for DB and API logic—match the project rule that writes go through services (see [Contributing.md](Contributing.md#service-layer-single-place-for-writes)). +This section is a **canonical minimal pattern**: the management command is only responsible for parsing options and returning a collector from `get_collector()` (often ~10–15 lines). The **`AbstractCollector` subclass** implements `name`, `validate_config`, and `collect` (orchestration); `BaseCollectorCommand` still calls `run()`, which the base implements as validate-then-collect. The **service layer** (`services.py`) is the main place for DB and API logic—match the project rule that writes go through services (see [CONTRIBUTING.md](../CONTRIBUTING.md#service-layer-single-place-for-writes)). Keep imports and calls inside `collect()` going through `services.py` (for example `import my_skeleton_tracker.services as services` and only call functions from that module) so the write path stays obvious. diff --git a/docs/Onboarding.md b/docs/Onboarding.md index 7ab9ab0e..090f7e27 100644 --- a/docs/Onboarding.md +++ b/docs/Onboarding.md @@ -10,7 +10,7 @@ For setup steps (venv, migrate, tests), start with the root **[README.md](../REA 1. **One Django project, one database** — All installed apps share PostgreSQL (`boost_dashboard`). There is no per-app database isolation. 2. **Collectors are management commands** — Scheduled work is `python manage.py `. 
Production batches run **`run_scheduled_collectors`**, which reads **`config/boost_collector_schedule.yaml`** (see **[Workflow.md](Workflow.md)**). -3. **Writes go through `services.py`** — For apps that define models, creates/updates/deletes belong in that app’s **`services.py`**. Commands, fetchers, and other apps call those functions; they do not write models ad hoc (see **[Contributing.md](Contributing.md)**). +3. **Writes go through `services.py`** — For apps that define models, creates/updates/deletes belong in that app’s **`services.py`**. Commands, fetchers, and other apps call those functions; they do not write models ad hoc (see **[CONTRIBUTING.md](../CONTRIBUTING.md)**). 4. **Shared “collector contract” lives in `core`** — Prefer **`AbstractCollector`** (`name`, `validate_config`, `collect`) plus **`BaseCollectorCommand`** for a consistent shape; legacy **`CollectorBase`** (`run()` only) remains supported. See **[Core_public_API.md](Core_public_API.md)** and **[How_to_add_a_collector.md](How_to_add_a_collector.md)**. 5. **Cross-app coupling is intentionally loose** — Avoid **ForeignKeys** from one tracker app into another’s models when it would create tight coupling or import cycles. Prefer querying by IDs or shared reference tables (e.g. **Language**, **Identity**) as documented in **[Schema.md](Schema.md)** and **[Development_guideline.md](Development_guideline.md)**. @@ -23,7 +23,7 @@ For setup steps (venv, migrate, tests), start with the root **[README.md](../REA | 1 | [README.md](../README.md) | Prerequisites, setup, tests. | | 2 | [Architecture_data_flow.md](Architecture_data_flow.md) | Sources → collectors → DB / workspace → Pinecone. | | 3 | [Workflow.md](Workflow.md) | YAML schedules, Celery Beat, execution order. | -| 4 | [Contributing.md](Contributing.md) | Service-layer rule for DB writes. | +| 4 | [CONTRIBUTING.md](../CONTRIBUTING.md) | Service-layer rule for DB writes. 
| | 5 | [Workspace.md](Workspace.md) | Where files land under `WORKSPACE_DIR`. | | 6 | [Schema.md](Schema.md) — § Overview + diagrams for your area | Cross-app tables (identity, GitHub, Boost libraries). | | 7 | [Service_API.md](Service_API.md) + `service_api/.md` | Callable surface for writes you must use. | diff --git a/docs/README.md b/docs/README.md index b005266c..99820f58 100644 --- a/docs/README.md +++ b/docs/README.md @@ -19,7 +19,7 @@ Documentation is organized **by topic**, not by app. Each doc covers one cross-c | **Development** | [Development_guideline.md](Development_guideline.md) | Development setup, app requirements, and step-by-step workflow. | | **Testing / typing** | [README.md](../README.md#running-tests), [Development_guideline.md](Development_guideline.md#testing-workflow) | pytest (Postgres), coverage, when to run tests; **Pyright** (`uv run pyright`) and CI jobs. | | **Deployment** | [Deployment.md](Deployment.md) | CI/CD pipeline, environment secrets (`SSH_HOST`, `SSH_USER`, `SSH_PRIVATE_KEY`; optional `SSH_PORT`), server setup, and deploy script behavior. | -| **Contributing** | [Contributing.md](Contributing.md) | Service layer (single place for writes) and contributor guidelines. | +| **Contributing** | [CONTRIBUTING.md](../CONTRIBUTING.md) | Service layer (single place for writes) and contributor guidelines. | | **Service API** | [Service_API.md](Service_API.md) | API reference and index for all service layer functions. | | **Service API (per app)** | [service_api/](service_api/) | Per-app service API docs (name, description, parameters, return types, validation). | diff --git a/docs/Service_API.md b/docs/Service_API.md index a6499609..b0f656e2 100644 --- a/docs/Service_API.md +++ b/docs/Service_API.md @@ -66,5 +66,5 @@ See each app’s doc in [service_api/](service_api/) for parameter types, return ## Related docs -- [Contributing.md](Contributing.md) – Rule that all writes go through the service layer. 
+- [CONTRIBUTING.md](../CONTRIBUTING.md) – Rule that all writes go through the service layer. - [Schema.md](Schema.md) – Database schema and models. diff --git a/docs/boost_library_docs_tracker.md b/docs/boost_library_docs_tracker.md index 439862bc..5572b77f 100644 --- a/docs/boost_library_docs_tracker.md +++ b/docs/boost_library_docs_tracker.md @@ -180,4 +180,4 @@ When adding this app to the project, do all of the following: - [service_api/boost_library_docs_tracker.md](service_api/boost_library_docs_tracker.md) — Full service API reference for this app. - [Workflow.md](Workflow.md) — Execution order (this command runs after `run_boost_library_tracker`). - [Workspace.md](Workspace.md) — Workspace layout (`workspace/boost_library_docs_tracker/`). -- [Contributing.md](Contributing.md) — Service layer write rules. +- [CONTRIBUTING.md](../CONTRIBUTING.md) — Service layer write rules. diff --git a/docs/cross-app-dependencies.md b/docs/cross-app-dependencies.md index dc238280..ca7ac1b1 100644 --- a/docs/cross-app-dependencies.md +++ b/docs/cross-app-dependencies.md @@ -1,7 +1,7 @@ # Cross-App Dependencies This document maps every cross-app dependency between the tracker Django apps in this -project. It exists to make the [Contributing.md](Contributing.md) guideline — "prefer no +project. It exists to make the [CONTRIBUTING.md](../CONTRIBUTING.md) guideline — "prefer no ForeignKey from one tracker app into another's models" — visible and therefore enforceable. For **typed data boundaries** (run results, activity rows, checkpoints) shared across apps, prefer :mod:`core.protocols` (see [Core_public_API.md](Core_public_API.md#tracker-protocols-dtos)). @@ -76,7 +76,7 @@ These are hard database-level dependencies. They cannot be removed without migr ## 2. 
ORM Read Coupling (cross-app `.objects` queries outside `models.py`) -The [Contributing.md](Contributing.md) service layer rules enforce **write isolation** — +The [CONTRIBUTING.md](../CONTRIBUTING.md) service layer rules enforce **write isolation** — all inserts/updates/deletes go through `services.py`. However, **read isolation is not enforced**: any module may call `AnotherApp.Model.objects.filter(...)` directly. @@ -337,7 +337,7 @@ To add it to pre-commit: ## Related documentation -- [Contributing.md](Contributing.md) — service-layer write rules +- [CONTRIBUTING.md](../CONTRIBUTING.md) — service-layer write rules - [Core_public_API.md](Core_public_API.md) — `core` public surfaces and the coupling reduction goal - [Development_guideline.md](Development_guideline.md) — adding new apps - [`scripts/list_cross_app_imports.py`](../scripts/list_cross_app_imports.py) — discovery script diff --git a/docs/service_api/README.md b/docs/service_api/README.md index 6c20e608..571c2a86 100644 --- a/docs/service_api/README.md +++ b/docs/service_api/README.md @@ -38,4 +38,4 @@ Index of all app service modules. All writes to app models must go through the s - **wg21_paper_tracker** – WG21 paper and author persistence. - **core.protocols** – Structural contracts for sync outcomes and activity payloads (see [core_protocols.md](core_protocols.md)). -See [Contributing.md](../Contributing.md) for the rule that all writes go through the service layer, and for **regenerating** these docs from source. +See [CONTRIBUTING.md](../../CONTRIBUTING.md) for the rule that all writes go through the service layer, and for **regenerating** these docs from source. diff --git a/docs/service_api/boost_usage_tracker.md b/docs/service_api/boost_usage_tracker.md index 75f99655..a8275510 100644 --- a/docs/service_api/boost_usage_tracker.md +++ b/docs/service_api/boost_usage_tracker.md @@ -30,4 +30,4 @@ ## Related docs - [Schema.md](../Schema.md) – Section 4: Boost Usage Tracker. 
-- [Contributing.md](../Contributing.md) – Service layer rule. +- [CONTRIBUTING.md](../../CONTRIBUTING.md) – Service layer rule. diff --git a/docs/service_api/clang_github_tracker.md b/docs/service_api/clang_github_tracker.md index b66e135b..0f7c1570 100644 --- a/docs/service_api/clang_github_tracker.md +++ b/docs/service_api/clang_github_tracker.md @@ -30,4 +30,4 @@ Used by `clang_github_tracker.state_manager.resolve_start_end_dates` (with optio - [Schema.md](../Schema.md) – Section 2b: Clang GitHub Tracker. - [Workspace.md](../Workspace.md) – `workspace/raw/github_activity_tracker/`, `workspace/clang_github_tracker/`. -- [Contributing.md](../Contributing.md) – Service layer rule. +- [CONTRIBUTING.md](../../CONTRIBUTING.md) – Service layer rule. diff --git a/docs/service_api/cppa_pinecone_sync.md b/docs/service_api/cppa_pinecone_sync.md index b02aa89b..a56312c3 100644 --- a/docs/service_api/cppa_pinecone_sync.md +++ b/docs/service_api/cppa_pinecone_sync.md @@ -2,7 +2,7 @@ Module: `cppa_pinecone_sync.services` -All creates/updates/deletes for `PineconeFailList` and `PineconeSyncStatus` must go through this module. See [Contributing.md](../Contributing.md). +All creates/updates/deletes for `PineconeFailList` and `PineconeSyncStatus` must go through this module. See [CONTRIBUTING.md](../../CONTRIBUTING.md). 
--- diff --git a/docs/service_api/cppa_user_tracker.md b/docs/service_api/cppa_user_tracker.md index b07f994e..6313fe1b 100644 --- a/docs/service_api/cppa_user_tracker.md +++ b/docs/service_api/cppa_user_tracker.md @@ -34,5 +34,5 @@ ## Related - [Service API index](README.md) -- [Contributing](../Contributing.md) +- [CONTRIBUTING.md](../../CONTRIBUTING.md) - [Schema](../Schema.md) diff --git a/docs/service_api/discord_activity_tracker.md b/docs/service_api/discord_activity_tracker.md index ae628521..f9d63f61 100644 --- a/docs/service_api/discord_activity_tracker.md +++ b/docs/service_api/discord_activity_tracker.md @@ -145,6 +145,6 @@ Settings: - [DiscordChatExporter setup](../operations/discord_chat_exporter.md) — download, install, `.env` - [Service API index](README.md) -- [Contributing](../Contributing.md) +- [CONTRIBUTING](../../CONTRIBUTING.md) - [Schema](../Schema.md) - [Workspace](../Workspace.md) – raw archives under `{WORKSPACE_DIR}/raw/discord_activity_tracker///`; app folder `{WORKSPACE_DIR}/discord_activity_tracker/` (CLI `script/`, backfill drop `Discussion - c-cpp-discussion/`) diff --git a/docs/service_api/github_activity_tracker.md b/docs/service_api/github_activity_tracker.md index 6bd785e1..b75c9bc3 100644 --- a/docs/service_api/github_activity_tracker.md +++ b/docs/service_api/github_activity_tracker.md @@ -64,5 +64,5 @@ To sync a repo from GitHub (read last updated from DB, fetch from GitHub, save v ## Related - [Service API index](README.md) -- [Contributing](../Contributing.md) +- [CONTRIBUTING](../../CONTRIBUTING.md) - [Schema](../Schema.md) diff --git a/github_activity_tracker/services.py b/github_activity_tracker/services.py index 3cdc8418..c07cde6b 100644 --- a/github_activity_tracker/services.py +++ b/github_activity_tracker/services.py @@ -5,7 +5,7 @@ module. Do not call Model.objects.create(), model.save(), or model.delete() from outside this module (e.g. from management commands, views, or other apps). 
-See docs/Contributing.md for the project-wide rule. +See CONTRIBUTING.md for the project-wide rule. """ from __future__ import annotations diff --git a/pyproject.toml b/pyproject.toml index 4a8ef19a..0a6ac7a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ description = "Boost Data Collector Django project" [tool.setuptools.packages.find] where = ["."] include = ["*"] -exclude = ["*.tests", "*.tests.*", "tests", "tests.*"] +exclude = ["*.tests", "*.tests.*", "tests", "tests.*", "benchmarks", "benchmarks.*"] [tool.setuptools_scm] fallback_version = "0.1.0" diff --git a/pytest.ini b/pytest.ini index 9f457850..34f97da4 100644 --- a/pytest.ini +++ b/pytest.ini @@ -8,3 +8,4 @@ filterwarnings = ignore::PendingDeprecationWarning markers = django_db: mark test as using the database (django_db is built-in from pytest-django) + benchmark: performance benchmarks; collection requires RUN_BENCHMARKS=1 (see CONTRIBUTING.md) diff --git a/requirements-dev.in b/requirements-dev.in index f28d8476..f140b645 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -4,6 +4,7 @@ -r requirements.in pytest>=7.4,<9 +pytest-benchmark>=4.0,<6 pytest-django>=4.5,<5 django-stubs>=4.2.7,<5 django-stubs-ext>=4.2.7,<5 diff --git a/requirements-dev.lock b/requirements-dev.lock index 4d4126e2..ec2e7cb5 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile requirements-dev.in -o requirements-dev.lock --python-version 3.11 +# uv pip compile requirements-dev.in -o requirements-dev.lock --python-version 3.13 aiohappyeyeballs==2.6.1 # via aiohttp aiohttp==3.13.5 @@ -12,13 +12,13 @@ annotated-types==0.7.0 # via pydantic asgiref==3.11.1 # via django -async-timeout==5.0.1 - # via redis attrs==26.1.0 # via # aiohttp # outcome # trio +audioop-lts==0.2.2 + # via discord-py beautifulsoup4==4.14.3 # via -r requirements.in billiard==4.2.4 @@ -175,6 +175,8 @@ psycopg==3.3.4 # via -r 
requirements.in psycopg-binary==3.3.4 # via psycopg +py-cpuinfo==9.0.0 + # via pytest-benchmark pyasn1==0.6.3 # via pyasn1-modules pyasn1-modules==0.4.2 @@ -206,8 +208,11 @@ pysocks==1.7.1 pytest==8.4.2 # via # -r requirements-dev.in + # pytest-benchmark # pytest-cov # pytest-django +pytest-benchmark==5.2.3 + # via -r requirements-dev.in pytest-cov==6.3.0 # via -r requirements-dev.in pytest-django==4.12.0 @@ -254,8 +259,6 @@ soupsieve==2.8.3 # via beautifulsoup4 sqlparse==0.5.5 # via django -tomli==2.4.1 - # via coverage tqdm==4.67.3 # via pinecone trio==0.33.0 @@ -270,12 +273,10 @@ types-pyyaml==6.0.12.20260510 # via django-stubs typing-extensions==4.15.0 # via - # aiosignal # beautifulsoup4 # django-stubs # django-stubs-ext # pinecone - # psycopg # pydantic # pydantic-core # pygithub diff --git a/uv.lock b/uv.lock new file mode 100644 index 00000000..2387c15c --- /dev/null +++ b/uv.lock @@ -0,0 +1,7 @@ +version = 1 +revision = 3 +requires-python = ">=3.11" + +[[package]] +name = "boost-data-collector" +source = { editable = "." 
} From 94302c71984cf42be97d28f6f1fd5f3e193564c5 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Tue, 19 May 2026 18:56:23 -0700 Subject: [PATCH 2/3] chore: remove uv.lock file and update version to 0.1.0 in _version.py --- bench.json | 2 +- benchmarks/test_github_commits_throughput.py | 1 - core/_version.py | 2 +- uv.lock | 7 ------- 4 files changed, 2 insertions(+), 10 deletions(-) delete mode 100644 uv.lock diff --git a/bench.json b/bench.json index 6342e51b..94e042ad 100644 --- a/bench.json +++ b/bench.json @@ -129,4 +129,4 @@ ], "datetime": "2026-05-19T18:09:23.360634+00:00", "version": "5.2.3" -} \ No newline at end of file +} diff --git a/benchmarks/test_github_commits_throughput.py b/benchmarks/test_github_commits_throughput.py index 0502dcba..73fa5308 100644 --- a/benchmarks/test_github_commits_throughput.py +++ b/benchmarks/test_github_commits_throughput.py @@ -9,7 +9,6 @@ import pytest -from github_activity_tracker.models import FileChangeStatus from github_activity_tracker.sync.commits import _process_commit_data diff --git a/core/_version.py b/core/_version.py index 6892cf81..ccc4c4c3 100644 --- a/core/_version.py +++ b/core/_version.py @@ -1,2 +1,2 @@ # file generated by setuptools-scm; do not edit -version = "0.1.1.dev549+g7bf1b7ea6.d20260519" +version = "0.1.0" diff --git a/uv.lock b/uv.lock deleted file mode 100644 index 2387c15c..00000000 --- a/uv.lock +++ /dev/null @@ -1,7 +0,0 @@ -version = 1 -revision = 3 -requires-python = ">=3.11" - -[[package]] -name = "boost-data-collector" -source = { editable = "." 
} From 54953975d7423855cf68c7a970232200222ea815 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 20 May 2026 08:39:19 -0700 Subject: [PATCH 3/3] chore: update .gitignore to exclude bench.json and remove the file; modify CI scripts for benchmark integration --- .github/workflows/actions.yml | 28 +++--- .github/workflows/benchmarks.yml | 8 +- .gitignore | 2 + CONTRIBUTING.md | 3 +- bench.json | 132 ------------------------- benchmarks/baselines.json | 4 +- benchmarks/compare_to_baseline.py | 2 +- benchmarks/test_service_bulk_insert.py | 2 +- core/_version.py | 2 +- docs/service_api/cppa_pinecone_sync.md | 2 +- 10 files changed, 28 insertions(+), 157 deletions(-) delete mode 100644 bench.json diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index b05da1b3..70916688 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -19,15 +19,15 @@ jobs: timeout-minutes: 15 steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7 with: python-version: "3.13" - name: Cache uv - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 with: path: ~/.cache/uv key: ${{ runner.os }}-uv-pre-commit @@ -35,7 +35,7 @@ jobs: ${{ runner.os }}-uv- - name: Cache pre-commit environments - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 with: path: ~/.cache/pre-commit key: ${{ runner.os }}-pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} @@ -51,15 +51,15 @@ jobs: timeout-minutes: 10 steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7 with: python-version: "3.13" - name: Cache uv - uses: 
actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 with: path: ~/.cache/uv key: ${{ runner.os }}-uv-pyright-${{ hashFiles('requirements-dev.lock') }} @@ -99,17 +99,17 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7 with: python-version: "3.13" - name: Cache uv - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 with: path: ~/.cache/uv key: ${{ runner.os }}-uv-test @@ -161,7 +161,7 @@ jobs: - name: Upload HTML coverage report if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: coverage-html path: htmlcov/ @@ -169,14 +169,14 @@ jobs: - name: Upload XML coverage report if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: coverage-xml path: coverage.xml - name: Upload test results if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: pytest-results path: junit.xml @@ -189,7 +189,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Create .env for CI run: | diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index f3d3d751..e80007ec 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -29,15 +29,15 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7 with: python-version: 
"3.13" - name: Cache uv - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 with: path: ~/.cache/uv key: ${{ runner.os }}-uv-benchmark-${{ hashFiles('requirements-dev.lock') }} @@ -71,7 +71,7 @@ jobs: - name: Upload benchmark JSON if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: benchmark-json path: bench.json diff --git a/.gitignore b/.gitignore index 4d5c116b..307e325b 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,8 @@ media/ .test_artifacts/ # Testing / coverage +# pytest-benchmark JSON (machine_info / commit_info; use CI artifact, do not commit) +bench.json .coverage coverage.xml coverage.json diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 434e6e18..f64f3aa0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -87,7 +87,8 @@ export DJANGO_SETTINGS_MODULE=config.test_settings export BENCHMARK_COMMIT_N=50 uv run pytest benchmarks/ -m benchmark --benchmark-only \ - --benchmark-json=bench.json -v + --benchmark-json=bench.json -v \ + --benchmark-disable-gc uv run python benchmarks/compare_to_baseline.py bench.json benchmarks/baselines.json ``` diff --git a/bench.json b/bench.json deleted file mode 100644 index 94e042ad..00000000 --- a/bench.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "machine_info": { - "node": "Leos-Mac-mini.local", - "processor": "arm", - "machine": "arm64", - "python_compiler": "Clang 21.1.4 ", - "python_implementation": "CPython", - "python_implementation_version": "3.13.12", - "python_version": "3.13.12", - "python_build": [ - "main", - "Mar 10 2026 18:26:32" - ], - "release": "25.4.0", - "system": "Darwin", - "cpu": { - "python_version": "3.13.12.final.0 (64 bit)", - "cpuinfo_version": [ - 9, - 0, - 0 - ], - "cpuinfo_version_string": "9.0.0", - "arch": "ARM_8", - "bits": 64, - "count": 10, - "arch_string_raw": "arm64", - "brand_raw": "Apple M4" - } - }, - "commit_info": { - "id": 
"7bf1b7ea6657990eef44fdb362b762abb16e41ba", - "time": "2026-05-18T20:05:08-04:00", - "author_time": "2026-05-18T20:05:08-04:00", - "dirty": true, - "project": "boost-data-collector", - "branch": "develop" - }, - "benchmarks": [ - { - "group": null, - "name": "test_process_commit_data_batch", - "fullname": "benchmarks/test_github_commits_throughput.py::test_process_commit_data_batch", - "params": null, - "param": null, - "extra_info": { - "n": 50 - }, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 0.13009395799599588, - "max": 0.16657558304723352, - "mean": 0.14227045823354273, - "stddev": 0.01457181655810832, - "rounds": 5, - "median": 0.13689958304166794, - "iqr": 0.01724434396601282, - "q1": 0.1326302083034534, - "q3": 0.14987455226946622, - "iqr_outliers": 0, - "stddev_outliers": 1, - "outliers": "1;0", - "ld15iqr": 0.13009395799599588, - "hd15iqr": 0.16657558304723352, - "ops": 7.0288660936092535, - "total": 0.7113522911677137, - "data": [ - 0.16657558304723352, - 0.1334756250726059, - 0.13009395799599588, - 0.13689958304166794, - 0.14430754201021045 - ], - "iterations": 1 - } - }, - { - "group": null, - "name": "test_service_bulk_commits_and_file_changes", - "fullname": "benchmarks/test_service_bulk_insert.py::test_service_bulk_commits_and_file_changes", - "params": null, - "param": null, - "extra_info": { - "n": 50 - }, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 0.10591337503865361, - "max": 0.1513816670048982, - "mean": 0.13538706267718226, - "stddev": 0.01819949434483927, - "rounds": 6, - "median": 0.14058843749808148, - "iqr": 0.02617037494201213, - "q1": 0.12384004204068333, - "q3": 0.15001041698269546, - "iqr_outliers": 0, - "stddev_outliers": 1, - "outliers": "1;0", - "ld15iqr": 0.10591337503865361, - 
"hd15iqr": 0.1513816670048982, - "ops": 7.386230118489284, - "total": 0.8123223760630935, - "data": [ - 0.1513816670048982, - 0.15001041698269546, - 0.13251695793587714, - 0.12384004204068333, - 0.1486599170602858, - 0.10591337503865361 - ], - "iterations": 1 - } - } - ], - "datetime": "2026-05-19T18:09:23.360634+00:00", - "version": "5.2.3" -} diff --git a/benchmarks/baselines.json b/benchmarks/baselines.json index 506ef4e9..45ed9378 100644 --- a/benchmarks/baselines.json +++ b/benchmarks/baselines.json @@ -2,11 +2,11 @@ "description": "Maximum acceptable median wall time (seconds) per scenario at BENCHMARK_COMMIT_N. Update median_seconds after intentional perf work or when CI hardware changes; copy medians from --benchmark-json stats.median.", "benchmarks": { "benchmarks/test_github_commits_throughput.py::test_process_commit_data_batch": { - "median_seconds": 45.0, + "median_seconds": 0.1369, "n": 50 }, "benchmarks/test_service_bulk_insert.py::test_service_bulk_commits_and_file_changes": { - "median_seconds": 35.0, + "median_seconds": 0.1406, "n": 50 } } diff --git a/benchmarks/compare_to_baseline.py b/benchmarks/compare_to_baseline.py index dc3bd0ef..7ecc7d2d 100644 --- a/benchmarks/compare_to_baseline.py +++ b/benchmarks/compare_to_baseline.py @@ -74,7 +74,7 @@ def main() -> int: if median > limit: failures.append( f"{fullname}: median {median:.6f}s exceeds limit {limit:.6f}s " - f"(baseline {float(ref):.6f}s × {args.regression_ratio})" + f"(baseline {float(ref):.6f}s x {args.regression_ratio})" ) for line in warnings: diff --git a/benchmarks/test_service_bulk_insert.py b/benchmarks/test_service_bulk_insert.py index 85cf5dd6..0c476861 100644 --- a/benchmarks/test_service_bulk_insert.py +++ b/benchmarks/test_service_bulk_insert.py @@ -25,7 +25,7 @@ def test_service_bulk_commits_and_file_changes( repo = github_repository account = github_account commit_at = datetime(2024, 6, 1, tzinfo=timezone.utc) - hashes = [f"svcbulk{i:056d}"[:40] for i in range(n)] + hashes = 
[f"{i:040x}" for i in range(n)] def run_batch() -> None: with transaction.atomic(): diff --git a/core/_version.py b/core/_version.py index ccc4c4c3..59aa09a2 100644 --- a/core/_version.py +++ b/core/_version.py @@ -1,2 +1,2 @@ # file generated by setuptools-scm; do not edit -version = "0.1.0" +version = "0.1.1.dev553+g94302c719.d20260520" diff --git a/docs/service_api/cppa_pinecone_sync.md b/docs/service_api/cppa_pinecone_sync.md index a56312c3..a6495527 100644 --- a/docs/service_api/cppa_pinecone_sync.md +++ b/docs/service_api/cppa_pinecone_sync.md @@ -22,4 +22,4 @@ All creates/updates/deletes for `PineconeFailList` and `PineconeSyncStatus` must ## Related - [Service API index](README.md) -- [Contributing](../Contributing.md) +- [CONTRIBUTING.md](../../CONTRIBUTING.md)