From 3b5e4b691c818ae2897753c6c659a249b6c65300 Mon Sep 17 00:00:00 2001
From: Leo Chen <leo.chen0412@outlook.com>
Date: Tue, 19 May 2026 13:28:43 -0700
Subject: [PATCH 1/3] feat(benchmarks): add performance benchmarks and CI
 integration

---
 .github/workflows/benchmarks.yml             |  78 +++++++++++
 docs/Contributing.md => CONTRIBUTING.md      |  47 +++++--
 README.md                                    |   2 +-
 bench.json                                   | 132 +++++++++++++++++++
 benchmarks/baselines.json                    |  13 ++
 benchmarks/compare_to_baseline.py            |  96 ++++++++++++++
 benchmarks/conftest.py                       |  20 +++
 benchmarks/test_github_commits_throughput.py |  63 +++++++++
 benchmarks/test_service_bulk_insert.py       |  55 ++++++++
 boost_library_tracker/services.py            |   2 +-
 boost_mailing_list_tracker/services.py       |   2 +-
 boost_usage_tracker/services.py              |   2 +-
 conftest.py                                  |   6 +
 core/_version.py                             |   2 +-
 cppa_pinecone_sync/services.py               |   2 +-
 cppa_slack_tracker/services.py               |   2 +-
 cppa_user_tracker/services.py                |   2 +-
 cppa_youtube_script_tracker/services.py      |   2 +-
 docs/How_to_add_a_collector.md               |   2 +-
 docs/Onboarding.md                           |   4 +-
 docs/README.md                               |   2 +-
 docs/Service_API.md                          |   2 +-
 docs/boost_library_docs_tracker.md           |   2 +-
 docs/cross-app-dependencies.md               |   6 +-
 docs/service_api/README.md                   |   2 +-
 docs/service_api/boost_usage_tracker.md      |   2 +-
 docs/service_api/clang_github_tracker.md     |   2 +-
 docs/service_api/cppa_pinecone_sync.md       |   2 +-
 docs/service_api/cppa_user_tracker.md        |   2 +-
 docs/service_api/discord_activity_tracker.md |   2 +-
 docs/service_api/github_activity_tracker.md  |   2 +-
 github_activity_tracker/services.py          |   2 +-
 pyproject.toml                               |   2 +-
 pytest.ini                                   |   1 +
 requirements-dev.in                          |   1 +
 requirements-dev.lock                        |  15 ++-
 uv.lock                                      |   7 +
 37 files changed, 543 insertions(+), 45 deletions(-)
 create mode 100644 .github/workflows/benchmarks.yml
 rename docs/Contributing.md => CONTRIBUTING.md (63%)
 create mode 100644 bench.json
 create mode 100644 benchmarks/baselines.json
 create mode 100644 benchmarks/compare_to_baseline.py
 create mode 100644 benchmarks/conftest.py
 create mode 100644 benchmarks/test_github_commits_throughput.py
 create mode 100644 benchmarks/test_service_bulk_insert.py
 create mode 100644 uv.lock

diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
new file mode 100644
index 00000000..f3d3d751
--- /dev/null
+++ b/.github/workflows/benchmarks.yml
@@ -0,0 +1,78 @@
+name: Benchmarks
+
+on:
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    services:
+      postgres:
+        image: postgres:16
+        env:
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_DB: postgres
+        ports: ["5432:5432"]
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+          --shm-size=256mb
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          python-version: "3.13"
+
+      - name: Cache uv
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/uv
+          key: ${{ runner.os }}-uv-benchmark-${{ hashFiles('requirements-dev.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-uv-benchmark-
+            ${{ runner.os }}-uv-
+
+      - name: Install dependencies
+        env:
+          SETUPTOOLS_SCM_WRITE_TO_SOURCE: "1"
+        run: |
+          uv venv
+          uv pip install -r requirements-dev.lock
+          uv pip install -e .
+
+      - name: Run benchmarks
+        env:
+          DATABASE_URL: postgres://postgres:postgres@127.0.0.1:5432/postgres
+          SECRET_KEY: for-testing-only
+          DJANGO_SETTINGS_MODULE: config.test_settings
+          RUN_BENCHMARKS: "1"
+        run: |
+          uv run pytest benchmarks/ -m benchmark --benchmark-only \
+            --benchmark-json=bench.json -v \
+            --benchmark-disable-gc
+
+      - name: Compare to baselines
+        if: success()
+        run: |
+          uv run python benchmarks/compare_to_baseline.py bench.json benchmarks/baselines.json
+
+      - name: Upload benchmark JSON
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-json
+          path: bench.json
+          retention-days: 30
diff --git a/docs/Contributing.md b/CONTRIBUTING.md
similarity index 63%
rename from docs/Contributing.md
rename to CONTRIBUTING.md
index 03ec2eb6..434e6e18 100644
--- a/docs/Contributing.md
+++ b/CONTRIBUTING.md
@@ -34,11 +34,11 @@ Each Django app that has **models** provides a **`services.py`** module. This is
 | `cppa_slack_tracker` | `cppa_slack_tracker/services.py` | Slack teams, channels, messages, membership. |
 | `wg21_paper_tracker` | `wg21_paper_tracker/services.py` | WG21 papers, authors, mailings. |
 
-For a full list of functions, parameter/return types, and validation (e.g. empty `name` raises `ValueError`), see **[Service_API.md](Service_API.md)** and the per-app docs in **[service_api/](service_api/)** (index: [service_api/README.md](service_api/README.md)). DTO protocols shared across trackers are documented in **[service_api/core_protocols.md](service_api/core_protocols.md)** (generated from `core/protocols.py`).
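+For a full list of functions, parameter/return types, and validation (e.g. empty `name` raises `ValueError`), see **[docs/Service_API.md](docs/Service_API.md)** and the per-app docs in **[docs/service_api/](docs/service_api/)** (index: [docs/service_api/README.md](docs/service_api/README.md)). DTO protocols shared across trackers are documented in **[docs/service_api/core_protocols.md](docs/service_api/core_protocols.md)** (generated from `core/protocols.py`).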
 
 ### Regenerating service API docs
 
-Reference tables in `docs/service_api/*.md` are produced by **[`scripts/generate_service_docs.py`](../scripts/generate_service_docs.py)** from each app’s `services.py` and from `core/protocols.py`.
+Reference tables in `docs/service_api/*.md` are produced by **[`scripts/generate_service_docs.py`](scripts/generate_service_docs.py)** from each app’s `services.py` and from `core/protocols.py`.
 
 - **Markers:** Each file contains `<!-- SERVICE_API:GENERATED:START -->` … `<!-- SERVICE_API:GENERATED:END -->`. The script replaces **only** that region. Put hand-written notes (usage, cross-app warnings, command help) **below** the `END` marker.
 - **Regenerate locally:** `python scripts/generate_service_docs.py` (optional: `--app <django_app_label>` for one module).
@@ -65,22 +65,47 @@ Reference tables in `docs/service_api/*.md` are produced by **[`scripts/generate
 
 ### Testing
 
-- **Running tests:** From the project root, install dev deps (`pip install -r requirements-dev.lock` or `uv pip install -r requirements-dev.lock`), start the test database (`docker compose -f docker-compose.test.yml up -d`), set `DATABASE_URL` (and `SECRET_KEY` for the process) as in [README.md](../README.md#running-tests), then run `python -m pytest`. Tests **always use PostgreSQL** (`config.test_settings`); there is no SQLite fallback.
-- See [README.md](../README.md#running-tests) and [Development_guideline.md](Development_guideline.md#testing-workflow) for full commands and options.
+- **Running tests:** From the project root, install dev deps (`pip install -r requirements-dev.lock` or `uv pip install -r requirements-dev.lock`), start the test database (`docker compose -f docker-compose.test.yml up -d`), set `DATABASE_URL` (and `SECRET_KEY` for the process) as in [README.md](README.md#running-tests), then run `python -m pytest`. Tests **always use PostgreSQL** (`config.test_settings`); there is no SQLite fallback.
+- See [README.md](README.md#running-tests) and [docs/Development_guideline.md](docs/Development_guideline.md#testing-workflow) for full commands and options.
 - **Unit tests for `services.py`:** Call the service functions and assert on the database (or mocks) as needed.
 - **Other tests:** Prefer service functions when setting up data. If you must create models directly for tests, keep it in test code (e.g. fixtures or test helpers) and avoid doing the same in production code.
 
+### Performance benchmarks
+
+Throughput checks live under [`benchmarks/`](benchmarks/) and use **`pytest-benchmark`**. They are **not** collected during normal `pytest` runs: set **`RUN_BENCHMARKS=1`** so the root [`conftest.py`](conftest.py) stops ignoring that directory (see `collect_ignore`). Tests are marked with **`@pytest.mark.benchmark`**.
+
+**Prerequisites:** Same as unit tests: PostgreSQL, `DATABASE_URL`, `SECRET_KEY`, `DJANGO_SETTINGS_MODULE=config.test_settings` (see [README.md](README.md#running-tests)).
+
+**Run locally** (from repo root, with Postgres up):
+
+```bash
+export RUN_BENCHMARKS=1
+export DATABASE_URL=postgres://postgres:postgres@127.0.0.1:5433/postgres
+export SECRET_KEY=for-local-only
+export DJANGO_SETTINGS_MODULE=config.test_settings
+# Optional: batch size (default 50; match benchmarks/baselines.json "n")
+export BENCHMARK_COMMIT_N=50
+
+uv run pytest benchmarks/ -m benchmark --benchmark-only \
+  --benchmark-json=bench.json -v
+uv run python benchmarks/compare_to_baseline.py bench.json benchmarks/baselines.json
+```
+
+**Baselines:** [`benchmarks/baselines.json`](benchmarks/baselines.json) stores the reference **median** seconds per scenario (for the configured `n`). The compare script fails if any median exceeds `baseline_median × 1.25` (more than 25% slower than the reference). After a deliberate performance change or a CI image upgrade, update `median_seconds` (and `n` if you change `BENCHMARK_COMMIT_N`) using `stats.median` from the generated JSON.
+
+**CI:** The [`.github/workflows/benchmarks.yml`](.github/workflows/benchmarks.yml) workflow runs on **`workflow_dispatch`** only, uploads `bench.json` as an artifact, and runs the compare step on success.
+
 ## Other guidelines
 
-- **Branching:** Create feature branches from `develop`. Open pull requests against `develop`. See [Development_guideline.md](Development_guideline.md).
+- **Branching:** Create feature branches from `develop`. Open pull requests against `develop`. See [docs/Development_guideline.md](docs/Development_guideline.md).
 - **Code style:** Use Python 3.11+ and follow Django and project conventions. Use the project’s logging (`logging.getLogger(__name__)`). Before pushing, run **`uv run pyright`** (with dev deps) for the paths covered by **`pyrightconfig.json`**, and ensure CI’s **lint** / **pyright** / **test** jobs would pass.
 - **Database:** Use the Django ORM and migrations. Writes only through the service layer as above.
-- **Docs:** Update this doc (and app `services.py` docstrings) when adding new apps or changing the write rules. After changing `services.py` or `core/protocols.py`, run `python scripts/generate_service_docs.py` and commit the updated `docs/service_api/` files.
+- **Docs:** Update this file (and app `services.py` docstrings) when adding new apps or changing the write rules. After changing `services.py` or `core/protocols.py`, run `python scripts/generate_service_docs.py` and commit the updated `docs/service_api/` files.
 
 ## Related documentation
 
-- [Service_API.md](Service_API.md) – API reference for all service layer functions.
-- [Development_guideline.md](Development_guideline.md) – Setup, workflow, adding apps.
-- [Workflow.md](Workflow.md) – Execution order and collectors.
-- [Schema.md](Schema.md) – Database schema.
-- [cross-app-dependencies.md](cross-app-dependencies.md) – Complete map of every cross-app FK, MTI, ORM read, and Python import dependency, plus `import-linter` recommendations.
+- [docs/Service_API.md](docs/Service_API.md) – API reference for all service layer functions.
+- [docs/Development_guideline.md](docs/Development_guideline.md) – Setup, workflow, adding apps.
+- [docs/Workflow.md](docs/Workflow.md) – Execution order and collectors.
+- [docs/Schema.md](docs/Schema.md) – Database schema.
+- [docs/cross-app-dependencies.md](docs/cross-app-dependencies.md) – Complete map of every cross-app FK, MTI, ORM read, and Python import dependency, plus `import-linter` recommendations.
diff --git a/README.md b/README.md
index 633bc609..818030e4 100644
--- a/README.md
+++ b/README.md
@@ -215,7 +215,7 @@ Docs are organized **by topic** (one doc per concern: workflow, workspace, servi
 - [Workspace.md](docs/Workspace.md) – Workspace layout and usage for file processing.
 - [Schema.md](docs/Schema.md) – Database schema and table relationships.
 - [Development_guideline.md](docs/Development_guideline.md) – Development setup, app requirements, and step-by-step workflow.
-- [Contributing.md](docs/Contributing.md) – Service layer (single place for writes), **regenerating service API docs** (`scripts/generate_service_docs.py`), and contributor guidelines.
+- [CONTRIBUTING.md](CONTRIBUTING.md) – Service layer (single place for writes), **regenerating service API docs** (`scripts/generate_service_docs.py`), and contributor guidelines.
 - [Service_API.md](docs/Service_API.md) – API reference and index for all service layer functions.
 - [service_api/](docs/service_api/) – Per-app service API docs (name, description, parameters, return types, validation).
 
diff --git a/bench.json b/bench.json
new file mode 100644
index 00000000..6342e51b
--- /dev/null
+++ b/bench.json
@@ -0,0 +1,132 @@
+{
+    "machine_info": {
+        "node": "Leos-Mac-mini.local",
+        "processor": "arm",
+        "machine": "arm64",
+        "python_compiler": "Clang 21.1.4 ",
+        "python_implementation": "CPython",
+        "python_implementation_version": "3.13.12",
+        "python_version": "3.13.12",
+        "python_build": [
+            "main",
+            "Mar 10 2026 18:26:32"
+        ],
+        "release": "25.4.0",
+        "system": "Darwin",
+        "cpu": {
+            "python_version": "3.13.12.final.0 (64 bit)",
+            "cpuinfo_version": [
+                9,
+                0,
+                0
+            ],
+            "cpuinfo_version_string": "9.0.0",
+            "arch": "ARM_8",
+            "bits": 64,
+            "count": 10,
+            "arch_string_raw": "arm64",
+            "brand_raw": "Apple M4"
+        }
+    },
+    "commit_info": {
+        "id": "7bf1b7ea6657990eef44fdb362b762abb16e41ba",
+        "time": "2026-05-18T20:05:08-04:00",
+        "author_time": "2026-05-18T20:05:08-04:00",
+        "dirty": true,
+        "project": "boost-data-collector",
+        "branch": "develop"
+    },
+    "benchmarks": [
+        {
+            "group": null,
+            "name": "test_process_commit_data_batch",
+            "fullname": "benchmarks/test_github_commits_throughput.py::test_process_commit_data_batch",
+            "params": null,
+            "param": null,
+            "extra_info": {
+                "n": 50
+            },
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.13009395799599588,
+                "max": 0.16657558304723352,
+                "mean": 0.14227045823354273,
+                "stddev": 0.01457181655810832,
+                "rounds": 5,
+                "median": 0.13689958304166794,
+                "iqr": 0.01724434396601282,
+                "q1": 0.1326302083034534,
+                "q3": 0.14987455226946622,
+                "iqr_outliers": 0,
+                "stddev_outliers": 1,
+                "outliers": "1;0",
+                "ld15iqr": 0.13009395799599588,
+                "hd15iqr": 0.16657558304723352,
+                "ops": 7.0288660936092535,
+                "total": 0.7113522911677137,
+                "data": [
+                    0.16657558304723352,
+                    0.1334756250726059,
+                    0.13009395799599588,
+                    0.13689958304166794,
+                    0.14430754201021045
+                ],
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_service_bulk_commits_and_file_changes",
+            "fullname": "benchmarks/test_service_bulk_insert.py::test_service_bulk_commits_and_file_changes",
+            "params": null,
+            "param": null,
+            "extra_info": {
+                "n": 50
+            },
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.10591337503865361,
+                "max": 0.1513816670048982,
+                "mean": 0.13538706267718226,
+                "stddev": 0.01819949434483927,
+                "rounds": 6,
+                "median": 0.14058843749808148,
+                "iqr": 0.02617037494201213,
+                "q1": 0.12384004204068333,
+                "q3": 0.15001041698269546,
+                "iqr_outliers": 0,
+                "stddev_outliers": 1,
+                "outliers": "1;0",
+                "ld15iqr": 0.10591337503865361,
+                "hd15iqr": 0.1513816670048982,
+                "ops": 7.386230118489284,
+                "total": 0.8123223760630935,
+                "data": [
+                    0.1513816670048982,
+                    0.15001041698269546,
+                    0.13251695793587714,
+                    0.12384004204068333,
+                    0.1486599170602858,
+                    0.10591337503865361
+                ],
+                "iterations": 1
+            }
+        }
+    ],
+    "datetime": "2026-05-19T18:09:23.360634+00:00",
+    "version": "5.2.3"
+}
\ No newline at end of file
diff --git a/benchmarks/baselines.json b/benchmarks/baselines.json
new file mode 100644
index 00000000..506ef4e9
--- /dev/null
+++ b/benchmarks/baselines.json
@@ -0,0 +1,13 @@
+{
+  "description": "Maximum acceptable median wall time (seconds) per scenario at BENCHMARK_COMMIT_N. Update median_seconds after intentional perf work or when CI hardware changes; copy medians from --benchmark-json stats.median.",
+  "benchmarks": {
+    "benchmarks/test_github_commits_throughput.py::test_process_commit_data_batch": {
+      "median_seconds": 45.0,
+      "n": 50
+    },
+    "benchmarks/test_service_bulk_insert.py::test_service_bulk_commits_and_file_changes": {
+      "median_seconds": 35.0,
+      "n": 50
+    }
+  }
+}
diff --git a/benchmarks/compare_to_baseline.py b/benchmarks/compare_to_baseline.py
new file mode 100644
index 00000000..dc3bd0ef
--- /dev/null
+++ b/benchmarks/compare_to_baseline.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""
+Compare pytest-benchmark JSON (--benchmark-json) against benchmarks/baselines.json.
+
+Exits with status 1 if any baseline median is exceeded by more than 25% (slower),
+or if a baselined scenario is missing from the results or has no median.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections.abc import Sequence
+from pathlib import Path
+
+DEFAULT_REGRESSION_RATIO = 1.25
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """
+    Check benchmark medians against the baselines and return the exit status.
+
+    Args:
+        argv: Arguments without the program name; ``None`` (the default) lets
+            argparse read ``sys.argv[1:]``.
+
+    Returns:
+        0 if every non-skipped baseline is within its limit, otherwise 1.
+    """
+    parser = argparse.ArgumentParser(
+        description="Compare benchmark JSON to checked-in median baselines."
+    )
+    parser.add_argument(
+        "results_json",
+        type=Path,
+        help="Path written by pytest-benchmark --benchmark-json",
+    )
+    parser.add_argument(
+        "baselines_json",
+        type=Path,
+        help="Path to benchmarks/baselines.json",
+    )
+    parser.add_argument(
+        "--regression-ratio",
+        type=float,
+        default=DEFAULT_REGRESSION_RATIO,
+        metavar="R",
+        help=f"Fail if median > baseline_median * R (default {DEFAULT_REGRESSION_RATIO})",
+    )
+    args = parser.parse_args(argv)
+    if args.regression_ratio <= 0:
+        # A zero or negative ratio would turn every limit into nonsense.
+        parser.error("--regression-ratio must be > 0")
+
+    results = json.loads(args.results_json.read_text(encoding="utf-8"))
+    baselines_doc = json.loads(args.baselines_json.read_text(encoding="utf-8"))
+
+    bench_by_name = {b["fullname"]: b for b in results.get("benchmarks", [])}
+    expected: dict[str, dict] = baselines_doc.get("benchmarks", {})
+
+    failures: list[str] = []
+    warnings: list[str] = []
+
+    for fullname, spec in expected.items():
+        if spec.get("skip"):
+            continue
+        ref = spec.get("median_seconds")
+        if ref is None:
+            warnings.append(f"{fullname}: baseline has no median_seconds; skipping")
+            continue
+
+        bench = bench_by_name.get(fullname)
+        if bench is None:
+            failures.append(f"{fullname}: missing from benchmark results")
+            continue
+
+        raw_median = (bench.get("stats") or {}).get("median")
+        if raw_median is None:
+            # Report a malformed results entry as a failure rather than a traceback.
+            failures.append(f"{fullname}: benchmark results have no stats.median")
+            continue
+        median = float(raw_median)
+
+        exp_n = spec.get("n")
+        if exp_n is not None:
+            # A batch-size mismatch only warns; the median is still checked below.
+            got_n = (bench.get("extra_info") or {}).get("n")
+            if got_n is not None and int(got_n) != int(exp_n):
+                warnings.append(
+                    f"{fullname}: baseline n={exp_n} but run reported n={got_n} "
+                    "(set BENCHMARK_COMMIT_N to match baselines.json)"
+                )
+
+        limit = float(ref) * float(args.regression_ratio)
+        if median > limit:
+            failures.append(
+                f"{fullname}: median {median:.6f}s exceeds limit {limit:.6f}s "
+                f"(baseline {float(ref):.6f}s × {args.regression_ratio})"
+            )
+
+    for line in warnings:
+        print(f"WARNING: {line}", file=sys.stderr)
+    for line in failures:
+        print(f"FAIL: {line}", file=sys.stderr)
+
+    if failures:
+        print(
+            f"Benchmark regression check failed ({len(failures)} scenario(s)).",
+            file=sys.stderr,
+        )
+        return 1
+    print("Benchmark regression check passed.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py
new file mode 100644
index 00000000..3a7a82ff
--- /dev/null
+++ b/benchmarks/conftest.py
@@ -0,0 +1,20 @@
+"""
+Benchmark-only fixtures. Default pytest collection skips this directory unless
+RUN_BENCHMARKS=1 (see root conftest.py).
+"""
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+
+@pytest.fixture
+def benchmark_commit_n() -> int:
+    """Return the per-round batch size from BENCHMARK_COMMIT_N (defaults to 50)."""
+    batch_size = int(os.environ.get("BENCHMARK_COMMIT_N", "50"))
+    # Hand back any positive size; everything else is rejected below.
+    if batch_size >= 1:
+        return batch_size
+    raise ValueError("BENCHMARK_COMMIT_N must be >= 1")
diff --git a/benchmarks/test_github_commits_throughput.py b/benchmarks/test_github_commits_throughput.py
new file mode 100644
index 00000000..0502dcba
--- /dev/null
+++ b/benchmarks/test_github_commits_throughput.py
@@ -0,0 +1,63 @@
+"""
+Benchmark: GitHub-shaped commit payloads through sync path `_process_commit_data`.
+
+Uses the unknown-author branch (no top-level author/committer) for stable
+account resolution. Each payload includes one modified file with a unique path.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from github_activity_tracker.models import FileChangeStatus
+from github_activity_tracker.sync.commits import _process_commit_data
+
+
+def _build_commit_payloads(n: int) -> list[dict]:
+    """
+    Build *n* REST-shaped commit dicts for `_process_commit_data` (no network).
+
+    Each dict has a unique 40-character sha and one modified file with a unique path.
+    """
+    return [
+        {
+            "sha": f"b{idx:039d}",  # "b" + 39 digits = 40 chars, unique per index
+            "commit": {
+                "message": f"benchmark commit {idx}\n",
+                "author": {
+                    "name": "Bench User",
+                    "email": "bench@example.invalid",
+                    "date": "2024-01-01T12:00:00Z",
+                },
+            },
+            "files": [
+                {
+                    "filename": f"benchmarks/path_{idx}/file.txt",
+                    "status": "modified",
+                    "additions": 1,
+                    "deletions": 1,
+                    "patch": f"@@ benchmark {idx} @@\n",
+                }
+            ],
+        }
+        for idx in range(n)
+    ]
+
+
+@pytest.mark.benchmark
+@pytest.mark.django_db(transaction=True)
+def test_process_commit_data_batch(
+    benchmark,
+    github_repository,
+    benchmark_commit_n,
+):
+    """Benchmark `_process_commit_data` over `benchmark_commit_n` synthetic payloads."""
+    commit_payloads = _build_commit_payloads(benchmark_commit_n)
+
+    def run_batch() -> None:
+        # Payloads are built once up front, so only the processing is timed.
+        for payload in commit_payloads:
+            _process_commit_data(github_repository, payload)
+
+    benchmark.extra_info["n"] = benchmark_commit_n
+    benchmark(run_batch)
diff --git a/benchmarks/test_service_bulk_insert.py b/benchmarks/test_service_bulk_insert.py
new file mode 100644
index 00000000..85cf5dd6
--- /dev/null
+++ b/benchmarks/test_service_bulk_insert.py
@@ -0,0 +1,55 @@
+"""
+Benchmark: service-layer writes for N commits plus one file change each, in one transaction.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+import pytest
+from django.db import transaction
+
+from github_activity_tracker import services
+from github_activity_tracker.models import FileChangeStatus
+
+
+@pytest.mark.benchmark
+@pytest.mark.django_db(transaction=True)
+def test_service_bulk_commits_and_file_changes(
+    benchmark,
+    github_repository,
+    github_account,
+    benchmark_commit_n,
+):
+    """Benchmark n commit + file + file-change service writes in one transaction."""
+    n = benchmark_commit_n
+    commit_at = datetime(2024, 6, 1, tzinfo=timezone.utc)
+    # 7-char prefix + 33 zero-padded digits = 40 chars, distinct for every index.
+    hashes = [f"svcbulk{i:033d}" for i in range(n)]
+
+    def run_batch() -> None:
+        with transaction.atomic():
+            for i, commit_hash in enumerate(hashes):
+                commit_obj, _ = services.create_or_update_commit(
+                    repo=github_repository,
+                    account=github_account,
+                    commit_hash=commit_hash,
+                    comment=f"svc bulk {i}",
+                    commit_at=commit_at,
+                )
+                github_file, _ = services.create_or_update_github_file(
+                    github_repository,
+                    f"benchmarks/svc_bulk_{i}.txt",
+                    is_deleted=False,
+                )
+                services.add_commit_file_change(
+                    commit_obj,
+                    github_file,
+                    status=FileChangeStatus.MODIFIED,
+                    additions=1,
+                    deletions=0,
+                    patch="",
+                )
+
+    benchmark.extra_info["n"] = n
+    benchmark(run_batch)
diff --git a/boost_library_tracker/services.py b/boost_library_tracker/services.py
index 41701972..43cafea6 100644
--- a/boost_library_tracker/services.py
+++ b/boost_library_tracker/services.py
@@ -1,7 +1,7 @@
 """
 Service layer for boost_library_tracker.
 All creates/updates/deletes for this app's models must go through functions here.
-See docs/Contributing.md.
+See CONTRIBUTING.md.
 """
 
 from __future__ import annotations
diff --git a/boost_mailing_list_tracker/services.py b/boost_mailing_list_tracker/services.py
index 92b83501..daf31f1d 100644
--- a/boost_mailing_list_tracker/services.py
+++ b/boost_mailing_list_tracker/services.py
@@ -2,7 +2,7 @@
 Service layer for boost_mailing_list_tracker.
 
 All creates/updates/deletes for this app's models must go through functions here.
-See docs/Contributing.md.
+See CONTRIBUTING.md.
 """
 
 from __future__ import annotations
diff --git a/boost_usage_tracker/services.py b/boost_usage_tracker/services.py
index 936ab6e9..92c8818f 100644
--- a/boost_usage_tracker/services.py
+++ b/boost_usage_tracker/services.py
@@ -2,7 +2,7 @@
 Service layer for boost_usage_tracker.
 
 All creates/updates/deletes for this app's models must go through functions here.
-See docs/Contributing.md for the project-wide rule.
+See CONTRIBUTING.md for the project-wide rule.
 
 Includes bulk operations for speed (fewer round-trips):
 - bulk_create_or_update_boost_usage
diff --git a/conftest.py b/conftest.py
index 90475934..a63c9e31 100644
--- a/conftest.py
+++ b/conftest.py
@@ -2,8 +2,14 @@
 Root conftest: register app-level fixture modules and optional session/global fixtures.
 """
 
+import os
+
 import pytest
 
+# Exclude benchmarks/ from default test collection (keeps CI and local pytest fast).
+# Run benchmarks with: RUN_BENCHMARKS=1 uv run pytest benchmarks/ -m benchmark ...
+collect_ignore = [] if os.environ.get("RUN_BENCHMARKS") == "1" else ["benchmarks"]
+
 
 def _patch_django_context_copy_py314():
     """Fix Django BaseContext.__copy__ on Python 3.14 (copy(super()) is broken there)."""
diff --git a/core/_version.py b/core/_version.py
index ccc4c4c3..6892cf81 100644
--- a/core/_version.py
+++ b/core/_version.py
@@ -1,2 +1,2 @@
 # file generated by setuptools-scm; do not edit
-version = "0.1.0"
+version = "0.1.1.dev549+g7bf1b7ea6.d20260519"
diff --git a/cppa_pinecone_sync/services.py b/cppa_pinecone_sync/services.py
index 8c014cde..ef62e649 100644
--- a/cppa_pinecone_sync/services.py
+++ b/cppa_pinecone_sync/services.py
@@ -5,7 +5,7 @@
 module. Do not call Model.objects.create(), model.save(), or model.delete() from
 outside this module (e.g. from management commands, views, or other apps).
 
-See docs/Contributing.md for the project-wide rule.
+See CONTRIBUTING.md for the project-wide rule.
 """
 
 from __future__ import annotations
diff --git a/cppa_slack_tracker/services.py b/cppa_slack_tracker/services.py
index 9eab53b6..e5cbb80e 100644
--- a/cppa_slack_tracker/services.py
+++ b/cppa_slack_tracker/services.py
@@ -5,7 +5,7 @@
 module. Do not call Model.objects.create(), model.save(), or model.delete() from
 outside this module (e.g. from management commands, views, or other apps).
 
-See docs/Contributing.md for the project-wide rule.
+See CONTRIBUTING.md for the project-wide rule.
 """
 
 from __future__ import annotations
diff --git a/cppa_user_tracker/services.py b/cppa_user_tracker/services.py
index f2853230..1fbd92be 100644
--- a/cppa_user_tracker/services.py
+++ b/cppa_user_tracker/services.py
@@ -5,7 +5,7 @@
 module. Do not call Model.objects.create(), model.save(), or model.delete() from
 outside this module (e.g. from management commands, views, or other apps).
 
-See docs/Contributing.md for the project-wide rule.
+See CONTRIBUTING.md for the project-wide rule.
 """
 
 from __future__ import annotations
diff --git a/cppa_youtube_script_tracker/services.py b/cppa_youtube_script_tracker/services.py
index 7ec7877e..ff626266 100644
--- a/cppa_youtube_script_tracker/services.py
+++ b/cppa_youtube_script_tracker/services.py
@@ -5,7 +5,7 @@
 module. Do not call Model.objects.create(), model.save(), or model.delete() from
 outside this module.
 
-See docs/Contributing.md for the project-wide rule.
+See CONTRIBUTING.md for the project-wide rule.
 """
 
 from __future__ import annotations
diff --git a/docs/How_to_add_a_collector.md b/docs/How_to_add_a_collector.md
index 4dd5ef94..7cf6a7f2 100644
--- a/docs/How_to_add_a_collector.md
+++ b/docs/How_to_add_a_collector.md
@@ -31,7 +31,7 @@ The detailed contracts (abstract methods, lifecycle hooks, error handling, templ
 
 ## 4. Skeleton collector (minimal copy-paste example)
 
-This section is a **canonical minimal pattern**: the management command is only responsible for parsing options and returning a collector from `get_collector()` (often ~10–15 lines). The **`AbstractCollector` subclass** implements `name`, `validate_config`, and `collect` (orchestration); `BaseCollectorCommand` still calls `run()`, which the base implements as validate-then-collect. The **service layer** (`services.py`) is the main place for DB and API logic—match the project rule that writes go through services (see [Contributing.md](Contributing.md#service-layer-single-place-for-writes)).
+This section is a **canonical minimal pattern**: the management command is only responsible for parsing options and returning a collector from `get_collector()` (often ~10–15 lines). The **`AbstractCollector` subclass** implements `name`, `validate_config`, and `collect` (orchestration); `BaseCollectorCommand` still calls `run()`, which the base implements as validate-then-collect. The **service layer** (`services.py`) is the main place for DB and API logic—match the project rule that writes go through services (see [CONTRIBUTING.md](../CONTRIBUTING.md#service-layer-single-place-for-writes)).
 
 Keep imports and calls inside `collect()` going through `services.py` (for example `import my_skeleton_tracker.services as services` and only call functions from that module) so the write path stays obvious.
 
diff --git a/docs/Onboarding.md b/docs/Onboarding.md
index 7ab9ab0e..090f7e27 100644
--- a/docs/Onboarding.md
+++ b/docs/Onboarding.md
@@ -10,7 +10,7 @@ For setup steps (venv, migrate, tests), start with the root **[README.md](../REA
 
 1. **One Django project, one database** — All installed apps share PostgreSQL (`boost_dashboard`). There is no per-app database isolation.
 2. **Collectors are management commands** — Scheduled work is `python manage.py <command>`. Production batches run **`run_scheduled_collectors`**, which reads **`config/boost_collector_schedule.yaml`** (see **[Workflow.md](Workflow.md)**).
-3. **Writes go through `services.py`** — For apps that define models, creates/updates/deletes belong in that app’s **`services.py`**. Commands, fetchers, and other apps call those functions; they do not write models ad hoc (see **[Contributing.md](Contributing.md)**).
+3. **Writes go through `services.py`** — For apps that define models, creates/updates/deletes belong in that app’s **`services.py`**. Commands, fetchers, and other apps call those functions; they do not write models ad hoc (see **[CONTRIBUTING.md](../CONTRIBUTING.md)**).
 4. **Shared “collector contract” lives in `core`** — Prefer **`AbstractCollector`** (`name`, `validate_config`, `collect`) plus **`BaseCollectorCommand`** for a consistent shape; legacy **`CollectorBase`** (`run()` only) remains supported. See **[Core_public_API.md](Core_public_API.md)** and **[How_to_add_a_collector.md](How_to_add_a_collector.md)**.
 5. **Cross-app coupling is intentionally loose** — Avoid **ForeignKeys** from one tracker app into another’s models when it would create tight coupling or import cycles. Prefer querying by IDs or shared reference tables (e.g. **Language**, **Identity**) as documented in **[Schema.md](Schema.md)** and **[Development_guideline.md](Development_guideline.md)**.
 
@@ -23,7 +23,7 @@ For setup steps (venv, migrate, tests), start with the root **[README.md](../REA
 | 1 | [README.md](../README.md) | Prerequisites, setup, tests. |
 | 2 | [Architecture_data_flow.md](Architecture_data_flow.md) | Sources → collectors → DB / workspace → Pinecone. |
 | 3 | [Workflow.md](Workflow.md) | YAML schedules, Celery Beat, execution order. |
-| 4 | [Contributing.md](Contributing.md) | Service-layer rule for DB writes. |
+| 4 | [CONTRIBUTING.md](../CONTRIBUTING.md) | Service-layer rule for DB writes. |
 | 5 | [Workspace.md](Workspace.md) | Where files land under `WORKSPACE_DIR`. |
 | 6 | [Schema.md](Schema.md) — § Overview + diagrams for your area | Cross-app tables (identity, GitHub, Boost libraries). |
 | 7 | [Service_API.md](Service_API.md) + `service_api/<app>.md` | Callable surface for writes you must use. |
diff --git a/docs/README.md b/docs/README.md
index b005266c..99820f58 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -19,7 +19,7 @@ Documentation is organized **by topic**, not by app. Each doc covers one cross-c
 | **Development** | [Development_guideline.md](Development_guideline.md) | Development setup, app requirements, and step-by-step workflow. |
 | **Testing / typing** | [README.md](../README.md#running-tests), [Development_guideline.md](Development_guideline.md#testing-workflow) | pytest (Postgres), coverage, when to run tests; **Pyright** (`uv run pyright`) and CI jobs. |
 | **Deployment** | [Deployment.md](Deployment.md) | CI/CD pipeline, environment secrets (`SSH_HOST`, `SSH_USER`, `SSH_PRIVATE_KEY`; optional `SSH_PORT`), server setup, and deploy script behavior. |
-| **Contributing** | [Contributing.md](Contributing.md) | Service layer (single place for writes) and contributor guidelines. |
+| **Contributing** | [CONTRIBUTING.md](../CONTRIBUTING.md) | Service layer (single place for writes) and contributor guidelines. |
 | **Service API** | [Service_API.md](Service_API.md) | API reference and index for all service layer functions. |
 | **Service API (per app)** | [service_api/](service_api/) | Per-app service API docs (name, description, parameters, return types, validation). |
 
diff --git a/docs/Service_API.md b/docs/Service_API.md
index a6499609..b0f656e2 100644
--- a/docs/Service_API.md
+++ b/docs/Service_API.md
@@ -66,5 +66,5 @@ See each app’s doc in [service_api/](service_api/) for parameter types, return
 
 ## Related docs
 
-- [Contributing.md](Contributing.md) – Rule that all writes go through the service layer.
+- [CONTRIBUTING.md](../CONTRIBUTING.md) – Rule that all writes go through the service layer.
 - [Schema.md](Schema.md) – Database schema and models.
diff --git a/docs/boost_library_docs_tracker.md b/docs/boost_library_docs_tracker.md
index 439862bc..5572b77f 100644
--- a/docs/boost_library_docs_tracker.md
+++ b/docs/boost_library_docs_tracker.md
@@ -180,4 +180,4 @@ When adding this app to the project, do all of the following:
 - [service_api/boost_library_docs_tracker.md](service_api/boost_library_docs_tracker.md) — Full service API reference for this app.
 - [Workflow.md](Workflow.md) — Execution order (this command runs after `run_boost_library_tracker`).
 - [Workspace.md](Workspace.md) — Workspace layout (`workspace/boost_library_docs_tracker/`).
-- [Contributing.md](Contributing.md) — Service layer write rules.
+- [CONTRIBUTING.md](../CONTRIBUTING.md) — Service layer write rules.
diff --git a/docs/cross-app-dependencies.md b/docs/cross-app-dependencies.md
index dc238280..ca7ac1b1 100644
--- a/docs/cross-app-dependencies.md
+++ b/docs/cross-app-dependencies.md
@@ -1,7 +1,7 @@
 # Cross-App Dependencies
 
 This document maps every cross-app dependency between the tracker Django apps in this
-project.  It exists to make the [Contributing.md](Contributing.md) guideline — "prefer no
+project.  It exists to make the [CONTRIBUTING.md](../CONTRIBUTING.md) guideline — "prefer no
 ForeignKey from one tracker app into another's models" — visible and therefore enforceable.
 For **typed data boundaries** (run results, activity rows, checkpoints) shared across apps,
 prefer :mod:`core.protocols` (see [Core_public_API.md](Core_public_API.md#tracker-protocols-dtos)).
@@ -76,7 +76,7 @@ These are hard database-level dependencies.  They cannot be removed without migr
 
 ## 2. ORM Read Coupling (cross-app `.objects` queries outside `models.py`)
 
-The [Contributing.md](Contributing.md) service layer rules enforce **write isolation** —
+The [CONTRIBUTING.md](../CONTRIBUTING.md) service layer rules enforce **write isolation** —
 all inserts/updates/deletes go through `services.py`.  However, **read isolation is not
 enforced**: any module may call `AnotherApp.Model.objects.filter(...)` directly.
 
@@ -337,7 +337,7 @@ To add it to pre-commit:
 
 ## Related documentation
 
-- [Contributing.md](Contributing.md) — service-layer write rules
+- [CONTRIBUTING.md](../CONTRIBUTING.md) — service-layer write rules
 - [Core_public_API.md](Core_public_API.md) — `core` public surfaces and the coupling reduction goal
 - [Development_guideline.md](Development_guideline.md) — adding new apps
 - [`scripts/list_cross_app_imports.py`](../scripts/list_cross_app_imports.py) — discovery script
diff --git a/docs/service_api/README.md b/docs/service_api/README.md
index 6c20e608..571c2a86 100644
--- a/docs/service_api/README.md
+++ b/docs/service_api/README.md
@@ -38,4 +38,4 @@ Index of all app service modules. All writes to app models must go through the s
 - **wg21_paper_tracker** – WG21 paper and author persistence.
 - **core.protocols** – Structural contracts for sync outcomes and activity payloads (see [core_protocols.md](core_protocols.md)).
 
-See [Contributing.md](../Contributing.md) for the rule that all writes go through the service layer, and for **regenerating** these docs from source.
+See [CONTRIBUTING.md](../../CONTRIBUTING.md) for the rule that all writes go through the service layer, and for **regenerating** these docs from source.
diff --git a/docs/service_api/boost_usage_tracker.md b/docs/service_api/boost_usage_tracker.md
index 75f99655..a8275510 100644
--- a/docs/service_api/boost_usage_tracker.md
+++ b/docs/service_api/boost_usage_tracker.md
@@ -30,4 +30,4 @@
 ## Related docs
 
 - [Schema.md](../Schema.md) – Section 4: Boost Usage Tracker.
-- [Contributing.md](../Contributing.md) – Service layer rule.
+- [CONTRIBUTING.md](../../CONTRIBUTING.md) – Service layer rule.
diff --git a/docs/service_api/clang_github_tracker.md b/docs/service_api/clang_github_tracker.md
index b66e135b..0f7c1570 100644
--- a/docs/service_api/clang_github_tracker.md
+++ b/docs/service_api/clang_github_tracker.md
@@ -30,4 +30,4 @@ Used by `clang_github_tracker.state_manager.resolve_start_end_dates` (with optio
 
 - [Schema.md](../Schema.md) – Section 2b: Clang GitHub Tracker.
 - [Workspace.md](../Workspace.md) – `workspace/raw/github_activity_tracker/`, `workspace/clang_github_tracker/`.
-- [Contributing.md](../Contributing.md) – Service layer rule.
+- [CONTRIBUTING.md](../../CONTRIBUTING.md) – Service layer rule.
diff --git a/docs/service_api/cppa_pinecone_sync.md b/docs/service_api/cppa_pinecone_sync.md
index b02aa89b..a56312c3 100644
--- a/docs/service_api/cppa_pinecone_sync.md
+++ b/docs/service_api/cppa_pinecone_sync.md
@@ -2,7 +2,7 @@
 
 Module: `cppa_pinecone_sync.services`
 
-All creates/updates/deletes for `PineconeFailList` and `PineconeSyncStatus` must go through this module. See [Contributing.md](../Contributing.md).
+All creates/updates/deletes for `PineconeFailList` and `PineconeSyncStatus` must go through this module. See [CONTRIBUTING.md](../../CONTRIBUTING.md).
 
 ---
 <!-- SERVICE_API:GENERATED:START -->
diff --git a/docs/service_api/cppa_user_tracker.md b/docs/service_api/cppa_user_tracker.md
index b07f994e..6313fe1b 100644
--- a/docs/service_api/cppa_user_tracker.md
+++ b/docs/service_api/cppa_user_tracker.md
@@ -34,5 +34,5 @@
 ## Related
 
 - [Service API index](README.md)
-- [Contributing](../Contributing.md)
+- [CONTRIBUTING.md](../../CONTRIBUTING.md)
 - [Schema](../Schema.md)
diff --git a/docs/service_api/discord_activity_tracker.md b/docs/service_api/discord_activity_tracker.md
index ae628521..f9d63f61 100644
--- a/docs/service_api/discord_activity_tracker.md
+++ b/docs/service_api/discord_activity_tracker.md
@@ -145,6 +145,6 @@ Settings:
 
 - [DiscordChatExporter setup](../operations/discord_chat_exporter.md) — download, install, `.env`
 - [Service API index](README.md)
-- [Contributing](../Contributing.md)
+- [CONTRIBUTING.md](../../CONTRIBUTING.md)
 - [Schema](../Schema.md)
 - [Workspace](../Workspace.md) – raw archives under `{WORKSPACE_DIR}/raw/discord_activity_tracker/<server_id>/<channel_id>/`; app folder `{WORKSPACE_DIR}/discord_activity_tracker/` (CLI `script/`, backfill drop `Discussion - c-cpp-discussion/`)
diff --git a/docs/service_api/github_activity_tracker.md b/docs/service_api/github_activity_tracker.md
index 6bd785e1..b75c9bc3 100644
--- a/docs/service_api/github_activity_tracker.md
+++ b/docs/service_api/github_activity_tracker.md
@@ -64,5 +64,5 @@ To sync a repo from GitHub (read last updated from DB, fetch from GitHub, save v
 ## Related
 
 - [Service API index](README.md)
-- [Contributing](../Contributing.md)
+- [CONTRIBUTING.md](../../CONTRIBUTING.md)
 - [Schema](../Schema.md)
diff --git a/github_activity_tracker/services.py b/github_activity_tracker/services.py
index 3cdc8418..c07cde6b 100644
--- a/github_activity_tracker/services.py
+++ b/github_activity_tracker/services.py
@@ -5,7 +5,7 @@
 module. Do not call Model.objects.create(), model.save(), or model.delete() from
 outside this module (e.g. from management commands, views, or other apps).
 
-See docs/Contributing.md for the project-wide rule.
+See CONTRIBUTING.md for the project-wide rule.
 """
 
 from __future__ import annotations
diff --git a/pyproject.toml b/pyproject.toml
index 4a8ef19a..0a6ac7a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ description = "Boost Data Collector Django project"
 [tool.setuptools.packages.find]
 where = ["."]
 include = ["*"]
-exclude = ["*.tests", "*.tests.*", "tests", "tests.*"]
+exclude = ["*.tests", "*.tests.*", "tests", "tests.*", "benchmarks", "benchmarks.*"]
 
 [tool.setuptools_scm]
 fallback_version = "0.1.0"
diff --git a/pytest.ini b/pytest.ini
index 9f457850..34f97da4 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -8,3 +8,4 @@ filterwarnings =
     ignore::PendingDeprecationWarning
 markers =
     django_db: mark test as using the database (django_db is built-in from pytest-django)
+    benchmark: performance benchmarks; collection requires RUN_BENCHMARKS=1 (see CONTRIBUTING.md)
diff --git a/requirements-dev.in b/requirements-dev.in
index f28d8476..f140b645 100644
--- a/requirements-dev.in
+++ b/requirements-dev.in
@@ -4,6 +4,7 @@
 -r requirements.in
 
 pytest>=7.4,<9
+pytest-benchmark>=4.0,<6
 pytest-django>=4.5,<5
 django-stubs>=4.2.7,<5
 django-stubs-ext>=4.2.7,<5
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 4d4126e2..ec2e7cb5 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile requirements-dev.in -o requirements-dev.lock --python-version 3.11
+#    uv pip compile requirements-dev.in -o requirements-dev.lock --python-version 3.13
 aiohappyeyeballs==2.6.1
     # via aiohttp
 aiohttp==3.13.5
@@ -12,13 +12,13 @@ annotated-types==0.7.0
     # via pydantic
 asgiref==3.11.1
     # via django
-async-timeout==5.0.1
-    # via redis
 attrs==26.1.0
     # via
     #   aiohttp
     #   outcome
     #   trio
+audioop-lts==0.2.2
+    # via discord-py
 beautifulsoup4==4.14.3
     # via -r requirements.in
 billiard==4.2.4
@@ -175,6 +175,8 @@ psycopg==3.3.4
     # via -r requirements.in
 psycopg-binary==3.3.4
     # via psycopg
+py-cpuinfo==9.0.0
+    # via pytest-benchmark
 pyasn1==0.6.3
     # via pyasn1-modules
 pyasn1-modules==0.4.2
@@ -206,8 +208,11 @@ pysocks==1.7.1
 pytest==8.4.2
     # via
     #   -r requirements-dev.in
+    #   pytest-benchmark
     #   pytest-cov
     #   pytest-django
+pytest-benchmark==5.2.3
+    # via -r requirements-dev.in
 pytest-cov==6.3.0
     # via -r requirements-dev.in
 pytest-django==4.12.0
@@ -254,8 +259,6 @@ soupsieve==2.8.3
     # via beautifulsoup4
 sqlparse==0.5.5
     # via django
-tomli==2.4.1
-    # via coverage
 tqdm==4.67.3
     # via pinecone
 trio==0.33.0
@@ -270,12 +273,10 @@ types-pyyaml==6.0.12.20260510
     # via django-stubs
 typing-extensions==4.15.0
     # via
-    #   aiosignal
     #   beautifulsoup4
     #   django-stubs
     #   django-stubs-ext
     #   pinecone
-    #   psycopg
     #   pydantic
     #   pydantic-core
     #   pygithub
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 00000000..2387c15c
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,7 @@
+version = 1
+revision = 3
+requires-python = ">=3.11"
+
+[[package]]
+name = "boost-data-collector"
+source = { editable = "." }

From 94302c71984cf42be97d28f6f1fd5f3e193564c5 Mon Sep 17 00:00:00 2001
From: Leo Chen <leo.chen0412@outlook.com>
Date: Tue, 19 May 2026 18:56:23 -0700
Subject: [PATCH 2/3] chore: remove uv.lock file and update version to 0.1.0 in
 _version.py

---
 bench.json                                   | 2 +-
 benchmarks/test_github_commits_throughput.py | 1 -
 core/_version.py                             | 2 +-
 uv.lock                                      | 7 -------
 4 files changed, 2 insertions(+), 10 deletions(-)
 delete mode 100644 uv.lock

diff --git a/bench.json b/bench.json
index 6342e51b..94e042ad 100644
--- a/bench.json
+++ b/bench.json
@@ -129,4 +129,4 @@
     ],
     "datetime": "2026-05-19T18:09:23.360634+00:00",
     "version": "5.2.3"
-}
\ No newline at end of file
+}
diff --git a/benchmarks/test_github_commits_throughput.py b/benchmarks/test_github_commits_throughput.py
index 0502dcba..73fa5308 100644
--- a/benchmarks/test_github_commits_throughput.py
+++ b/benchmarks/test_github_commits_throughput.py
@@ -9,7 +9,6 @@
 
 import pytest
 
-from github_activity_tracker.models import FileChangeStatus
 from github_activity_tracker.sync.commits import _process_commit_data
 
 
diff --git a/core/_version.py b/core/_version.py
index 6892cf81..ccc4c4c3 100644
--- a/core/_version.py
+++ b/core/_version.py
@@ -1,2 +1,2 @@
 # file generated by setuptools-scm; do not edit
-version = "0.1.1.dev549+g7bf1b7ea6.d20260519"
+version = "0.1.0"
diff --git a/uv.lock b/uv.lock
deleted file mode 100644
index 2387c15c..00000000
--- a/uv.lock
+++ /dev/null
@@ -1,7 +0,0 @@
-version = 1
-revision = 3
-requires-python = ">=3.11"
-
-[[package]]
-name = "boost-data-collector"
-source = { editable = "." }

From 54953975d7423855cf68c7a970232200222ea815 Mon Sep 17 00:00:00 2001
From: Leo Chen <leo.chen0412@outlook.com>
Date: Wed, 20 May 2026 08:39:19 -0700
Subject: [PATCH 3/3] chore: update .gitignore to exclude bench.json and remove
 the file; modify CI scripts for benchmark integration

---
 .github/workflows/actions.yml          |  28 +++---
 .github/workflows/benchmarks.yml       |   8 +-
 .gitignore                             |   2 +
 CONTRIBUTING.md                        |   3 +-
 bench.json                             | 132 -------------------------
 benchmarks/baselines.json              |   4 +-
 benchmarks/compare_to_baseline.py      |   2 +-
 benchmarks/test_service_bulk_insert.py |   2 +-
 core/_version.py                       |   2 +-
 docs/service_api/cppa_pinecone_sync.md |   2 +-
 10 files changed, 28 insertions(+), 157 deletions(-)
 delete mode 100644 bench.json

diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml
index b05da1b3..70916688 100644
--- a/.github/workflows/actions.yml
+++ b/.github/workflows/actions.yml
@@ -19,15 +19,15 @@ jobs:
     timeout-minutes: 15
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
         with:
           python-version: "3.13"
 
       - name: Cache uv
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.cache/uv
           key: ${{ runner.os }}-uv-pre-commit
@@ -35,7 +35,7 @@ jobs:
             ${{ runner.os }}-uv-
 
       - name: Cache pre-commit environments
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.cache/pre-commit
           key: ${{ runner.os }}-pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
@@ -51,15 +51,15 @@ jobs:
     timeout-minutes: 10
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
         with:
           python-version: "3.13"
 
       - name: Cache uv
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.cache/uv
           key: ${{ runner.os }}-uv-pyright-${{ hashFiles('requirements-dev.lock') }}
@@ -99,17 +99,17 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
         with:
           fetch-depth: 0
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
         with:
           python-version: "3.13"
 
       - name: Cache uv
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.cache/uv
           key: ${{ runner.os }}-uv-test
@@ -161,7 +161,7 @@ jobs:
 
       - name: Upload HTML coverage report
         if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: coverage-html
           path: htmlcov/
@@ -169,14 +169,14 @@ jobs:
 
       - name: Upload XML coverage report
         if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: coverage-xml
           path: coverage.xml
 
       - name: Upload test results
         if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: pytest-results
           path: junit.xml
@@ -189,7 +189,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
 
       - name: Create .env for CI
         run: |
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index f3d3d751..e80007ec 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -29,15 +29,15 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
         with:
           python-version: "3.13"
 
       - name: Cache uv
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.cache/uv
           key: ${{ runner.os }}-uv-benchmark-${{ hashFiles('requirements-dev.lock') }}
@@ -71,7 +71,7 @@ jobs:
 
       - name: Upload benchmark JSON
         if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: benchmark-json
           path: bench.json
diff --git a/.gitignore b/.gitignore
index 4d5c116b..307e325b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,8 @@ media/
 .test_artifacts/
 
 # Testing / coverage
+# pytest-benchmark JSON (machine_info / commit_info; use CI artifact, do not commit)
+bench.json
 .coverage
 coverage.xml
 coverage.json
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 434e6e18..f64f3aa0 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -87,7 +87,8 @@ export DJANGO_SETTINGS_MODULE=config.test_settings
 export BENCHMARK_COMMIT_N=50
 
 uv run pytest benchmarks/ -m benchmark --benchmark-only \
-  --benchmark-json=bench.json -v
+  --benchmark-json=bench.json -v \
+  --benchmark-disable-gc
 uv run python benchmarks/compare_to_baseline.py bench.json benchmarks/baselines.json
 ```
 
diff --git a/bench.json b/bench.json
deleted file mode 100644
index 94e042ad..00000000
--- a/bench.json
+++ /dev/null
@@ -1,132 +0,0 @@
-{
-    "machine_info": {
-        "node": "Leos-Mac-mini.local",
-        "processor": "arm",
-        "machine": "arm64",
-        "python_compiler": "Clang 21.1.4 ",
-        "python_implementation": "CPython",
-        "python_implementation_version": "3.13.12",
-        "python_version": "3.13.12",
-        "python_build": [
-            "main",
-            "Mar 10 2026 18:26:32"
-        ],
-        "release": "25.4.0",
-        "system": "Darwin",
-        "cpu": {
-            "python_version": "3.13.12.final.0 (64 bit)",
-            "cpuinfo_version": [
-                9,
-                0,
-                0
-            ],
-            "cpuinfo_version_string": "9.0.0",
-            "arch": "ARM_8",
-            "bits": 64,
-            "count": 10,
-            "arch_string_raw": "arm64",
-            "brand_raw": "Apple M4"
-        }
-    },
-    "commit_info": {
-        "id": "7bf1b7ea6657990eef44fdb362b762abb16e41ba",
-        "time": "2026-05-18T20:05:08-04:00",
-        "author_time": "2026-05-18T20:05:08-04:00",
-        "dirty": true,
-        "project": "boost-data-collector",
-        "branch": "develop"
-    },
-    "benchmarks": [
-        {
-            "group": null,
-            "name": "test_process_commit_data_batch",
-            "fullname": "benchmarks/test_github_commits_throughput.py::test_process_commit_data_batch",
-            "params": null,
-            "param": null,
-            "extra_info": {
-                "n": 50
-            },
-            "options": {
-                "disable_gc": false,
-                "timer": "perf_counter",
-                "min_rounds": 5,
-                "max_time": 1.0,
-                "min_time": 5e-06,
-                "warmup": false
-            },
-            "stats": {
-                "min": 0.13009395799599588,
-                "max": 0.16657558304723352,
-                "mean": 0.14227045823354273,
-                "stddev": 0.01457181655810832,
-                "rounds": 5,
-                "median": 0.13689958304166794,
-                "iqr": 0.01724434396601282,
-                "q1": 0.1326302083034534,
-                "q3": 0.14987455226946622,
-                "iqr_outliers": 0,
-                "stddev_outliers": 1,
-                "outliers": "1;0",
-                "ld15iqr": 0.13009395799599588,
-                "hd15iqr": 0.16657558304723352,
-                "ops": 7.0288660936092535,
-                "total": 0.7113522911677137,
-                "data": [
-                    0.16657558304723352,
-                    0.1334756250726059,
-                    0.13009395799599588,
-                    0.13689958304166794,
-                    0.14430754201021045
-                ],
-                "iterations": 1
-            }
-        },
-        {
-            "group": null,
-            "name": "test_service_bulk_commits_and_file_changes",
-            "fullname": "benchmarks/test_service_bulk_insert.py::test_service_bulk_commits_and_file_changes",
-            "params": null,
-            "param": null,
-            "extra_info": {
-                "n": 50
-            },
-            "options": {
-                "disable_gc": false,
-                "timer": "perf_counter",
-                "min_rounds": 5,
-                "max_time": 1.0,
-                "min_time": 5e-06,
-                "warmup": false
-            },
-            "stats": {
-                "min": 0.10591337503865361,
-                "max": 0.1513816670048982,
-                "mean": 0.13538706267718226,
-                "stddev": 0.01819949434483927,
-                "rounds": 6,
-                "median": 0.14058843749808148,
-                "iqr": 0.02617037494201213,
-                "q1": 0.12384004204068333,
-                "q3": 0.15001041698269546,
-                "iqr_outliers": 0,
-                "stddev_outliers": 1,
-                "outliers": "1;0",
-                "ld15iqr": 0.10591337503865361,
-                "hd15iqr": 0.1513816670048982,
-                "ops": 7.386230118489284,
-                "total": 0.8123223760630935,
-                "data": [
-                    0.1513816670048982,
-                    0.15001041698269546,
-                    0.13251695793587714,
-                    0.12384004204068333,
-                    0.1486599170602858,
-                    0.10591337503865361
-                ],
-                "iterations": 1
-            }
-        }
-    ],
-    "datetime": "2026-05-19T18:09:23.360634+00:00",
-    "version": "5.2.3"
-}
diff --git a/benchmarks/baselines.json b/benchmarks/baselines.json
index 506ef4e9..45ed9378 100644
--- a/benchmarks/baselines.json
+++ b/benchmarks/baselines.json
@@ -2,11 +2,11 @@
   "description": "Maximum acceptable median wall time (seconds) per scenario at BENCHMARK_COMMIT_N. Update median_seconds after intentional perf work or when CI hardware changes; copy medians from --benchmark-json stats.median.",
   "benchmarks": {
     "benchmarks/test_github_commits_throughput.py::test_process_commit_data_batch": {
-      "median_seconds": 45.0,
+      "median_seconds": 0.1369,
       "n": 50
     },
     "benchmarks/test_service_bulk_insert.py::test_service_bulk_commits_and_file_changes": {
-      "median_seconds": 35.0,
+      "median_seconds": 0.1406,
       "n": 50
     }
   }
diff --git a/benchmarks/compare_to_baseline.py b/benchmarks/compare_to_baseline.py
index dc3bd0ef..7ecc7d2d 100644
--- a/benchmarks/compare_to_baseline.py
+++ b/benchmarks/compare_to_baseline.py
@@ -74,7 +74,7 @@ def main() -> int:
         if median > limit:
             failures.append(
                 f"{fullname}: median {median:.6f}s exceeds limit {limit:.6f}s "
-                f"(baseline {float(ref):.6f}s × {args.regression_ratio})"
+                f"(baseline {float(ref):.6f}s x {args.regression_ratio})"
             )
 
     for line in warnings:
diff --git a/benchmarks/test_service_bulk_insert.py b/benchmarks/test_service_bulk_insert.py
index 85cf5dd6..0c476861 100644
--- a/benchmarks/test_service_bulk_insert.py
+++ b/benchmarks/test_service_bulk_insert.py
@@ -25,7 +25,7 @@ def test_service_bulk_commits_and_file_changes(
     repo = github_repository
     account = github_account
     commit_at = datetime(2024, 6, 1, tzinfo=timezone.utc)
-    hashes = [f"svcbulk{i:056d}"[:40] for i in range(n)]
+    hashes = [f"{i:040x}" for i in range(n)]
 
     def run_batch() -> None:
         with transaction.atomic():
diff --git a/core/_version.py b/core/_version.py
index ccc4c4c3..59aa09a2 100644
--- a/core/_version.py
+++ b/core/_version.py
@@ -1,2 +1,2 @@
 # file generated by setuptools-scm; do not edit
-version = "0.1.0"
+version = "0.1.1.dev553+g94302c719.d20260520"
diff --git a/docs/service_api/cppa_pinecone_sync.md b/docs/service_api/cppa_pinecone_sync.md
index a56312c3..a6495527 100644
--- a/docs/service_api/cppa_pinecone_sync.md
+++ b/docs/service_api/cppa_pinecone_sync.md
@@ -22,4 +22,4 @@ All creates/updates/deletes for `PineconeFailList` and `PineconeSyncStatus` must
 ## Related
 
 - [Service API index](README.md)
-- [Contributing](../Contributing.md)
+- [CONTRIBUTING.md](../../CONTRIBUTING.md)