From ad55c04aa2a191c0b261ad73bcbe0d850df285ed Mon Sep 17 00:00:00 2001 From: zho Date: Tue, 19 May 2026 00:08:17 +0800 Subject: [PATCH 1/3] added generate_service_docs.py and updated service api documents --- .pre-commit-config.yaml | 8 + README.md | 2 +- docs/Contributing.md | 18 +- docs/Service_API.md | 13 + docs/service_api/README.md | 10 +- .../service_api/boost_library_docs_tracker.md | 24 +- docs/service_api/boost_library_tracker.md | 103 +---- .../service_api/boost_mailing_list_tracker.md | 23 + docs/service_api/boost_usage_tracker.md | 34 +- docs/service_api/clang_github_tracker.md | 26 +- docs/service_api/core_protocols.md | 57 +++ docs/service_api/cppa_pinecone_sync.md | 72 +-- docs/service_api/cppa_slack_tracker.md | 26 ++ docs/service_api/cppa_user_tracker.md | 84 +--- .../cppa_youtube_script_tracker.md | 32 +- docs/service_api/discord_activity_tracker.md | 82 +--- docs/service_api/github_activity_tracker.md | 91 ++-- docs/service_api/wg21_paper_tracker.md | 25 + scripts/generate_service_docs.py | 434 ++++++++++++++++++ 19 files changed, 781 insertions(+), 383 deletions(-) create mode 100644 docs/service_api/boost_mailing_list_tracker.md create mode 100644 docs/service_api/core_protocols.md create mode 100644 docs/service_api/cppa_slack_tracker.md create mode 100644 docs/service_api/wg21_paper_tracker.md create mode 100644 scripts/generate_service_docs.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6d98af33..ac0d3b91 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,3 +37,11 @@ repos: entry: djhtml --tabwidth 2 files: .*/templates/.*\.html$ alias: autoformat + - repo: local + hooks: + - id: generate-service-docs + name: check service API docs are up to date + entry: python scripts/generate_service_docs.py --check + language: system + pass_filenames: false + always_run: true diff --git a/README.md b/README.md index c8680af5..633bc609 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ Docs are 
organized **by topic** (one doc per concern: workflow, workspace, servi - [Workspace.md](docs/Workspace.md) – Workspace layout and usage for file processing. - [Schema.md](docs/Schema.md) – Database schema and table relationships. - [Development_guideline.md](docs/Development_guideline.md) – Development setup, app requirements, and step-by-step workflow. -- [Contributing.md](docs/Contributing.md) – Service layer (single place for writes) and contributor guidelines. +- [Contributing.md](docs/Contributing.md) – Service layer (single place for writes), **regenerating service API docs** (`scripts/generate_service_docs.py`), and contributor guidelines. - [Service_API.md](docs/Service_API.md) – API reference and index for all service layer functions. - [service_api/](docs/service_api/) – Per-app service API docs (name, description, parameters, return types, validation). diff --git a/docs/Contributing.md b/docs/Contributing.md index 28dfa198..03ec2eb6 100644 --- a/docs/Contributing.md +++ b/docs/Contributing.md @@ -28,8 +28,22 @@ Each Django app that has **models** provides a **`services.py`** module. This is | `boost_usage_tracker` | `boost_usage_tracker/services.py` | External repos, Boost usage, missing-header tmp. | | `cppa_pinecone_sync` | `cppa_pinecone_sync/services.py` | Pinecone fail list and sync status writes. | | `discord_activity_tracker` | `discord_activity_tracker/services.py` | Servers, channels, messages, reactions (Discord user profiles in cppa_user_tracker). | +| `cppa_youtube_script_tracker` | `cppa_youtube_script_tracker/services.py` | YouTube channels, videos, tags, transcript state, speaker links. | +| `clang_github_tracker` | `clang_github_tracker/services.py` | Clang/llvm GitHub issue, PR, and commit upserts; fetch watermarks. | +| `boost_mailing_list_tracker` | `boost_mailing_list_tracker/services.py` | Mailing list messages and names. | +| `cppa_slack_tracker` | `cppa_slack_tracker/services.py` | Slack teams, channels, messages, membership. 
| +| `wg21_paper_tracker` | `wg21_paper_tracker/services.py` | WG21 papers, authors, mailings. | -For a full list of functions, parameter/return types, and validation (e.g. empty `name` raises `ValueError`), see **[Service_API.md](Service_API.md)** and the per-app docs in **[service_api/](service_api/)** (index: [service_api/README.md](service_api/README.md)). +For a full list of functions, parameter/return types, and validation (e.g. empty `name` raises `ValueError`), see **[Service_API.md](Service_API.md)** and the per-app docs in **[service_api/](service_api/)** (index: [service_api/README.md](service_api/README.md)). DTO protocols shared across trackers are documented in **[service_api/core_protocols.md](service_api/core_protocols.md)** (generated from `core/protocols.py`). + +### Regenerating service API docs + +Reference tables in `docs/service_api/*.md` are produced by **[`scripts/generate_service_docs.py`](../scripts/generate_service_docs.py)** from each app’s `services.py` and from `core/protocols.py`. + +- **Markers:** Each file contains a `BEGIN` … `END` marker pair. The script replaces **only** that region. Put hand-written notes (usage, cross-app warnings, command help) **below** the `END` marker. +- **Regenerate locally:** `python scripts/generate_service_docs.py` (optional: `--app <app_name>` for one module). +- **Check only:** `python scripts/generate_service_docs.py --check` exits non-zero if committed markdown would change. +- **CI / pre-commit:** The **lint** job runs pre-commit, which includes this check. Pull requests that change **only** ignored paths (`**.md`, `docs/**` per `.github/workflows/actions.yml`) do not run CI; any PR that touches `**/services.py` or `core/protocols.py` still runs the check—regenerate docs before pushing. ### How to use @@ -61,7 +75,7 @@ For a full list of functions, parameter/return types, and validation (e.g. empty - **Branching:** Create feature branches from `develop`. Open pull requests against `develop`. 
See [Development_guideline.md](Development_guideline.md). - **Code style:** Use Python 3.11+ and follow Django and project conventions. Use the project’s logging (`logging.getLogger(__name__)`). Before pushing, run **`uv run pyright`** (with dev deps) for the paths covered by **`pyrightconfig.json`**, and ensure CI’s **lint** / **pyright** / **test** jobs would pass. - **Database:** Use the Django ORM and migrations. Writes only through the service layer as above. -- **Docs:** Update this doc (and app `services.py` docstrings) when adding new apps or changing the write rules. +- **Docs:** Update this doc (and app `services.py` docstrings) when adding new apps or changing the write rules. After changing `services.py` or `core/protocols.py`, run `python scripts/generate_service_docs.py` and commit the updated `docs/service_api/` files. ## Related documentation diff --git a/docs/Service_API.md b/docs/Service_API.md index 959346ff..a6499609 100644 --- a/docs/Service_API.md +++ b/docs/Service_API.md @@ -15,6 +15,11 @@ All writes to app models must go through the service layer. The API is documente | **boost_library_docs_tracker** | `boost_library_docs_tracker.services` | Globally unique doc content (BoostDocContent) and (library-version, page) relation tracking (BoostLibraryDocumentation). | | **boost_usage_tracker** | `boost_usage_tracker.services` | External repos, Boost usage, missing-header tmp. | | **discord_activity_tracker** | `discord_activity_tracker.services` | Discord servers, channels, messages, reactions (authors: `cppa_user_tracker.DiscordProfile`). | +| **cppa_youtube_script_tracker** | `cppa_youtube_script_tracker.services` | YouTube channels, videos, tags, transcript state; speaker links. | +| **clang_github_tracker** | `clang_github_tracker.services` | Upsert llvm issue/PR/commit rows; fetch watermarks. | +| **boost_mailing_list_tracker** | `boost_mailing_list_tracker.services` | Mailing list messages and names. 
| +| **cppa_slack_tracker** | `cppa_slack_tracker.services` | Slack teams, channels, messages, membership. | +| **wg21_paper_tracker** | `wg21_paper_tracker.services` | WG21 papers, authors, mailings. | --- @@ -28,6 +33,14 @@ All writes to app models must go through the service layer. The API is documente - **[service_api/cppa_pinecone_sync.md](service_api/cppa_pinecone_sync.md)** – API for `cppa_pinecone_sync.services`. - **[service_api/boost_usage_tracker.md](service_api/boost_usage_tracker.md)** – API for `boost_usage_tracker.services`. - **[service_api/discord_activity_tracker.md](service_api/discord_activity_tracker.md)** – API for `discord_activity_tracker.services`; management commands, sync modules, and Pinecone notes. +- **[service_api/cppa_youtube_script_tracker.md](service_api/cppa_youtube_script_tracker.md)** – API for `cppa_youtube_script_tracker.services`; preprocessor, fetcher, workspace, and transcript helpers. +- **[service_api/clang_github_tracker.md](service_api/clang_github_tracker.md)** – API for `clang_github_tracker.services`. +- **[service_api/boost_mailing_list_tracker.md](service_api/boost_mailing_list_tracker.md)** – API for `boost_mailing_list_tracker.services`. +- **[service_api/cppa_slack_tracker.md](service_api/cppa_slack_tracker.md)** – API for `cppa_slack_tracker.services`. +- **[service_api/wg21_paper_tracker.md](service_api/wg21_paper_tracker.md)** – API for `wg21_paper_tracker.services`. +- **[service_api/core_protocols.md](service_api/core_protocols.md)** – `core.protocols` DTO protocols (`TrackerResult`, `ActivityRecord`, `IncrementalState`). + +Tables in each file are **generated** from source; see [Contributing.md](Contributing.md#regenerating-service-api-docs). --- diff --git a/docs/service_api/README.md b/docs/service_api/README.md index 4a726830..6c20e608 100644 --- a/docs/service_api/README.md +++ b/docs/service_api/README.md @@ -15,6 +15,10 @@ Index of all app service modules. 
All writes to app models must go through the s | [discord_activity_tracker.services](discord_activity_tracker.md) | discord_activity_tracker | Servers, channels, messages, reactions (user profiles in cppa_user_tracker). | | [cppa_youtube_script_tracker.services](cppa_youtube_script_tracker.md) | cppa_youtube_script_tracker | YouTube channels, videos, transcript state, and speaker links for C++ conference talks. | | [clang_github_tracker.services](clang_github_tracker.md) | clang_github_tracker | Upsert llvm issue/PR/commit rows; DB watermarks for API fetch windows. | +| [boost_mailing_list_tracker.services](boost_mailing_list_tracker.md) | boost_mailing_list_tracker | Mailing list messages and list names. | +| [cppa_slack_tracker.services](cppa_slack_tracker.md) | cppa_slack_tracker | Slack teams, channels, messages, and membership changes. | +| [wg21_paper_tracker.services](wg21_paper_tracker.md) | wg21_paper_tracker | WG21 papers, authors, and mailings. | +| [core.protocols](core_protocols.md) | core | Runtime-checkable DTO protocols (`TrackerResult`, `ActivityRecord`, `IncrementalState`); see also [Core public API](../Core_public_API.md). | --- @@ -29,5 +33,9 @@ Index of all app service modules. All writes to app models must go through the s - **cppa_youtube_script_tracker** – Get-or-create YouTubeChannel, YouTubeVideo; update transcript state; link speakers to videos. Speaker profiles (`YoutubeSpeaker`) in cppa_user_tracker. - **cppa_pinecone_sync** – Get/clear/record failed IDs in PineconeFailList; get/update PineconeSyncStatus. - **clang_github_tracker** – Upsert `ClangGithubIssueItem` / `ClangGithubCommit` during sync or backfill; read `Max(github_updated_at)` / `Max(github_committed_at)` for fetch cursors. +- **boost_mailing_list_tracker** – Mailing list message and name helpers. +- **cppa_slack_tracker** – Slack team/channel/message persistence and membership sync. +- **wg21_paper_tracker** – WG21 paper and author persistence. 
+- **core.protocols** – Structural contracts for sync outcomes and activity payloads (see [core_protocols.md](core_protocols.md)). -See [Contributing.md](../Contributing.md) for the rule that all writes go through the service layer. +See [Contributing.md](../Contributing.md) for the rule that all writes go through the service layer, and for **regenerating** these docs from source. diff --git a/docs/service_api/boost_library_docs_tracker.md b/docs/service_api/boost_library_docs_tracker.md index 7ad8f4b2..37e3d3ed 100644 --- a/docs/service_api/boost_library_docs_tracker.md +++ b/docs/service_api/boost_library_docs_tracker.md @@ -8,15 +8,20 @@ **Pinecone upsert state** is stored on `BoostDocContent.is_upserted`, not on `BoostLibraryDocumentation` (the join table has only the two FKs plus `created_at`). --- + -## BoostDocContent +## Public API (generated) -| Function | Parameter types | Return type | Notes | -| -------------------------------- | ------------------------------------------------------------------- | ----------------------------- | --------------------------------------------------------------------- | -| `get_or_create_doc_content` | `url: str`, `content_hash: str`, `version_id: int \| None = None` | `tuple[BoostDocContent, str]` | See return values below. `ValueError` if `url` is empty. | -| `set_doc_content_upserted` | `doc: BoostDocContent`, `value: bool` | `BoostDocContent` | Sets `is_upserted`. | -| `set_doc_content_upserted_by_ids`| `ids: list[int]`, `value: bool` | `int` | Bulk `UPDATE`; returns number of rows updated. | -| `get_unupserted_doc_contents` | — | `QuerySet[BoostDocContent]` | `is_upserted=False`; used for Pinecone sync worklists. | +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `get_docs_for_library_version` | library_version_id: int | django_models.QuerySet | Return all BoostLibraryDocumentation rows for this library-version. 
| +| `get_or_create_doc_content` | url: str, content_hash: str, version_id: int \| None = None | tuple[BoostDocContent, str] | Get or create a BoostDocContent row for the given content_hash. Page content is NOT stored in the DB; it lives in workspace files. | +| `get_unupserted_doc_contents` | | django_models.QuerySet | Return all BoostDocContent rows that have not been upserted to Pinecone. | +| `link_content_to_library_version` | library_version_id: int, doc_content_id: int | tuple[BoostLibraryDocumentation, bool] | Get or create a BoostLibraryDocumentation row for the (library_version, doc_content) pair. Returns (relation, created). | +| `set_doc_content_upserted` | doc: BoostDocContent, value: bool | BoostDocContent | Set is_upserted on a BoostDocContent row. | +| `set_doc_content_upserted_by_ids` | ids: list[int], value: bool | int | Bulk-set is_upserted for BoostDocContent rows with the given PKs. Returns the number of rows updated. | + + ### `get_or_create_doc_content` return values @@ -33,7 +38,4 @@ The second element is a `str` indicating what changed: Join table: one row per `(boost_library_version, boost_doc_content)` pair. **No** `page_count`, status fields, or `updated_at` on the model. -| Function | Parameter types | Return type | Notes | -| --------------------------------- | ---------------------------------------------------- | ---------------------------------------- | --------------------------------------------------------------------- | -| `link_content_to_library_version` | `library_version_id: int`, `doc_content_id: int` | `tuple[BoostLibraryDocumentation, bool]` | `get_or_create` on the pair. Second value is `created`. | -| `get_docs_for_library_version` | `library_version_id: int` | `QuerySet[BoostLibraryDocumentation]` | All join rows for that library version. | +See the generated **Public API** table above for `link_content_to_library_version` and `get_docs_for_library_version`. 
diff --git a/docs/service_api/boost_library_tracker.md b/docs/service_api/boost_library_tracker.md index 71b90cf0..ef48f6f0 100644 --- a/docs/service_api/boost_library_tracker.md +++ b/docs/service_api/boost_library_tracker.md @@ -6,81 +6,28 @@ **Type notation:** Model types refer to `boost_library_tracker.models`. Cross-app: `GitHubRepository`, `GitHubFile` are from `github_activity_tracker.models`; `GitHubAccount` is from `cppa_user_tracker.models`. --- - -## BoostLibraryRepository - -| Function | Parameter types | Return type | Raises | -| ------------------------------- | ---------------------------------- | ---------------------------------------- | ------ | -| `get_or_create_boost_library_repo` | `github_repository: GitHubRepository` | `tuple[BoostLibraryRepository, bool]` | — | - ---- - -## BoostLibrary - -| Function | Parameter types | Return type | Raises | -| --------------------------- | ---------------------------------------- | ----------------------------- | ------ | -| `get_or_create_boost_library` | `repo: BoostLibraryRepository`, `name: str` | `tuple[BoostLibrary, bool]` | `ValueError` if `name` is empty or whitespace-only. | - ---- - -## BoostFile - -| Function | Parameter types | Return type | -| ------------------------- | ---------------------------------------- | ------------------------- | -| `get_or_create_boost_file` | `github_file: GitHubFile`, `library: BoostLibrary` | `tuple[BoostFile, bool]` | - ---- - -## BoostVersion - -| Function | Parameter types | Return type | Raises | -| --------------------------- | ----------------------------------- | --------------------------- | ------ | -| `get_or_create_boost_version` | `version: str`, `version_created_at=None` | `tuple[BoostVersion, bool]` | `ValueError` if `version` is empty or whitespace-only. 
| - ---- - -## BoostLibraryVersion - -| Function | Parameter types | Return type | -| --------------------------------- | --------------------------------------------------------------------- | ----------------------------------- | -| `get_or_create_boost_library_version` | `library: BoostLibrary`, `version: BoostVersion`, `cpp_version=""`, `description=""` | `tuple[BoostLibraryVersion, bool]` | - ---- - -## BoostDependency - -| Function | Parameter types | Return type | -| ------------------- | ------------------------------------------------------------ | ------------------------------ | -| `add_boost_dependency` | `client_library: BoostLibrary`, `version: BoostVersion`, `dep_library: BoostLibrary` | `tuple[BoostDependency, bool]` | - ---- - -## DependencyChangeLog - -| Function | Parameter types | Return type | -| ------------------------ | ------------------------------------------------------------------ | ------------------------------------- | -| `add_dependency_changelog` | `client_library: BoostLibrary`, `dep_library: BoostLibrary`, `is_add: bool`, `created_at` | `tuple[DependencyChangeLog, bool]` | - ---- - -## BoostLibraryCategory - -| Function | Parameter types | Return type | Raises | -| --------------------------------- | --------------- | --------------------------------------- | ------ | -| `get_or_create_boost_library_category` | `name: str` | `tuple[BoostLibraryCategory, bool]` | `ValueError` if `name` is empty or whitespace-only. 
| - ---- - -## BoostLibraryCategoryRelationship - -| Function | Parameter types | Return type | -| -------------------- | --------------------------------------------------- | --------------------------------------------- | -| `add_library_category` | `library: BoostLibrary`, `category: BoostLibraryCategory` | `tuple[BoostLibraryCategoryRelationship, bool]` | - ---- - -## BoostLibraryRoleRelationship - -| Function | Parameter types | Return type | -| ----------------------- | ------------------------------------------------------------------------------- | --------------------------------------------- | -| `add_library_version_role` | `library_version: BoostLibraryVersion`, `account: GitHubAccount`, `is_maintainer=False`, `is_author=False` | `tuple[BoostLibraryRoleRelationship, bool]` | + + +## Public API (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `add_boost_dependency` | client_library: BoostLibrary, version: BoostVersion, dep_library: BoostLibrary | tuple[BoostDependency, bool] | Add a dependency (idempotent). Returns (dependency, created). | +| `add_dependency_changelog` | client_library: BoostLibrary, dep_library: BoostLibrary, is_add: bool, created_at | tuple[DependencyChangeLog, bool] | Add or update a dependency changelog entry. If exists (same client, dep, created_at), updates is_add. Returns (log, created). | +| `add_library_category` | library: BoostLibrary, category: BoostLibraryCategory | tuple[BoostLibraryCategoryRelationship, bool] | Link library to category (idempotent). Returns (relation, created). | +| `add_library_version_role` | library_version: BoostLibraryVersion, account: GitHubAccount, is_maintainer: bool = False, is_author: bool = False | tuple[BoostLibraryRoleRelationship, bool] | Add or update maintainer/author for a library version. Returns (relation, created). 
| +| `get_or_create_account_from_name` | name: str | GitHubAccount | Get or create a GitHubAccount for a contributor name string (from libraries.json). | +| `get_or_create_boost_file` | github_file: GitHubFile, library: BoostLibrary | tuple[BoostFile, bool] | Get or create BoostFile linking a GitHubFile to a BoostLibrary. If exists, updates library. | +| `get_or_create_boost_library` | repo: BoostLibraryRepository, name: str | tuple[BoostLibrary, bool] | Get or create a BoostLibrary by repo and name. If exists, no extra fields to update. | +| `get_or_create_boost_library_category` | name: str | tuple[BoostLibraryCategory, bool] | Get or create BoostLibraryCategory by name. | +| `get_or_create_boost_library_repo` | github_repository: GitHubRepository | tuple[BoostLibraryRepository, bool] | Get or create BoostLibraryRepository for a GitHub repository (inherited model). Creates only the child row (no parent save) to avoid NOT NULL errors on corrupt parent rows. | +| `get_or_create_boost_library_version` | library: BoostLibrary, version: BoostVersion, cpp_version: str \| None = None, description: str \| None = None, key: str \| None = None, documentation: str \| None = None | tuple[BoostLibraryVersion, bool] | Get or create BoostLibraryVersion for library + version. If exists, updates only fields that are provided (not None). | +| `get_or_create_boost_version` | version: str, version_created_at = None | tuple[BoostVersion, bool] | Get or create BoostVersion by version string. If exists, updates version_created_at. 
| + + + +## Related + +- [Service API index](README.md) +- [Contributing](../Contributing.md) +- [Schema](../Schema.md) diff --git a/docs/service_api/boost_mailing_list_tracker.md b/docs/service_api/boost_mailing_list_tracker.md new file mode 100644 index 00000000..65564f67 --- /dev/null +++ b/docs/service_api/boost_mailing_list_tracker.md @@ -0,0 +1,23 @@ +# boost_mailing_list_tracker.services + +**Module path:** `boost_mailing_list_tracker.services` +**Description:** Service layer for mailing list messages and names. All creates/updates/deletes for this app's models must go through functions here. + +**Type notation:** Model types refer to `boost_mailing_list_tracker.models`. + +--- + + +## Public API (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `delete_mailing_list_message` | message: MailingListMessage | None | Delete a MailingListMessage. | +| `get_or_create_mailing_list_message` | sender: MailingListProfile, msg_id: str, sent_at: datetime, parent_id: str = '', thread_id: str = '', subject: str = '', content: str = '', list_name: str = '' | tuple[MailingListMessage, bool] | Get or create a MailingListMessage by msg_id (unique). | + + + +## Related + +- [Service API index](README.md) +- [Contributing](../Contributing.md) diff --git a/docs/service_api/boost_usage_tracker.md b/docs/service_api/boost_usage_tracker.md index 82dba8da..75f99655 100644 --- a/docs/service_api/boost_usage_tracker.md +++ b/docs/service_api/boost_usage_tracker.md @@ -6,24 +6,22 @@ **Type notation:** Model types refer to `boost_usage_tracker.models`. Cross-app: `GitHubRepository`, `GitHubFile` are from `github_activity_tracker.models`; `BoostFile` is from `boost_library_tracker.models`. 
--- - -## BoostExternalRepository - -| Function | Parameter types | Return type | Raises | -| ----------------------------------- | ----------------------------------------------------------------------------------------------------------- | -------------------------------------- | ------ | -| `get_or_create_boost_external_repo` | `github_repository: GitHubRepository`, `boost_version=""`, `is_boost_embedded=False`, `is_boost_used=False` | `tuple[BoostExternalRepository, bool]` | — | -| `update_boost_external_repo` | `ext_repo: BoostExternalRepository`, `boost_version=None`, `is_boost_embedded=None`, `is_boost_used=None` | `BoostExternalRepository` | — | - ---- - -## BoostUsage - -| Function | Parameter types | Return type | Raises | -| ------------------------------------ | ----------------------------------------------------------------------------------- | ------------------------------------------------ | ------ | -| `create_or_update_boost_usage` | `repo`, `boost_header: BoostFile`, `file_path: GitHubFile`, `last_commit_date=None` | `tuple[BoostUsage, bool]` | — | -| `mark_usage_excepted` | `usage: BoostUsage` | `BoostUsage` | — | -| `get_active_usages_for_repo` | `repo: BoostExternalRepository` | `list[BoostUsage]` | — | -| `get_or_create_missing_header_usage` | `repo`, `file_path: GitHubFile`, `header_name: str`, `last_commit_date=None` | `tuple[BoostUsage, BoostMissingHeaderTmp, bool]` | — | + + +## Public API (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `bulk_create_or_update_boost_usage` | repo: BoostExternalRepository, items: list[tuple['BoostFile', 'GitHubFile', Optional[datetime]]] | tuple[int, int] | Create or update many BoostUsage rows in bulk. | +| `create_or_update_boost_usage` | repo: BoostExternalRepository, boost_header: 'BoostFile', file_path: 'GitHubFile', last_commit_date: Optional[datetime] = None | tuple[BoostUsage, bool] | Create or update a BoostUsage record. 
| +| `get_active_usages_for_repo` | repo: BoostExternalRepository | list[BoostUsage] | Return all active (non-excepted) BoostUsage records for *repo*. | +| `get_or_create_boost_external_repo` | github_repository: 'GitHubRepository', boost_version: str = '', is_boost_embedded: bool = False, is_boost_used: bool = False | tuple[BoostExternalRepository, bool] | Get or create BoostExternalRepository for a GitHubRepository (multi-table inheritance). | +| `get_or_create_missing_header_usage` | repo: BoostExternalRepository, file_path: 'GitHubFile', header_name: str, last_commit_date: Optional[datetime] = None | tuple[BoostUsage, BoostMissingHeaderTmp, bool] | Get or create a placeholder BoostUsage (boost_header=null) and a BoostMissingHeaderTmp. | +| `mark_usage_excepted` | usage: BoostUsage | BoostUsage | Mark a BoostUsage record as excepted (include no longer detected). | +| `mark_usages_excepted_bulk` | usage_ids: list[int] | int | Set excepted_at to today for multiple BoostUsage rows in one query. | +| `update_boost_external_repo` | ext_repo: BoostExternalRepository, boost_version: Optional[str] = None, is_boost_embedded: Optional[bool] = None, is_boost_used: Optional[bool] = None | BoostExternalRepository | Update mutable fields on an existing BoostExternalRepository. | + + **Note:** `get_or_create_missing_header_usage` creates or reuses a placeholder `BoostUsage` with `boost_header=None` and a `BoostMissingHeaderTmp` row for the unresolved `header_name`. Used when the header is not yet in BoostFile/GitHubFile. diff --git a/docs/service_api/clang_github_tracker.md b/docs/service_api/clang_github_tracker.md index 3e53ff51..4beb0ac3 100644 --- a/docs/service_api/clang_github_tracker.md +++ b/docs/service_api/clang_github_tracker.md @@ -6,23 +6,21 @@ **Type notation:** Models live in `clang_github_tracker.models`. 
--- + -## Upserts +## Public API (generated) -| Function | Parameters | Return | Raises | -| -------- | ---------- | ------ | ------ | -| `upsert_issue_item` | `number: int`, `*, is_pull_request: bool`, `github_created_at`, `github_updated_at` | `tuple[ClangGithubIssueItem, bool]` (instance, created) | — | -| `upsert_commit` | `sha: str`, `*, github_committed_at` | `tuple[ClangGithubCommit, bool]` | `ValueError` if `sha` is not 40 hex chars | +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `get_commit_watermark` | | Optional[datetime] | Max ``github_committed_at`` across commits (API fetch cursor base). | +| `get_issue_item_watermark` | | Optional[datetime] | Max ``github_updated_at`` across issues and PRs (API fetch cursor base). | +| `start_after_watermark` | max_dt: datetime \| None | datetime \| None | Return ``max + 1ms`` for API fetch lower bound, or ``None`` if no watermark. | +| `upsert_commit` | sha: str, github_committed_at: datetime \| None | tuple[ClangGithubCommit, bool] | Create or update a ClangGithubCommit by ``sha``. Returns (instance, created). | +| `upsert_commits_batch` | rows: Sequence[tuple[str, datetime \| None]], batch_size: int = DEFAULT_UPSERT_BATCH_SIZE | tuple[int, int] | Batch upsert commits by ``sha``. Skips rows whose sha is not 40 chars. | +| `upsert_issue_item` | number: int, is_pull_request: bool, github_created_at: datetime \| None, github_updated_at: datetime \| None | tuple[ClangGithubIssueItem, bool] | Create or update a ClangGithubIssueItem by ``number``. Returns (instance, created). | +| `upsert_issue_items_batch` | rows: Sequence[tuple[int, bool, datetime \| None, datetime \| None]], batch_size: int = DEFAULT_UPSERT_BATCH_SIZE | tuple[int, int] | Batch upsert issue/PR rows by ``number``. 
| ---- - -## API fetch watermarks - -| Function | Return | Notes | -| -------- | ------ | ----- | -| `get_issue_item_watermark` | `datetime \| None` | `Max(github_updated_at)` over all issue/PR rows (unified issues+PR stream). | -| `get_commit_watermark` | `datetime \| None` | `Max(github_committed_at)` over commits. | -| `start_after_watermark` | `datetime \| None` | `max_dt + timedelta(milliseconds=1)` or `None` if `max_dt` is `None`. | + Used by `clang_github_tracker.state_manager.resolve_start_end_dates` (with optional CLI `--since` / `--until` bounds). diff --git a/docs/service_api/core_protocols.md b/docs/service_api/core_protocols.md new file mode 100644 index 00000000..36fdf35c --- /dev/null +++ b/docs/service_api/core_protocols.md @@ -0,0 +1,57 @@ +# core.protocols + +**Module path:** `core.protocols` +**Description:** Portable DTO protocols for tracker sync and collection boundaries (`TrackerResult`, `ActivityRecord`, `IncrementalState`). See also [Core public API](../Core_public_API.md). + +--- + + +## Protocol types (generated) + +### `ActivityRecord` + +Portable activity event (not a Django model). + +| Property | Type | +| --- | --- | +| `source_system` | str | +| `external_id` | str | +| `occurred_at` | str | +| `activity_type` | str | +| `actor_external_id` | str | +| `source_url` | str \| None | +| `summary` | str | + +### `IncrementalState` + +Serializable checkpoint between runs (opaque token + human marker + extras). + +| Property | Type | +| --- | --- | +| `checkpoint_token` | str \| None | +| `human_readable_marker` | str \| None | +| `extras` | Mapping[str, Any] | + +### `TrackerResult` + +Outcome of one logical collection or sync cycle. 
+ +| Property | Type | +| --- | --- | +| `success` | bool | +| `counts` | Mapping[str, int] | + +## Module functions (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `require_activity_record` | obj: object | ActivityRecord | Return *obj* if it satisfies :class:`ActivityRecord`; else raise ``TypeError``. | +| `require_tracker_result` | obj: object | TrackerResult | Return *obj* if it satisfies :class:`TrackerResult`; else raise ``TypeError``. | + + + +## Related + +- [Core public API](../Core_public_API.md) — orchestration vs data protocols +- [Service API index](README.md) +- [Contributing](../Contributing.md) diff --git a/docs/service_api/cppa_pinecone_sync.md b/docs/service_api/cppa_pinecone_sync.md index b734355f..b02aa89b 100644 --- a/docs/service_api/cppa_pinecone_sync.md +++ b/docs/service_api/cppa_pinecone_sync.md @@ -5,67 +5,21 @@ Module: `cppa_pinecone_sync.services` All creates/updates/deletes for `PineconeFailList` and `PineconeSyncStatus` must go through this module. See [Contributing.md](../Contributing.md). --- + -## PineconeFailList +## Public API (generated) -### `get_failed_ids(app_type: str) -> list[str]` +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `clear_failed_ids` | app_type: str | int | Delete all PineconeFailList records for the given app_type. Returns count deleted. | +| `get_failed_ids` | app_type: str | list[str] | Return all failed_id values for the given app_type. | +| `get_final_sync_at` | app_type: str | Optional[datetime] | Return final_sync_at for the given app_type, or None if no record exists. | +| `record_failed_ids` | app_type: str, failed_ids: list[str] | list[PineconeFailList] | Bulk-create PineconeFailList entries for each failed_id. Returns created objects. | +| `update_sync_status` | app_type: str, final_sync_at: Optional[datetime] = None | PineconeSyncStatus | Create or update PineconeSyncStatus for the given app_type. 
| -Return all `failed_id` values for the given application. + -| Parameter | Type | Description | -| --------- | ----- | ------------------------------------ | -| `app_type` | `str` | Application type (e.g. `"slack"`). | +## Related -**Returns:** `list[str]` of failed_id values. - ---- - -### `clear_failed_ids(app_type: str) -> int` - -Delete all `PineconeFailList` records for the given application. - -| Parameter | Type | Description | -| --------- | ----- | --------------- | -| `app_type` | `str` | Application type. | - -**Returns:** `int` — number of rows deleted. - ---- - -### `record_failed_ids(app_type: str, failed_ids: list[str]) -> list[PineconeFailList]` - -Bulk-create `PineconeFailList` entries for each failed ID. - -| Parameter | Type | Description | -| ------------ | ----------- | -------------------------------------- | -| `app_type` | `str` | Application type. | -| `failed_ids` | `list[str]` | List of source record IDs that failed. | - -**Returns:** `list[PineconeFailList]` — created objects. Empty list if `failed_ids` is empty. - ---- - -## PineconeSyncStatus - -### `get_final_sync_at(app_type: str) -> datetime | None` - -Return `final_sync_at` for the given application, or `None` if no record exists. - -| Parameter | Type | Description | -| --------- | ----- | --------------- | -| `app_type` | `str` | Application type. | - -**Returns:** `datetime | None`. - ---- - -### `update_sync_status(app_type: str, final_sync_at: datetime | None = None) -> PineconeSyncStatus` - -Create or update `PineconeSyncStatus` for the given application. Sets `final_sync_at` to the provided value, or `now()` if not given. - -| Parameter | Type | Description | -| --------------- | ------------------ | ---------------------------------------- | -| `app_type` | `str` | Application type. | -| `final_sync_at` | `datetime \| None` | Timestamp. Defaults to `timezone.now()`. | - -**Returns:** `PineconeSyncStatus` instance. 
+- [Service API index](README.md) +- [Contributing](../Contributing.md) diff --git a/docs/service_api/cppa_slack_tracker.md b/docs/service_api/cppa_slack_tracker.md new file mode 100644 index 00000000..b35e154c --- /dev/null +++ b/docs/service_api/cppa_slack_tracker.md @@ -0,0 +1,26 @@ +# cppa_slack_tracker.services + +**Module path:** `cppa_slack_tracker.services` +**Description:** Service layer for Slack tracker models. All creates/updates/deletes for this app's models must go through functions in this module. + +**Type notation:** Model types refer to `cppa_slack_tracker.models` unless noted in docstrings. + +--- + + +## Public API (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `add_channel_membership_change` | channel: SlackChannel, slack_user_id: str, ts: str, is_joined: bool | SlackChannelMembershipChangeLog | Record a channel join/leave and update current membership. Returns the change log entry. Raises ValueError if user not found. | +| `get_or_create_slack_channel` | slack_channel: dict[str, Any], team: SlackTeam | tuple[Optional[SlackChannel], bool] | Get or create a Slack channel. Returns (channel, created); channel is None when skipped. | +| `get_or_create_slack_team` | team_data: dict[str, Any] | tuple[SlackTeam, bool] | Get or create a Slack team (workspace). Requires team_data['team_id']. Returns (SlackTeam, created). | +| `save_slack_message` | channel: SlackChannel, slack_message: dict[str, Any] | Optional[SlackMessage] | Save or update a Slack message from a Slack API payload. | +| `sync_channel_memberships` | channel: SlackChannel, member_ids: list[str] | None | Sync current channel memberships to match member_ids (add new, mark removed as deleted). 
| + + + +## Related + +- [Service API index](README.md) +- [Contributing](../Contributing.md) diff --git a/docs/service_api/cppa_user_tracker.md b/docs/service_api/cppa_user_tracker.md index 8f506423..b07f994e 100644 --- a/docs/service_api/cppa_user_tracker.md +++ b/docs/service_api/cppa_user_tracker.md @@ -6,66 +6,30 @@ **Type notation:** Model types refer to `cppa_user_tracker.models` (e.g. `Identity`, `BaseProfile`, `Email`). --- - -## Identity - -| Function | Parameter types | Return type | Description | -| ------------------------ | ---------------------------------------------------------------------------------- | ----------------------- | --------------------------------------------------------------------------------------- | -| `create_identity` | `display_name: str = ""`, `description: str = ""` | `Identity` | Create a new Identity. | -| `get_or_create_identity` | `display_name: str = ""`, `description: str = ""`, `defaults: dict \| None = None` | `tuple[Identity, bool]` | Get or create an Identity by `display_name`. `defaults` overrides fields when creating. | - ---- - -## TmpIdentity - -| Function | Parameter types | Return type | Description | -| --------------------- | ------------------------------------------------- | ------------- | ------------------------------- | -| `create_tmp_identity` | `display_name: str = ""`, `description: str = ""` | `TmpIdentity` | Create a TmpIdentity (staging). | - ---- - -## TempProfileIdentityRelation - -| Function | Parameter types | Return type | Description | -| --------------------------------------- | ----------------------------------------------------------- | ------------------------------------------ | ---------------------------------------------- | -| `add_temp_profile_identity_relation` | `base_profile: BaseProfile`, `target_identity: TmpIdentity` | `tuple[TempProfileIdentityRelation, bool]` | Link a BaseProfile to a TmpIdentity (staging). 
| -| `remove_temp_profile_identity_relation` | `base_profile: BaseProfile`, `target_identity: TmpIdentity` | `None` | Remove the staging relation. | - ---- - -## MailingListProfile - -| Function | Parameter types | Return type | Description | -| ------------------------------------ | ------------------------------------------- | --------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `get_or_create_mailing_list_profile` | `display_name: str = ""`, `email: str = ""` | `tuple[MailingListProfile, bool]` | Get or create a MailingListProfile by display_name and email. Looks up a profile with this display_name and an Email with this address; if found, returns it. Otherwise creates a new profile, adds the email via `add_email`, and returns the new profile. Raises `ValueError` if `display_name` or `email` is missing or empty. | - ---- - -## WG21PaperAuthorProfile - -| Function | Parameter types | Return type | Description | -| -------------------------------------- | -------------------------------------------- | -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `get_or_create_wg21_paper_author_profile` | `display_name: str`, `email: str \| None = None` | `tuple[WG21PaperAuthorProfile, bool]` | Resolve by display_name (optional email for disambiguation). If no profile exists, creates one and adds email if provided. If one exists, returns it. 
If multiple exist and one matches the email, returns that profile. If multiple exist and no email is provided, returns the first. If multiple exist and the supplied email matches none, creates a new profile with that email. **Side effect:** if `email` is supplied and the resolved or created profile does not already have that email, the function associates it with the profile (so existing profiles may be updated). Returns the profile and a boolean indicating creation. Use when linking paper authors so that same name + same email link to the same profile. | - ---- - -## DiscordProfile - -| Function | Parameter types | Return type | Description | -| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | -| `get_or_create_discord_profile` | `discord_user_id: int`, `username: str = ""`, `display_name: str = ""`, `avatar_url: str = ""`, `is_bot: bool = False`, `identity: Identity \| None = None` | `tuple[DiscordProfile, bool]` | Get or create a DiscordProfile by `discord_user_id`. Updates username, display_name, avatar_url, is_bot if profile exists. | - ---- - -## Email - -| Function | Parameter types | Return type | Description | -| -------------- | ----------------------------------------------------------------------------------------------- | ----------- | ------------------------------------------------------------------ | -| `add_email` | `base_profile: BaseProfile`, `email: str`, `is_primary: bool = False`, `is_active: bool = True` | `Email` | Add an email to a BaseProfile. | -| `update_email` | `email_obj: Email`, `**kwargs: Any` | `Email` | Update an Email. Allowed keys: `email`, `is_primary`, `is_active`. | -| `remove_email` | `email_obj: Email` | `None` | Delete an email. 
| - ---- + + +## Public API (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `add_email` | base_profile: BaseProfile, email: str, is_primary: bool = False, is_active: bool = True | Email | Add an email to a BaseProfile. Returns the new Email. | +| `add_temp_profile_identity_relation` | base_profile: BaseProfile, target_identity: TmpIdentity | tuple[TempProfileIdentityRelation, bool] | Link a BaseProfile to a TmpIdentity (staging). Returns (relation, created). | +| `create_identity` | display_name: str = '', description: str = '' | Identity | Create an Identity. Returns the new Identity. | +| `create_tmp_identity` | display_name: str = '', description: str = '' | TmpIdentity | Create a TmpIdentity (staging). Returns the new TmpIdentity. | +| `get_or_create_discord_profile` | discord_user_id: int, username: str = '', display_name: str = '', avatar_url: str = '', is_bot: bool = False, identity: Optional[Identity] = None | tuple[DiscordProfile, bool] | Get or create a DiscordProfile by discord_user_id. Returns (profile, created). | +| `get_or_create_github_account` | github_account_id: int, username: str = '', display_name: str = '', avatar_url: str = '', account_type: str = GitHubAccountType.USER, identity: Optional[Identity] = None | tuple[GitHubAccount, bool] | Get or create a GitHubAccount by github_account_id. Returns (account, created). | +| `get_or_create_identity` | display_name: str = '', description: str = '', defaults: Optional[dict[str, Any]] = None | tuple[Identity, bool] | Get or create an Identity by display_name. If exists, updates description from defaults. | +| `get_or_create_mailing_list_profile` | display_name: str = '', email: str = '' | tuple[MailingListProfile, bool] | Get or create a MailingListProfile by display_name and email. Returns (profile, created). 
| +| `get_or_create_owner_account` | client: GitHubClientProtocol, owner: str | GitHubAccount | Get or create a GitHubAccount for an owner (org or user). For use by any app. | +| `get_or_create_slack_user` | user_data: dict[str, Any] | tuple[SlackUser, bool] | Get or create a SlackUser from Slack API user data. Returns (SlackUser, created). | +| `get_or_create_unknown_github_account` | name: Optional[str] = None, email: str = '' | tuple[GitHubAccount, bool] | Get or create a GitHubAccount for commits with no API author/committer. | +| `get_or_create_wg21_paper_author_profile` | display_name: str, email: Optional[str] = None | tuple[WG21PaperAuthorProfile, bool] | Get or create a WG21PaperAuthorProfile by display_name, with optional email disambiguation. | +| `get_or_create_youtube_speaker` | external_id: str, display_name: str = '', identity: Optional[Identity] = None | tuple[YoutubeSpeaker, bool] | Get or create a YoutubeSpeaker by external_id. Returns (speaker, created). | +| `remove_email` | email_obj: Email | None | Remove an email from a profile. | +| `remove_temp_profile_identity_relation` | base_profile: BaseProfile, target_identity: TmpIdentity | None | Remove the staging relation between base_profile and target_identity. | +| `update_email` | email_obj: Email, **kwargs: Any | Email | Update an Email instance. Allowed keys: email, is_primary, is_active. | + + ## Related diff --git a/docs/service_api/cppa_youtube_script_tracker.md b/docs/service_api/cppa_youtube_script_tracker.md index a429912a..aff26eb6 100644 --- a/docs/service_api/cppa_youtube_script_tracker.md +++ b/docs/service_api/cppa_youtube_script_tracker.md @@ -6,21 +6,21 @@ **Type notation:** Model types refer to `cppa_youtube_script_tracker.models` unless noted. `YoutubeSpeaker` refers to `cppa_user_tracker.models.YoutubeSpeaker`. 
--- + -## YouTubeChannel +## Public API (generated) -| Function | Parameter types | Return type | Description | -| ----------------------- | ------------------------------------------------ | ---------------- | ------------------------------------------------------------------------------- | -| `get_or_create_channel` | `channel_id: str`, `channel_title: str = ""` | `YouTubeChannel` | Get or create channel by `channel_id`; updates `channel_title` if it has changed. | +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `get_or_create_channel` | channel_id: str, channel_title: str = '' | YouTubeChannel | Get or create a YouTubeChannel by channel_id (PK). | +| `get_or_create_tag` | tag_name: str | CppaTags | Get or create a CppaTags entry by tag_name. | +| `get_or_create_video` | video_id: str, channel: Optional[YouTubeChannel], metadata_dict: dict[str, Any] | tuple[YouTubeVideo, bool] | Get or create a YouTubeVideo by video_id (PK). Returns (video, created). | +| `link_speaker_to_video` | video: YouTubeVideo, speaker: Any | YouTubeVideoSpeaker | Link a YoutubeSpeaker to a YouTubeVideo (get-or-create). Returns YouTubeVideoSpeaker. | +| `link_tag_to_video` | video: YouTubeVideo, tag: CppaTags | YouTubeVideoTags | Link a CppaTags entry to a YouTubeVideo (get-or-create). Returns YouTubeVideoTags. | +| `remove_speaker_links_by_name` | video: YouTubeVideo, speaker_name: str | int | Remove all speaker links for a video where speaker.display_name matches speaker_name. | +| `update_video_transcript` | video: YouTubeVideo, transcript_path: str | YouTubeVideo | Mark video as having a transcript and store its path. Returns the updated video. 
| ---- - -## YouTubeVideo - -| Function | Parameter types | Return type | Description | -| ---------------------- | ---------------------------------------------------------------------------------- | ------------------------ | ----------------------------------------------------------------------------------------------- | -| `get_or_create_video` | `video_id: str`, `channel: YouTubeChannel \| None`, `metadata_dict: dict` | `tuple[YouTubeVideo, bool]` | Get or create video by `video_id`. Raises `ValueError` if `video_id` is empty. | -| `update_video_transcript` | `video: YouTubeVideo`, `transcript_path: str` | `YouTubeVideo` | Set `has_transcript=True` and `transcript_path` on the video; saves `update_fields`. | + `metadata_dict` accepted keys: @@ -40,14 +40,6 @@ Tags are not part of `metadata_dict`; use `get_or_create_tag` and `link_tag_to_v --- -## YouTubeVideoSpeaker - -| Function | Parameter types | Return type | Description | -| --------------------- | --------------------------------------------- | -------------------- | -------------------------------------------------------- | -| `link_speaker_to_video` | `video: YouTubeVideo`, `speaker: YoutubeSpeaker` | `YouTubeVideoSpeaker` | Get-or-create M2M link between a video and a speaker. | - ---- - ## YoutubeSpeaker (in cppa_user_tracker) | Function | Parameter types | Return type | Description | diff --git a/docs/service_api/discord_activity_tracker.md b/docs/service_api/discord_activity_tracker.md index 3afc5fd9..ae628521 100644 --- a/docs/service_api/discord_activity_tracker.md +++ b/docs/service_api/discord_activity_tracker.md @@ -6,6 +6,26 @@ **Type notation:** Model types refer to `discord_activity_tracker.models` unless noted. `DiscordProfile` refers to `cppa_user_tracker.models.DiscordProfile`. 
--- + + +## Public API (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `add_or_update_reaction` | message: DiscordMessage, emoji: str, count: int | Tuple[DiscordReaction, bool] | Upsert one reaction row per (message, emoji) with the given reaction count. | +| `bulk_process_message_batch` | message_data_list: List[Dict[str, Any]], channel: DiscordChannel | int | Run user upsert, message upsert, and reaction upsert inside one DB transaction. | +| `bulk_upsert_discord_messages` | message_data_list: List[Dict[str, Any]], channel: DiscordChannel, user_map: Dict[int, DiscordProfile] | Dict[int, DiscordMessage] | Bulk upsert messages for one channel using ``bulk_create(update_conflicts=True)``. | +| `bulk_upsert_discord_reactions` | reaction_data_list: List[Dict[str, Any]], message_map: Dict[int, DiscordMessage] | None | Bulk upsert reactions using ``bulk_create(update_conflicts=True)``. | +| `bulk_upsert_discord_users` | user_data_list: List[Dict[str, Any]] | Dict[int, DiscordProfile] | Upsert author profiles for a batch of messages. | +| `create_or_update_discord_message` | message_id: int, channel: DiscordChannel, author: DiscordProfile, content: str, message_created_at: datetime, message_edited_at: Optional[datetime] = None, reply_to_message_id: Optional[int] = None, attachment_urls: Optional[list] = None, message_type: str = 'Default', is_pinned: bool = False | Tuple[DiscordMessage, bool] | Create or update a single message by Discord ``message_id`` (upsert). | +| `get_active_channels` | server: DiscordServer, days: int = 30, channel_ids: Optional[List[int]] = None | QuerySet[DiscordChannel] | Same as ``queryset_channels_with_recent_messages`` with ``cutoff = now - days``. | +| `get_channel_latest_message_at` | channel: DiscordChannel | Optional[datetime] | Return the latest ``message_created_at`` among non-deleted messages in a channel. 
| +| `get_or_create_discord_channel` | server: DiscordServer, channel_id: int, channel_name: str, channel_type: str, topic: str = '', position: int = 0, category_id: Optional[int] = None, category_name: str = '' | Tuple[DiscordChannel, bool] | Get or create a channel row and refresh fields when the row already exists. | +| `get_or_create_discord_server` | server_id: int, server_name: str, icon_url: str = '' | Tuple[DiscordServer, bool] | Get or create a Discord guild (server) row and refresh metadata when it already exists. | +| `mark_message_deleted` | message: DiscordMessage, deleted_at: Optional[datetime] = None | DiscordMessage | Soft-delete a message: set ``is_deleted`` and ``deleted_at``. | +| `queryset_channels_with_recent_messages` | server: DiscordServer, cutoff: datetime, channel_ids: Optional[List[int]] = None | QuerySet[DiscordChannel] | Channels on ``server`` with at least one non-deleted message at or after ``cutoff``. | + + ## Service contract @@ -36,68 +56,6 @@ Collectors, management commands, and sync layers classify failures with [`classi --- -## DiscordServer - -| Function | Parameter types | Return type | Description | -| ----------------------------- | ------------------------------------------------------------------ | ---------------------------- | ----------------------------------------------------------------- | -| `get_or_create_discord_server` | `server_id: int`, `server_name: str`, `icon_url: str = ""` | `tuple[DiscordServer, bool]` | Get or create server; update name/icon if changed. | - ---- - -## DiscordChannel - -New fields (migration `0005`): `category_id: BigIntegerField | null`, `category_name: CharField`. 
- -| Function | Parameter types | Return type | Description | -| ------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ | ------------------------------------------------------------------------- | -| `get_or_create_discord_channel` | `server: DiscordServer`, `channel_id: int`, `channel_name: str`, `channel_type: str`, `topic: str = ""`, `position: int = 0`, `category_id: int \| None = None`, `category_name: str = ""` | `tuple[DiscordChannel, bool]` | Get or create channel; update all fields (incl. category) if changed. | -| `get_channel_latest_message_at` | `channel: DiscordChannel` | `datetime \| None` | Max `message_created_at` among non-deleted `DiscordMessage` rows for the channel. | -| `queryset_channels_with_recent_messages` | `server: DiscordServer`, `cutoff: datetime`, `channel_ids: list[int] \| None = None` | `QuerySet[DiscordChannel]` | Channels on the server with at least one non-deleted message at or after `cutoff`; optional Discord `channel_id` allowlist. | - ---- - -## DiscordMessage - -New fields (migration `0005`): `message_type: CharField` (default `"Default"`), `is_pinned: BooleanField` (default `False`). 
- -| Function | Parameter types | Return type | Description | -| ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ | ------------------------------ | -| `create_or_update_discord_message` | `message_id: int`, `channel: DiscordChannel`, `author: DiscordProfile`, `content: str`, `message_created_at: datetime`, `message_edited_at: datetime \| None = None`, `reply_to_message_id: int \| None = None`, `attachment_urls: list \| None = None`, `message_type: str = "Default"`, `is_pinned: bool = False` | `tuple[DiscordMessage, bool]` | Create or update message. | -| `mark_message_deleted` | `message: DiscordMessage`, `deleted_at: datetime \| None = None` | `DiscordMessage` | Mark message as deleted. | - ---- - -## DiscordReaction - -| Function | Parameter types | Return type | Description | -| ------------------------ | ------------------------------------------------------ | ------------------------------- | ------------------------ | -| `add_or_update_reaction` | `message: DiscordMessage`, `emoji: str`, `count: int` | `tuple[DiscordReaction, bool]` | Add or update reaction. | - ---- - -## Bulk operations - -Message and reaction upserts use `bulk_create(update_conflicts=True)` on `DiscordMessage` and `DiscordReaction`. **`bulk_upsert_discord_users`** does not: `DiscordProfile` uses multi-table inheritance, so users are deduplicated and updated with targeted queries / `get_or_create_discord_profile` per missing row (see `services.py`). - -Inputs are lists of pre-normalised message dicts (from `sync.messages._prepare_message_data` or `sync.chat_exporter.convert_exporter_message_to_dict`). 
- -| Function | Parameter types | Return type | Description | -| ----------------------------- | ------------------------------------------------------------------------------------------------------- | ----------- | ----------------------------------------------------------------------------------------------- | -| `bulk_upsert_discord_users` | `user_data_list: list[dict]` | `dict[int, DiscordProfile]` | Upsert `DiscordProfile` rows; returns `{discord_user_id: profile}`. | -| `bulk_upsert_discord_messages` | `message_data_list: list[dict]`, `channel: DiscordChannel`, `user_map: dict[int, DiscordProfile]` | `dict[int, DiscordMessage]` | Upsert `DiscordMessage` rows incl. `message_type` and `is_pinned`; returns `{message_id: msg}`. | -| `bulk_upsert_discord_reactions` | `reaction_data_list: list[dict]`, `message_map: dict[int, DiscordMessage]` | `None` | Upsert `DiscordReaction` rows. | -| `bulk_process_message_batch` | `message_data_list: list[dict]`, `channel: DiscordChannel` | `int` | Runs users → messages → reactions inside one `transaction.atomic()`. Return value is **`len(message_data_list)`** when non-empty (not the count of rows actually upserted); see **Raises and edge behavior** above. | - ---- - -## Query helpers - -| Function | Parameter types | Return type | Description | -| --------------------- | ------------------------------------------------------------------ | ----------- | --------------------------------------------------- | -| `get_active_channels` | `server: DiscordServer`, `days: int = 30`, `channel_ids: list[int] \| None = None` | `QuerySet` | Same as `queryset_channels_with_recent_messages` with `cutoff = now - days`. 
| - ---- - ## Sync package (`discord_activity_tracker.sync`) | Module / symbol | Role | diff --git a/docs/service_api/github_activity_tracker.md b/docs/service_api/github_activity_tracker.md index 570281ea..6bd785e1 100644 --- a/docs/service_api/github_activity_tracker.md +++ b/docs/service_api/github_activity_tracker.md @@ -6,63 +6,40 @@ **Type notation:** Model types refer to `github_activity_tracker.models`. Cross-app: `GitHubAccount` is `cppa_user_tracker.models.GitHubAccount`. --- - -## Language - -| Function | Parameter types | Return type | Description | Raises | -| ------------------------ | --------------- | -------------------- | --------------------------------- | ------ | -| `get_or_create_language` | `name: str` | `tuple[Language, bool]` | Get or create a Language by name. | `ValueError` if `name` is empty or whitespace-only. | - ---- - -## License - -| Function | Parameter types | Return type | Description | Raises | -| ----------------------- | ----------------------------------- | ------------------- | -------------------------------- | ------ | -| `get_or_create_license` | `name: str`, `spdx_id: str = ""`, `url: str = ""` | `tuple[License, bool]` | Get or create a License by name. | `ValueError` if `name` is empty or whitespace-only. | - ---- - -## GitHubRepository - -| Function | Parameter types | Return type | Description | -| -------------------------- | ----------------------------------------------- | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | -| `get_or_create_repository` | `owner_account: GitHubAccount`, `repo_name: str`, `**defaults: Any` | `tuple[GitHubRepository, bool]` | Get or create a repo by owner and name. `defaults` used when creating (e.g. `stars`, `forks`, `description`, `repo_pushed_at`). | -| `add_repo_license` | `repo: GitHubRepository`, `license_obj: License` | `None` | Add a License to a repo (M2M). 
Idempotent. | -| `remove_repo_license` | `repo: GitHubRepository`, `license_obj: License` | `None` | Remove a License from a repo. | - ---- - -## RepoLanguage - -| Function | Parameter types | Return type | Description | -| --------------------------------- | --------------------------------------- | ----------------------------- | ------------------------------------------------------- | -| `add_repo_language` | `repo: GitHubRepository`, `language: Language`, `line_count: int = 0` | `tuple[RepoLanguage, bool]` | Add or get repo–language link with `line_count`. | -| `update_repo_language_line_count` | `repo: GitHubRepository`, `language: Language`, `line_count: int` | `RepoLanguage` | Update `line_count` for an existing repo–language link. | - ---- - -## Issue (assignees and labels) - -| Function | Parameter types | Return type | Description | -| ----------------------- | -------------------------- | --------------------------- | ---------------------------------------------- | -| `add_issue_assignee` | `issue: Issue`, `account: GitHubAccount` | `None` | Add an assignee to an issue (M2M). Idempotent. | -| `remove_issue_assignee` | `issue: Issue`, `account: GitHubAccount` | `None` | Remove an assignee from an issue. | -| `add_issue_label` | `issue: Issue`, `label_name: str` | `tuple[IssueLabel, bool]` | Add a label to an issue. | -| `remove_issue_label` | `issue: Issue`, `label_name: str` | `None` | Remove a label from an issue. | - ---- - -## Pull request (assignees and labels) - -| Function | Parameter types | Return type | Description | -| --------------------------- | ----------------------- | --------------------------------- | ------------------------------------------ | -| `add_pr_assignee` | `pr: PullRequest`, `account: GitHubAccount` | `None` | Add an assignee to a PR (M2M). Idempotent. | -| `remove_pr_assignee` | `pr: PullRequest`, `account: GitHubAccount` | `None` | Remove an assignee from a PR. 
| -| `add_pull_request_label` | `pr: PullRequest`, `label_name: str` | `tuple[PullRequestLabel, bool]` | Add a label to a pull request. | -| `remove_pull_request_label` | `pr: PullRequest`, `label_name: str` | `None` | Remove a label from a pull request. | - ---- + + +## Public API (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `add_commit_file_change` | commit: GitCommit, github_file: GitHubFile, status: str, additions: int = 0, deletions: int = 0, patch: str = '' | tuple[GitCommitFileChange, bool] | Add or update a file change for a commit. If exists, updates status, additions, deletions, patch. Returns (file_change, created). | +| `add_issue_assignee` | issue: Issue, account: GitHubAccount | None | Add an assignee to an issue (M2M). Idempotent. | +| `add_issue_label` | issue: Issue, label_name: str | tuple[IssueLabel, bool] | Add a label to an issue. Returns (IssueLabel, created). | +| `add_pr_assignee` | pr: PullRequest, account: GitHubAccount | None | Add an assignee to a PR (M2M). Idempotent. | +| `add_pull_request_label` | pr: PullRequest, label_name: str | tuple[PullRequestLabel, bool] | Add a label to a pull request. Returns (PullRequestLabel, created). | +| `add_repo_language` | repo: GitHubRepository, language: Language, line_count: int = 0 | tuple[RepoLanguage, bool] | Add or update a repo–language link with line_count. If exists, updates line_count. Returns (RepoLanguage, created). | +| `add_repo_license` | repo: GitHubRepository, license_obj: License | None | Add a License to a repo (M2M). Idempotent. | +| `create_or_update_commit` | repo: GitHubRepository, account: GitHubAccount, commit_hash: str, comment: str = '', commit_at: Optional[datetime] = None | tuple[GitCommit, bool] | Create or update a GitCommit by repo + commit_hash. Returns (commit, created). 
| +| `create_or_update_created_repos_by_language` | language: Language, year: int, all_repos: int, significant_repos: int | tuple[CreatedReposByLanguage, bool] | Create or update CreatedReposByLanguage for (language, year). | +| `create_or_update_github_file` | repo: GitHubRepository, filename: str, is_deleted: bool = False | tuple[GitHubFile, bool] | Create or update a GitHubFile by repo + filename. Returns (file, created). | +| `create_or_update_issue` | repo: GitHubRepository, account: GitHubAccount, issue_number: int, issue_id: int, title: str = '', body: str = '', state: str = IssueState.OPEN, state_reason: str = '', issue_created_at: Optional[datetime] = None, issue_updated_at: Optional[datetime] = None, issue_closed_at: Optional[datetime] = None | tuple[Issue, bool] | Create or update an Issue by issue_id. Returns (issue, created). | +| `create_or_update_issue_comment` | issue: Issue, account: GitHubAccount, issue_comment_id: int, body: str = '', issue_comment_created_at: Optional[datetime] = None, issue_comment_updated_at: Optional[datetime] = None | tuple[IssueComment, bool] | Create or update an IssueComment by issue_comment_id. Returns (comment, created). | +| `create_or_update_pr_comment` | pr: PullRequest, account: GitHubAccount, pr_comment_id: int, body: str = '', pr_comment_created_at: Optional[datetime] = None, pr_comment_updated_at: Optional[datetime] = None | tuple[PullRequestComment, bool] | Create or update a PullRequestComment by pr_comment_id. Returns (comment, created). | +| `create_or_update_pr_review` | pr: PullRequest, account: GitHubAccount, pr_review_id: int, body: str = '', in_reply_to_id: Optional[int] = None, pr_review_created_at: Optional[datetime] = None, pr_review_updated_at: Optional[datetime] = None | tuple[PullRequestReview, bool] | Create or update a PullRequestReview by pr_review_id. Returns (review, created). 
| +| `create_or_update_pull_request` | repo: GitHubRepository, account: GitHubAccount, pr_number: int, pr_id: int, title: str = '', body: str = '', state: str = PullRequestState.OPEN, head_hash: str = '', base_hash: str = '', pr_created_at: Optional[datetime] = None, pr_updated_at: Optional[datetime] = None, pr_merged_at: Optional[datetime] = None, pr_closed_at: Optional[datetime] = None | tuple[PullRequest, bool] | Create or update a PullRequest by pr_id. Returns (pr, created). | +| `ensure_repository_owner` | repo: GitHubRepository, owner_account: GitHubAccount | None | Ensure repo has owner_account set (fixes rows with null owner_account_id). | +| `get_or_create_language` | name: str | tuple[Language, bool] | Get or create a Language by name. Returns (Language, created). | +| `get_or_create_license` | name: str, spdx_id: str = '', url: str = '' | tuple[License, bool] | Get or create a License by name. If exists, updates spdx_id and url. Returns (License, created). | +| `get_or_create_repository` | owner_account: GitHubAccount, repo_name: str, **defaults: Any | tuple[GitHubRepository, bool] | Get or create a GitHubRepository by owner_account and repo_name. If exists, updates fields in defaults. Returns (repo, created). | +| `remove_issue_assignee` | issue: Issue, account: GitHubAccount | None | Remove an assignee from an issue. | +| `remove_issue_label` | issue: Issue, label_name: str | None | Remove a label from an issue. | +| `remove_pr_assignee` | pr: PullRequest, account: GitHubAccount | None | Remove an assignee from a PR. | +| `remove_pull_request_label` | pr: PullRequest, label_name: str | None | Remove a label from a pull request. | +| `remove_repo_license` | repo: GitHubRepository, license_obj: License | None | Remove a License from a repo. | +| `set_github_file_previous_filename` | github_file: GitHubFile, previous_file: GitHubFile | None | Set the previous_filename reference for a renamed file. 
| +| `update_repo_language_line_count` | repo: GitHubRepository, language: Language, line_count: int | RepoLanguage | Update line_count for an existing repo–language link. | + + ## Not yet in API diff --git a/docs/service_api/wg21_paper_tracker.md b/docs/service_api/wg21_paper_tracker.md new file mode 100644 index 00000000..d4f8e0e1 --- /dev/null +++ b/docs/service_api/wg21_paper_tracker.md @@ -0,0 +1,25 @@ +# wg21_paper_tracker.services + +**Module path:** `wg21_paper_tracker.services` +**Description:** Database logic for WG21 Paper Tracker. Writes for this app's models go through this module. + +**Type notation:** Model types refer to `wg21_paper_tracker.models`. + +--- + + +## Public API (generated) + +| Function | Parameters | Return type | Summary | +| --- | --- | --- | --- | +| `get_or_create_mailing` | mailing_date: str, title: str | tuple[WG21Mailing, bool] | | +| `get_or_create_paper` | paper_id: str, url: str, title: str, document_date: date \| None, mailing: WG21Mailing, subgroup: str = '', author_names: Optional[list[str]] = None, author_emails: Optional[list[str]] = None, year: int \| None = None | tuple[WG21Paper, bool] | | +| `get_or_create_paper_author` | paper: WG21Paper, profile: WG21PaperAuthorProfile, author_order: int | tuple[WG21PaperAuthor, bool] | Get or create a WG21PaperAuthor link for (paper, profile), with author_order (1-based). Updates author_order on existing link if it differs. | +| `mark_paper_downloaded` | paper_id: str, year: int \| None = None | | | + + + +## Related + +- [Service API index](README.md) +- [Contributing](../Contributing.md) diff --git a/scripts/generate_service_docs.py b/scripts/generate_service_docs.py new file mode 100644 index 00000000..e6f188d8 --- /dev/null +++ b/scripts/generate_service_docs.py @@ -0,0 +1,434 @@ +#!/usr/bin/env python3 +""" +Generate markdown service API reference from ``*/services.py`` and ``core/protocols.py``. 
+ +Usage: + python scripts/generate_service_docs.py # write docs + python scripts/generate_service_docs.py --check # exit 1 if docs drift + python scripts/generate_service_docs.py --app NAME + +Markers in each ``docs/service_api/.md`` (and ``core_protocols.md``) delimit the +machine-owned region; narrative content must live outside that region (below END). + + + ... generated tables ... + +""" + +from __future__ import annotations + +import argparse +import ast +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable + +REPO_ROOT = Path(__file__).resolve().parent.parent +DOCS_SERVICE_API = REPO_ROOT / "docs" / "service_api" + +MARKER_START = "" +MARKER_END = "" + +SKIP_TOPLEVEL_DIRS = frozenset( + { + ".git", + ".github", + ".venv", + "venv", + "node_modules", + "htmlcov", + "staticfiles", + "build", + ".pytest_cache", + ".tox", + "site-packages", + "__pycache__", + "docs", + "scripts", + "code_cleaner", + "woring_report", + } +) + + +def _cell(text: str) -> str: + t = text.replace("\r\n", "\n").replace("\r", "\n").strip() + t = t.replace("|", "\\|") + t = " ".join(t.split()) + return t + + +def _first_paragraph_docstring(node: ast.AST) -> str: + raw = ast.get_docstring(node, clean=True) + if not raw: + return "" + return raw.split("\n\n", 1)[0].strip() + + +def _unparse(node: ast.AST | None, source: str) -> str: + if node is None: + return "" + if hasattr(ast, "unparse"): + return ast.unparse(node).strip() + raise RuntimeError("Python 3.9+ required (ast.unparse)") + + +def _format_args(func: ast.AsyncFunctionDef | ast.FunctionDef, source: str) -> str: + parts: list[str] = [] + args = func.args + n_args = len(args.args) + n_defaults = len(args.defaults) + first_default = n_args - n_defaults + + def arg_str(a: ast.arg, default: ast.expr | None) -> str: + ann = _unparse(a.annotation, source) if a.annotation else "" + base = f"{a.arg}: {ann}" if ann else a.arg + if default is not None: + base += f" = {_unparse(default, 
source)}" + return base + + for a in args.posonlyargs: + parts.append(arg_str(a, None)) + for i, a in enumerate(args.args): + default: ast.expr | None = None + if i >= first_default: + default = args.defaults[i - first_default] + parts.append(arg_str(a, default)) + if args.vararg: + va = args.vararg + ann = _unparse(va.annotation, source) if va.annotation else "" + parts.append(f"*{va.arg}" + (f": {ann}" if ann else "")) + for a, d in zip(args.kwonlyargs, args.kw_defaults): + default = d + ann = _unparse(a.annotation, source) if a.annotation else "" + base = f"{a.arg}: {ann}" if ann else a.arg + if default is not None: + base += f" = {_unparse(default, source)}" + parts.append(base) + if args.kwarg: + ka = args.kwarg + ann = _unparse(ka.annotation, source) if ka.annotation else "" + parts.append(f"**{ka.arg}" + (f": {ann}" if ann else "")) + return ", ".join(parts) + + +@dataclass(frozen=True) +class ServiceFuncRow: + name: str + parameters: str + return_type: str + summary: str + + +def _extract_public_functions(source: str) -> list[ServiceFuncRow]: + tree = ast.parse(source) + rows: list[ServiceFuncRow] = [] + for node in tree.body: + if not isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef)): + continue + if node.name.startswith("_"): + continue + ret = "" + if node.returns: + ret = _unparse(node.returns, source) + rows.append( + ServiceFuncRow( + name=node.name, + parameters=_format_args(node, source), + return_type=ret, + summary=_first_paragraph_docstring(node), + ) + ) + rows.sort(key=lambda r: r.name) + return rows + + +def _render_service_table( + rows: Iterable[ServiceFuncRow], *, section_title: str = "## Public API (generated)" +) -> str: + lines = [ + section_title, + "", + "| Function | Parameters | Return type | Summary |", + "| --- | --- | --- | --- |", + ] + for r in rows: + fn = f"`{r.name}`" + lines.append( + "| " + + " | ".join(_cell(x) for x in (fn, r.parameters, r.return_type, r.summary)) + + " |" + ) + return "\n".join(lines) + "\n" + + 
+@dataclass(frozen=True) +class ProtocolProperty: + name: str + type_ann: str + + +@dataclass(frozen=True) +class ProtocolRow: + name: str + summary: str + properties: tuple[ProtocolProperty, ...] + + +def _extract_protocols(source: str) -> tuple[list[ProtocolRow], list[ServiceFuncRow]]: + tree = ast.parse(source) + protocols: list[ProtocolRow] = [] + helpers: list[ServiceFuncRow] = [] + + for node in tree.body: + if isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef)): + if not node.name.startswith("_"): + ret = _unparse(node.returns, source) if node.returns else "" + helpers.append( + ServiceFuncRow( + name=node.name, + parameters=_format_args(node, source), + return_type=ret, + summary=_first_paragraph_docstring(node), + ) + ) + elif isinstance(node, ast.ClassDef): + if not _class_has_runtime_checkable(node): + continue + if not _bases_protocol(node): + continue + props: list[ProtocolProperty] = [] + for item in node.body: + if isinstance(item, ast.AnnAssign) and isinstance( + item.target, ast.Name + ): + # rare + continue + if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)): + if item.name == "__init__": + continue + if _is_property_method(item): + ann = "" + if item.returns: + ann = _unparse(item.returns, source) + props.append(ProtocolProperty(name=item.name, type_ann=ann)) + protocols.append( + ProtocolRow( + name=node.name, + summary=_first_paragraph_docstring(node), + properties=tuple(props), + ) + ) + + protocols.sort(key=lambda p: p.name) + helpers.sort(key=lambda r: r.name) + return protocols, helpers + + +def _class_has_runtime_checkable(node: ast.ClassDef) -> bool: + for dec in node.decorator_list: + if isinstance(dec, ast.Name) and dec.id == "runtime_checkable": + return True + if isinstance(dec, ast.Attribute) and dec.attr == "runtime_checkable": + return True + if isinstance(dec, ast.Call): + if isinstance(dec.func, ast.Name) and dec.func.id == "runtime_checkable": + return True + if ( + isinstance(dec.func, ast.Attribute) + and 
dec.func.attr == "runtime_checkable" + ): + return True + return False + + +def _bases_protocol(node: ast.ClassDef) -> bool: + for b in node.bases: + if isinstance(b, ast.Name) and b.id == "Protocol": + return True + if isinstance(b, ast.Attribute) and b.attr == "Protocol": + return True + if isinstance(b, ast.Subscript): + if isinstance(b.value, ast.Name) and b.value.id == "Protocol": + return True + return False + + +def _is_property_method(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool: + for dec in node.decorator_list: + if isinstance(dec, ast.Name) and dec.id == "property": + return True + if isinstance(dec, ast.Attribute) and dec.attr == "property": + return True + return False + + +def _render_protocols_page( + protocols: list[ProtocolRow], helpers: list[ServiceFuncRow] +) -> str: + chunks: list[str] = ["## Protocol types (generated)", ""] + for p in protocols: + chunks.append(f"### `{p.name}`") + if p.summary: + chunks.append("") + chunks.append(p.summary) + chunks.append("") + chunks.append("| Property | Type |") + chunks.append("| --- | --- |") + for pr in p.properties: + chunks.append(f"| `{pr.name}` | {_cell(pr.type_ann)} |") + chunks.append("") + if helpers: + chunks.append( + _render_service_table( + helpers, section_title="## Module functions (generated)" + ) + ) + return "\n".join(chunks).rstrip() + "\n" + + +def _discover_apps_with_services() -> list[tuple[str, Path]]: + found: list[tuple[str, Path]] = [] + for child in sorted(REPO_ROOT.iterdir(), key=lambda p: p.name.lower()): + if not child.is_dir(): + continue + if child.name.startswith("."): + continue + if child.name in SKIP_TOPLEVEL_DIRS: + continue + svc = child / "services.py" + if svc.is_file(): + found.append((child.name, svc)) + return found + + +def _splice_generated(existing: str, generated: str) -> str: + if MARKER_START not in existing: + raise ValueError( + f"missing {MARKER_START!r}; add markers or see docs/Contributing.md" + ) + if MARKER_END not in existing: + raise 
ValueError(f"missing {MARKER_END!r}") + head, mid_and_tail = existing.split(MARKER_START, 1) + _mid, tail = mid_and_tail.split(MARKER_END, 1) + gen_block = f"{MARKER_START}\n\n{generated.rstrip()}\n\n{MARKER_END}" + return f"{head.rstrip()}\n{gen_block}{tail}" + + +def _normalize_eol(text: str) -> str: + text = text.replace("\r\n", "\n").replace("\r", "\n") + if not text.endswith("\n"): + text += "\n" + return text + + +def _read_text(path: Path) -> str: + return path.read_text(encoding="utf-8") + + +def _write_text(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(_normalize_eol(content), encoding="utf-8", newline="\n") + + +def _build_generated_for_services_py(path: Path) -> str: + source = _read_text(path) + rows = _extract_public_functions(source) + return _render_service_table(rows) + + +def _build_generated_for_protocols(path: Path) -> str: + source = _read_text(path) + protos, helpers = _extract_protocols(source) + return _render_protocols_page(protos, helpers) + + +def _doc_path_for_app(app: str) -> Path: + return DOCS_SERVICE_API / f"{app}.md" + + +def _generate_one_app(app: str, services_path: Path, check: bool) -> bool: + doc_path = _doc_path_for_app(app) + generated = _build_generated_for_services_py(services_path) + if not doc_path.is_file(): + raise FileNotFoundError( + f"expected {doc_path.relative_to(REPO_ROOT)}; " + "create it with header, markers, and optional manual tail" + ) + old = _read_text(doc_path) + new = _normalize_eol(_splice_generated(old, generated)) + if check: + return _normalize_eol(old) == new + _write_text(doc_path, new) + return True + + +def _generate_protocols(check: bool) -> bool: + proto_path = REPO_ROOT / "core" / "protocols.py" + doc_path = _doc_path_for_app("core_protocols") + generated = _build_generated_for_protocols(proto_path) + if not doc_path.is_file(): + raise FileNotFoundError(f"expected {doc_path.relative_to(REPO_ROOT)}") + old = _read_text(doc_path) + new = 
_normalize_eol(_splice_generated(old, generated)) + if check: + return _normalize_eol(old) == new + _write_text(doc_path, new) + return True + + +def _run_all(check: bool, only_app: str | None) -> int: + ok = True + if only_app: + if only_app == "core_protocols": + if not _generate_protocols(check): + ok = False + return 0 if ok else 1 + for app, p in _discover_apps_with_services(): + if app == only_app: + if not _generate_one_app(app, p, check): + ok = False + return 0 if ok else 1 + print(f"unknown app: {only_app}", file=sys.stderr) + return 2 + + for app, p in _discover_apps_with_services(): + if not _generate_one_app(app, p, check): + ok = False + if not _generate_protocols(check): + ok = False + return 0 if ok else 1 + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--check", + action="store_true", + help="exit with status 1 if generated content differs from committed files", + ) + parser.add_argument( + "--app", type=str, default=None, help="only regenerate one app module" + ) + args = parser.parse_args() + + try: + code = _run_all(args.check, args.app) + except FileNotFoundError as e: + print(str(e), file=sys.stderr) + return 2 + except ValueError as e: + print(str(e), file=sys.stderr) + return 2 + + if args.check and code != 0: + print( + "Service API docs are out of date. 
Run: python scripts/generate_service_docs.py", + file=sys.stderr, + ) + return code + + +if __name__ == "__main__": + raise SystemExit(main()) From 73eb71a88ae567f22dbde1e5f5c99e440b8b840a Mon Sep 17 00:00:00 2001 From: zho Date: Tue, 19 May 2026 03:39:20 +0800 Subject: [PATCH 2/3] addressed ai reviews --- docs/service_api/clang_github_tracker.md | 8 +++--- docs/service_api/wg21_paper_tracker.md | 6 ++--- scripts/generate_service_docs.py | 31 +++++++++++++++++++++--- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/docs/service_api/clang_github_tracker.md b/docs/service_api/clang_github_tracker.md index 4beb0ac3..b66e135b 100644 --- a/docs/service_api/clang_github_tracker.md +++ b/docs/service_api/clang_github_tracker.md @@ -15,10 +15,10 @@ | `get_commit_watermark` | | Optional[datetime] | Max ``github_committed_at`` across commits (API fetch cursor base). | | `get_issue_item_watermark` | | Optional[datetime] | Max ``github_updated_at`` across issues and PRs (API fetch cursor base). | | `start_after_watermark` | max_dt: datetime \| None | datetime \| None | Return ``max + 1ms`` for API fetch lower bound, or ``None`` if no watermark. | -| `upsert_commit` | sha: str, github_committed_at: datetime \| None | tuple[ClangGithubCommit, bool] | Create or update a ClangGithubCommit by ``sha``. Returns (instance, created). | -| `upsert_commits_batch` | rows: Sequence[tuple[str, datetime \| None]], batch_size: int = DEFAULT_UPSERT_BATCH_SIZE | tuple[int, int] | Batch upsert commits by ``sha``. Skips rows whose sha is not 40 chars. | -| `upsert_issue_item` | number: int, is_pull_request: bool, github_created_at: datetime \| None, github_updated_at: datetime \| None | tuple[ClangGithubIssueItem, bool] | Create or update a ClangGithubIssueItem by ``number``. Returns (instance, created). 
| -| `upsert_issue_items_batch` | rows: Sequence[tuple[int, bool, datetime \| None, datetime \| None]], batch_size: int = DEFAULT_UPSERT_BATCH_SIZE | tuple[int, int] | Batch upsert issue/PR rows by ``number``. | +| `upsert_commit` | sha: str, *, github_committed_at: datetime \| None | tuple[ClangGithubCommit, bool] | Create or update a ClangGithubCommit by ``sha``. Returns (instance, created). | +| `upsert_commits_batch` | rows: Sequence[tuple[str, datetime \| None]], *, batch_size: int = DEFAULT_UPSERT_BATCH_SIZE | tuple[int, int] | Batch upsert commits by ``sha``. Skips rows whose sha is not 40 chars. | +| `upsert_issue_item` | number: int, *, is_pull_request: bool, github_created_at: datetime \| None, github_updated_at: datetime \| None | tuple[ClangGithubIssueItem, bool] | Create or update a ClangGithubIssueItem by ``number``. Returns (instance, created). | +| `upsert_issue_items_batch` | rows: Sequence[tuple[int, bool, datetime \| None, datetime \| None]], *, batch_size: int = DEFAULT_UPSERT_BATCH_SIZE | tuple[int, int] | Batch upsert issue/PR rows by ``number``. 
| diff --git a/docs/service_api/wg21_paper_tracker.md b/docs/service_api/wg21_paper_tracker.md index d4f8e0e1..ba1a17e3 100644 --- a/docs/service_api/wg21_paper_tracker.md +++ b/docs/service_api/wg21_paper_tracker.md @@ -12,10 +12,10 @@ | Function | Parameters | Return type | Summary | | --- | --- | --- | --- | -| `get_or_create_mailing` | mailing_date: str, title: str | tuple[WG21Mailing, bool] | | -| `get_or_create_paper` | paper_id: str, url: str, title: str, document_date: date \| None, mailing: WG21Mailing, subgroup: str = '', author_names: Optional[list[str]] = None, author_emails: Optional[list[str]] = None, year: int \| None = None | tuple[WG21Paper, bool] | | +| `get_or_create_mailing` | mailing_date: str, title: str | tuple[WG21Mailing, bool] | — | +| `get_or_create_paper` | paper_id: str, url: str, title: str, document_date: date \| None, mailing: WG21Mailing, subgroup: str = '', author_names: Optional[list[str]] = None, author_emails: Optional[list[str]] = None, year: int \| None = None | tuple[WG21Paper, bool] | — | | `get_or_create_paper_author` | paper: WG21Paper, profile: WG21PaperAuthorProfile, author_order: int | tuple[WG21PaperAuthor, bool] | Get or create a WG21PaperAuthor link for (paper, profile), with author_order (1-based). Updates author_order on existing link if it differs. 
| -| `mark_paper_downloaded` | paper_id: str, year: int \| None = None | | | +| `mark_paper_downloaded` | paper_id: str, year: int \| None = None | None | — | diff --git a/scripts/generate_service_docs.py b/scripts/generate_service_docs.py index e6f188d8..017d0c4e 100644 --- a/scripts/generate_service_docs.py +++ b/scripts/generate_service_docs.py @@ -30,6 +30,9 @@ MARKER_START = "" MARKER_END = "" +RETURN_TYPE_FALLBACK = "None" +SUMMARY_FALLBACK = "—" + SKIP_TOPLEVEL_DIRS = frozenset( { ".git", @@ -59,7 +62,17 @@ def _cell(text: str) -> str: return t -def _first_paragraph_docstring(node: ast.AST) -> str: +def _display_return_type(annotation: str) -> str: + return annotation.strip() or RETURN_TYPE_FALLBACK + + +def _display_summary(summary: str) -> str: + return summary.strip() or SUMMARY_FALLBACK + + +def _first_paragraph_docstring( + node: ast.AsyncFunctionDef | ast.FunctionDef | ast.ClassDef, +) -> str: raw = ast.get_docstring(node, clean=True) if not raw: return "" @@ -90,6 +103,8 @@ def arg_str(a: ast.arg, default: ast.expr | None) -> str: for a in args.posonlyargs: parts.append(arg_str(a, None)) + if args.posonlyargs: + parts.append("/") for i, a in enumerate(args.args): default: ast.expr | None = None if i >= first_default: @@ -99,7 +114,9 @@ def arg_str(a: ast.arg, default: ast.expr | None) -> str: va = args.vararg ann = _unparse(va.annotation, source) if va.annotation else "" parts.append(f"*{va.arg}" + (f": {ann}" if ann else "")) - for a, d in zip(args.kwonlyargs, args.kw_defaults): + elif args.kwonlyargs: + parts.append("*") + for a, d in zip(args.kwonlyargs, args.kw_defaults, strict=True): default = d ann = _unparse(a.annotation, source) if a.annotation else "" base = f"{a.arg}: {ann}" if ann else a.arg @@ -157,7 +174,15 @@ def _render_service_table( fn = f"`{r.name}`" lines.append( "| " - + " | ".join(_cell(x) for x in (fn, r.parameters, r.return_type, r.summary)) + + " | ".join( + _cell(x) + for x in ( + fn, + r.parameters, + 
_display_return_type(r.return_type), + _display_summary(r.summary), + ) + ) + " |" ) return "\n".join(lines) + "\n" From 9940cac80d88dc97e036775f61788438f2c5b318 Mon Sep 17 00:00:00 2001 From: zho Date: Wed, 20 May 2026 03:51:45 +0800 Subject: [PATCH 3/3] removed unused parameters and added order validation --- scripts/generate_service_docs.py | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/scripts/generate_service_docs.py b/scripts/generate_service_docs.py index 017d0c4e..9751b90a 100644 --- a/scripts/generate_service_docs.py +++ b/scripts/generate_service_docs.py @@ -79,7 +79,7 @@ def _first_paragraph_docstring( return raw.split("\n\n", 1)[0].strip() -def _unparse(node: ast.AST | None, source: str) -> str: +def _unparse(node: ast.AST | None) -> str: if node is None: return "" if hasattr(ast, "unparse"): @@ -87,7 +87,7 @@ def _unparse(node: ast.AST | None, source: str) -> str: raise RuntimeError("Python 3.9+ required (ast.unparse)") -def _format_args(func: ast.AsyncFunctionDef | ast.FunctionDef, source: str) -> str: +def _format_args(func: ast.AsyncFunctionDef | ast.FunctionDef) -> str: parts: list[str] = [] args = func.args n_args = len(args.args) @@ -95,10 +95,10 @@ def _format_args(func: ast.AsyncFunctionDef | ast.FunctionDef, source: str) -> s first_default = n_args - n_defaults def arg_str(a: ast.arg, default: ast.expr | None) -> str: - ann = _unparse(a.annotation, source) if a.annotation else "" + ann = _unparse(a.annotation) if a.annotation else "" base = f"{a.arg}: {ann}" if ann else a.arg if default is not None: - base += f" = {_unparse(default, source)}" + base += f" = {_unparse(default)}" return base for a in args.posonlyargs: @@ -112,20 +112,20 @@ def arg_str(a: ast.arg, default: ast.expr | None) -> str: parts.append(arg_str(a, default)) if args.vararg: va = args.vararg - ann = _unparse(va.annotation, source) if va.annotation else "" + ann = _unparse(va.annotation) if va.annotation else "" 
parts.append(f"*{va.arg}" + (f": {ann}" if ann else "")) elif args.kwonlyargs: parts.append("*") for a, d in zip(args.kwonlyargs, args.kw_defaults, strict=True): default = d - ann = _unparse(a.annotation, source) if a.annotation else "" + ann = _unparse(a.annotation) if a.annotation else "" base = f"{a.arg}: {ann}" if ann else a.arg if default is not None: - base += f" = {_unparse(default, source)}" + base += f" = {_unparse(default)}" parts.append(base) if args.kwarg: ka = args.kwarg - ann = _unparse(ka.annotation, source) if ka.annotation else "" + ann = _unparse(ka.annotation) if ka.annotation else "" parts.append(f"**{ka.arg}" + (f": {ann}" if ann else "")) return ", ".join(parts) @@ -148,11 +148,11 @@ def _extract_public_functions(source: str) -> list[ServiceFuncRow]: continue ret = "" if node.returns: - ret = _unparse(node.returns, source) + ret = _unparse(node.returns) rows.append( ServiceFuncRow( name=node.name, - parameters=_format_args(node, source), + parameters=_format_args(node), return_type=ret, summary=_first_paragraph_docstring(node), ) @@ -209,11 +209,11 @@ def _extract_protocols(source: str) -> tuple[list[ProtocolRow], list[ServiceFunc for node in tree.body: if isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef)): if not node.name.startswith("_"): - ret = _unparse(node.returns, source) if node.returns else "" + ret = _unparse(node.returns) if node.returns else "" helpers.append( ServiceFuncRow( name=node.name, - parameters=_format_args(node, source), + parameters=_format_args(node), return_type=ret, summary=_first_paragraph_docstring(node), ) @@ -236,7 +236,7 @@ def _extract_protocols(source: str) -> tuple[list[ProtocolRow], list[ServiceFunc if _is_property_method(item): ann = "" if item.returns: - ann = _unparse(item.returns, source) + ann = _unparse(item.returns) props.append(ProtocolProperty(name=item.name, type_ann=ann)) protocols.append( ProtocolRow( @@ -333,10 +333,12 @@ def _splice_generated(existing: str, generated: str) -> str: raise 
ValueError( f"missing {MARKER_START!r}; add markers or see docs/Contributing.md" ) - if MARKER_END not in existing: - raise ValueError(f"missing {MARKER_END!r}") head, mid_and_tail = existing.split(MARKER_START, 1) - _mid, tail = mid_and_tail.split(MARKER_END, 1) + + if MARKER_END not in mid_and_tail: + raise ValueError(f"missing {MARKER_END!r}") + + _, tail = mid_and_tail.split(MARKER_END, 1) gen_block = f"{MARKER_START}\n\n{generated.rstrip()}\n\n{MARKER_END}" return f"{head.rstrip()}\n{gen_block}{tail}"