diff --git a/boost_library_docs_tracker/management/commands/run_boost_library_docs_tracker.py b/boost_library_docs_tracker/management/commands/run_boost_library_docs_tracker.py index 17eb7cca..cc55c1af 100644 --- a/boost_library_docs_tracker/management/commands/run_boost_library_docs_tracker.py +++ b/boost_library_docs_tracker/management/commands/run_boost_library_docs_tracker.py @@ -43,8 +43,7 @@ from django.core.management.base import CommandError -from core.collectors.base import CollectorBase -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from boost_library_docs_tracker import fetcher, services, workspace from boost_library_docs_tracker.preprocessor import preprocess_for_pinecone @@ -57,14 +56,23 @@ DEFAULT_MAX_PAGES = 10 -class BoostLibraryDocsTrackerCollector(CollectorBase): +class BoostLibraryDocsTrackerCollector(AbstractCollector): """Scrape docs to DB/workspace; Pinecone upsert in ``sync_pinecone``.""" def __init__(self, cmd: "Command", options: dict) -> None: self.cmd = cmd self.options = options - def run(self) -> None: + @property + def name(self) -> str: + return "boost_library_docs_tracker" + + def validate_config(self) -> None: + max_pages = self.options.get("max_pages") + if max_pages is not None and max_pages < 1: + raise CommandError("--max-pages must be at least 1.") + + def collect(self) -> None: o = self.options try: self.cmd._run( @@ -145,7 +153,7 @@ def add_arguments(self, parser): ), ) - def get_collector(self, **options): + def get_collector(self, **options) -> AbstractCollector: return BoostLibraryDocsTrackerCollector(cmd=self, options=dict(options)) # Top-level flow diff --git a/boost_library_usage_dashboard/collectors.py b/boost_library_usage_dashboard/collectors.py index 3e60ba86..f7c637f0 100644 --- a/boost_library_usage_dashboard/collectors.py +++ b/boost_library_usage_dashboard/collectors.py @@ -5,7 +5,7 @@ from django.conf import settings from django.core.management.base import CommandError -from core.collectors.base import CollectorBase +from core.collectors import AbstractCollector from boost_library_usage_dashboard.analyzer import BoostUsageDashboardAnalyzer from boost_library_usage_dashboard.publisher import publish_dashboard from boost_library_usage_dashboard.renderer import render_dashboard_html @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -class BoostLibraryUsageDashboardCollector(CollectorBase): +class BoostLibraryUsageDashboardCollector(AbstractCollector): """Collect metrics, render HTML, optionally publish to GitHub.""" def __init__( @@ -35,7 +35,14 @@ def __init__( self.repo = repo self.branch = branch - def run(self) -> None: + @property + def name(self) -> str: + return "boost_library_usage_dashboard" + + def validate_config(self) -> None: + return None + + def collect(self) -> None: output_dir = get_workspace_path("boost_library_usage_dashboard").resolve() output_dir.mkdir(parents=True, exist_ok=True) diff --git a/boost_library_usage_dashboard/management/commands/run_boost_library_usage_dashboard.py b/boost_library_usage_dashboard/management/commands/run_boost_library_usage_dashboard.py index 46555bfa..b59a38ae 100644 --- a/boost_library_usage_dashboard/management/commands/run_boost_library_usage_dashboard.py +++ b/boost_library_usage_dashboard/management/commands/run_boost_library_usage_dashboard.py @@ -1,6 +1,6 @@ """Build the Boost library usage dashboard from DB data and optionally publish to GitHub.""" -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from boost_library_usage_dashboard.collectors import ( BoostLibraryUsageDashboardCollector, ) @@ -49,7 +49,7 @@ def add_arguments(self, parser): help="Branch to publish to (overrides BOOST_LIBRARY_USAGE_DASHBOARD_PUBLISH_BRANCH; default main).", ) - def get_collector(self, **options): + def get_collector(self, **options) -> AbstractCollector: return BoostLibraryUsageDashboardCollector( skip_collect=options["skip_collect"], skip_render=options["skip_render"], diff --git a/boost_mailing_list_tracker/management/commands/run_boost_mailing_list_tracker.py b/boost_mailing_list_tracker/management/commands/run_boost_mailing_list_tracker.py index 735b1d9a..d2ae4555 100644 --- a/boost_mailing_list_tracker/management/commands/run_boost_mailing_list_tracker.py +++ b/boost_mailing_list_tracker/management/commands/run_boost_mailing_list_tracker.py @@ -15,8 +15,7 @@ from django.core.management import call_command from django.utils.dateparse import parse_datetime -from core.collectors.base import CollectorBase -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from boost_mailing_list_tracker.email_formatter import format_email from boost_mailing_list_tracker.fetcher import ( @@ -179,7 +178,7 @@ def _process_existing_workspace_json(list_name: str) -> tuple[int, int]: return processed, skipped -class BoostMailingListTrackerCollector(CollectorBase): +class BoostMailingListTrackerCollector(AbstractCollector): """Fetch mailing lists via workspace pipeline.""" def __init__( @@ -201,7 +200,14 @@ def __init__( self.pinecone_app_type = pinecone_app_type self.pinecone_namespace = pinecone_namespace - def run(self) -> None: + @property + def name(self) -> str: + return "boost_mailing_list_tracker" + + def validate_config(self) -> None: + return None + + def collect(self) -> None: start_date = self.start_date end_date = self.end_date dry_run = self.dry_run @@ -360,7 +366,7 @@ def add_arguments(self, parser): help=f"Pinecone namespace for sync. Default from env {PINECONE_NAMESPACE_ENV_KEY}.", ) - def get_collector(self, **options): + def get_collector(self, **options) -> AbstractCollector: start_date = options["start_date"] end_date = options["end_date"] dry_run = options["dry_run"] diff --git a/core/apps.py b/core/apps.py index 9c03d891..344a5fcd 100644 --- a/core/apps.py +++ b/core/apps.py @@ -1,4 +1,5 @@ import logging +from typing import ClassVar from django.apps import AppConfig from django.conf import settings @@ -7,7 +8,7 @@ class CoreConfig(AppConfig): - default_auto_field = "django.db.models.BigAutoField" + default_auto_field: ClassVar[str] = "django.db.models.BigAutoField" name = "core" verbose_name = "Core" diff --git a/core/collectors/command_base.py b/core/collectors/command_base.py index c3365408..47027e0e 100644 --- a/core/collectors/command_base.py +++ b/core/collectors/command_base.py @@ -1,4 +1,4 @@ -"""Django management command base class for CollectorBase-backed collectors.""" +"""Django management command base class for CollectorRunnable-backed collectors.""" # Design notes (review summary): # - Template method: handle() -> get_collector(**options) -> phase(run) -> phase(sync_pinecone). diff --git a/core/management/commands/cleanup_workspace_orphans.py b/core/management/commands/cleanup_workspace_orphans.py index 24147fa3..282d5be0 100644 --- a/core/management/commands/cleanup_workspace_orphans.py +++ b/core/management/commands/cleanup_workspace_orphans.py @@ -9,6 +9,7 @@ import logging import time from pathlib import Path +from typing import Any, cast from django.conf import settings from django.core.management.base import BaseCommand, CommandError @@ -56,13 +57,12 @@ def handle(self, *args, **options): ) execute = options["execute"] root = Path(getattr(settings, "WORKSPACE_DIR", "")) + style = cast(Any, self.style) if not root.is_dir(): - self.stderr.write( - self.style.ERROR(f"WORKSPACE_DIR is not a directory: {root}") - ) + self.stderr.write(style.ERROR(f"WORKSPACE_DIR is not a directory: {root}")) return - suffix_found = self._run_suffix_scan(root, max_age, execute) + suffix_found = self._run_suffix_scan(root, max_age, execute, style) gh_stats = None if options["github_json_cache"]: @@ -79,7 +79,7 @@ def handle(self, *args, **options): ) rel = "Removed" if execute else "Would remove / logged" self.stdout.write( - self.style.NOTICE( + style.NOTICE( f"{rel} github_activity_tracker invalid JSON: scanned={gh_stats.scanned} " f"removed_invalid={gh_stats.removed_invalid} " f"quarantined={gh_stats.quarantined_invalid} " @@ -89,13 +89,15 @@ def handle(self, *args, **options): ) self.stdout.write( - self.style.NOTICE( + style.NOTICE( f"{'Removed' if execute else 'Found'} {suffix_found} orphan suffix candidate(s) " f"(suffix in {_ORPHAN_SUFFIXES}, older than {max_age}h)." ) ) - def _run_suffix_scan(self, root: Path, max_age: float, execute: bool) -> int: + def _run_suffix_scan( + self, root: Path, max_age: float, execute: bool, style: Any + ) -> int: cutoff = time.time() - max_age * 3600.0 found: list[Path] = [] for path in root.rglob("*"): @@ -116,10 +118,10 @@ def _run_suffix_scan(self, root: Path, max_age: float, execute: bool) -> int: if execute: try: p.unlink() - self.stdout.write(self.style.SUCCESS(f"deleted {rel}")) + self.stdout.write(style.SUCCESS(f"deleted {rel}")) except OSError as e: logger.warning("Could not delete %s: %s", p, e) - self.stderr.write(self.style.WARNING(f"skip {rel}: {e}")) + self.stderr.write(style.WARNING(f"skip {rel}: {e}")) else: self.stdout.write(f"would delete (dry-run): {rel}") diff --git a/cppa_pinecone_sync/management/commands/run_cppa_pinecone_sync.py b/cppa_pinecone_sync/management/commands/run_cppa_pinecone_sync.py index a7ac23dd..9c13a7ec 100644 --- a/cppa_pinecone_sync/management/commands/run_cppa_pinecone_sync.py +++ b/cppa_pinecone_sync/management/commands/run_cppa_pinecone_sync.py @@ -17,8 +17,7 @@ from django.core.management.base import CommandError -from core.collectors.base import CollectorBase -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from cppa_pinecone_sync.ingestion import PineconeInstance from cppa_pinecone_sync.sync import sync_to_pinecone @@ -42,7 +41,7 @@ def _resolve_preprocessor(dotted_path: str): return fn -class CppaPineconeSyncCollector(CollectorBase): +class CppaPineconeSyncCollector(AbstractCollector): """Run sync_to_pinecone for one (app_type, namespace, preprocessor).""" def __init__( @@ -57,8 +56,19 @@ def __init__( self.namespace = namespace self.preprocessor_path = preprocessor_path self.instance = instance + self._preprocess_fn: Any = None - def run(self) -> None: + @property + def name(self) -> str: + return "cppa_pinecone_sync" + + def validate_config(self) -> None: + try: + self._preprocess_fn = _resolve_preprocessor(self.preprocessor_path) + except (ValueError, ImportError) as e: + raise CommandError(str(e)) from e + + def collect(self) -> None: logger.info( "run_cppa_pinecone_sync: starting app_type=%s namespace=%s preprocessor=%s", self.app_type, @@ -66,11 +76,10 @@ def run(self) -> None: self.preprocessor_path, ) - preprocess_fn = _resolve_preprocessor(self.preprocessor_path) result = sync_to_pinecone( self.app_type, self.namespace, - preprocess_fn, + self._preprocess_fn, instance=self.instance, ) logger.info( @@ -118,7 +127,7 @@ def add_arguments(self, parser): help="Pinecone API key instance to use: 'public' (default) or 'private'.", ) - def get_collector(self, **options: Any) -> CollectorBase: + def get_collector(self, **options: Any) -> AbstractCollector: app_type = (options.get("app_type") or "").strip() or None namespace = (options.get("namespace") or "").strip() or None preprocessor_path = (options.get("preprocessor") or "").strip() or None diff --git a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py index 0e92893d..398b4974 100644 --- a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py +++ b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py @@ -19,8 +19,7 @@ from django.conf import settings from django.core.management.base import CommandError -from core.collectors.base import CollectorBase -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from cppa_slack_tracker.models import SlackTeam from cppa_slack_tracker.services import save_slack_message @@ -52,7 +51,7 @@ def _parse_date(date_str: Optional[str]) -> Optional[datetime]: return None -class CppaSlackTrackerCollector(CollectorBase): +class CppaSlackTrackerCollector(AbstractCollector): """Sync Slack teams, users, channels, memberships, and messages; optional Pinecone upsert.""" def __init__( @@ -65,7 +64,14 @@ def __init__( self.options = options self._team: SlackTeam | None = None - def run(self) -> None: + @property + def name(self) -> str: + return "cppa_slack_tracker" + + def validate_config(self) -> None: + return None + + def collect(self) -> None: dry_run = self.options.get("dry_run", False) if dry_run: self._print_dry_run() @@ -434,7 +440,7 @@ def add_arguments(self, parser): help="Skip Pinecone sync after message sync (default: sync to Pinecone)", ) - def get_collector(self, **options: Any) -> CollectorBase: + def get_collector(self, **options: Any) -> AbstractCollector: team_id = (options.get("team_id") or "").strip() if not team_id: team_id = (getattr(settings, "SLACK_TEAM_ID", "") or "").strip() diff --git a/cppa_user_tracker/management/commands/run_cppa_user_tracker.py b/cppa_user_tracker/management/commands/run_cppa_user_tracker.py index d295385f..1d78c120 100644 --- a/cppa_user_tracker/management/commands/run_cppa_user_tracker.py +++ b/cppa_user_tracker/management/commands/run_cppa_user_tracker.py @@ -9,20 +9,26 @@ import logging from typing import Any -from core.collectors.base import CollectorBase -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand logger = logging.getLogger(__name__) -class CppaUserTrackerCollector(CollectorBase): +class CppaUserTrackerCollector(AbstractCollector): """Identity/profile staging (stub until merge logic is implemented).""" def __init__(self, *, stdout: Any, style: Any) -> None: self.stdout = stdout self.style = style - def run(self) -> None: + @property + def name(self) -> str: + return "cppa_user_tracker" + + def validate_config(self) -> None: + return None + + def collect(self) -> None: logger.info("run_cppa_user_tracker: starting") # Stub: add logic (stage relations, merge into Identity/BaseProfile, etc.) self.stdout.write(self.style.SUCCESS("CPPA User Tracker completed (stub).")) @@ -32,5 +38,5 @@ def run(self) -> None: class Command(BaseCollectorCommand): help = "Run the CPPA User Tracker (identity/profile staging and merge)." - def get_collector(self, **_options: Any) -> CollectorBase: + def get_collector(self, **_options: Any) -> AbstractCollector: return CppaUserTrackerCollector(stdout=self.stdout, style=self.style) diff --git a/cppa_youtube_script_tracker/management/commands/run_cppa_youtube_script_tracker.py b/cppa_youtube_script_tracker/management/commands/run_cppa_youtube_script_tracker.py index e70877b4..0fc7ef73 100644 --- a/cppa_youtube_script_tracker/management/commands/run_cppa_youtube_script_tracker.py +++ b/cppa_youtube_script_tracker/management/commands/run_cppa_youtube_script_tracker.py @@ -24,9 +24,9 @@ from django.conf import settings from django.core.exceptions import ValidationError from django.core.management import call_command +from django.core.management.base import CommandError -from core.collectors.base import CollectorBase -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from django.utils.dateparse import parse_datetime from cppa_user_tracker.services import get_or_create_youtube_speaker @@ -408,14 +408,60 @@ def _run_pinecone_sync(app_id: str, namespace: str) -> None: ) -class CppaYoutubeScriptTrackerCollector(CollectorBase): +class CppaYoutubeScriptTrackerCollector(AbstractCollector): """Phases 1–3 on the command; Pinecone in ``sync_pinecone``.""" def __init__(self, cmd: Command, options: dict) -> None: self.cmd = cmd self.options = options - def run(self) -> None: + @property + def name(self) -> str: + return "cppa_youtube_script_tracker" + + def validate_config(self) -> None: + o = self.options + start_time_arg = (o.get("start_time") or "").strip() + end_time_arg = (o.get("end_time") or "").strip() + + start_dt: Optional[datetime] = None + if start_time_arg: + parsed = parse_datetime(start_time_arg) + if not parsed: + raise CommandError( + "--start-time must be a valid ISO 8601 datetime " + "(for example 2024-01-01T12:00:00Z)." + ) + start_dt = ( + parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed + ) + + end_dt: Optional[datetime] = None + if end_time_arg: + parsed = parse_datetime(end_time_arg) + if not parsed: + raise CommandError( + "--end-time must be a valid ISO 8601 datetime " + "(for example 2024-01-01T12:00:00Z)." + ) + end_dt = ( + parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed + ) + + if start_dt is not None and end_dt is not None and start_dt > end_dt: + raise CommandError( + "--start-time must be earlier than or equal to --end-time." + ) + + if start_dt is not None and end_dt is None: + effective_end = datetime.now(tz=timezone.utc) + if start_dt > effective_end: + raise CommandError( + "--start-time must not be later than the effective end time " + "(when --end-time is omitted, the end is the current UTC time)." + ) + + def collect(self) -> None: o = self.options start_time_arg = (o.get("start_time") or "").strip() end_time_arg = (o.get("end_time") or "").strip() @@ -519,7 +565,7 @@ def add_arguments(self, parser): help=f"Pinecone namespace. Default from env {PINECONE_NAMESPACE_ENV_KEY}.", ) - def get_collector(self, **options): + def get_collector(self, **options) -> AbstractCollector: return CppaYoutubeScriptTrackerCollector(cmd=self, options=dict(options)) def _phase_1(self, dry_run: bool) -> None: diff --git a/cppa_youtube_script_tracker/tests/test_run_command.py b/cppa_youtube_script_tracker/tests/test_run_command.py index 7d487516..f792cd63 100644 --- a/cppa_youtube_script_tracker/tests/test_run_command.py +++ b/cppa_youtube_script_tracker/tests/test_run_command.py @@ -9,6 +9,7 @@ import pytest from django.core.management import call_command +from django.core.management.base import CommandError from django.test.utils import override_settings from cppa_youtube_script_tracker.management.commands.run_cppa_youtube_script_tracker import ( @@ -76,6 +77,70 @@ def test_resolve_end_time_explicit(): assert dt.year == 2021 +def _collector_options(**overrides): + base = { + "start_time": "", + "end_time": "", + "channel_title": "", + "dry_run": False, + "skip_transcript": False, + "pinecone_app_id": "", + "pinecone_namespace": "", + } + base.update(overrides) + return base + + +def test_validate_config_rejects_malformed_start_time(): + collector = CppaYoutubeScriptTrackerCollector( + cmd=Command(stdout=StringIO(), stderr=StringIO()), + options=_collector_options(start_time="not-an-iso-datetime"), + ) + with pytest.raises(CommandError, match="--start-time"): + collector.validate_config() + + +def test_validate_config_rejects_malformed_end_time(): + collector = CppaYoutubeScriptTrackerCollector( + cmd=Command(stdout=StringIO(), stderr=StringIO()), + options=_collector_options(end_time="bogus"), + ) + with pytest.raises(CommandError, match="--end-time"): + collector.validate_config() + + +def test_validate_config_rejects_start_after_end(): + collector = CppaYoutubeScriptTrackerCollector( + cmd=Command(stdout=StringIO(), stderr=StringIO()), + options=_collector_options( + start_time="2024-02-01T00:00:00Z", + end_time="2024-01-01T00:00:00Z", + ), + ) + with pytest.raises(CommandError, match="earlier than or equal"): + collector.validate_config() + + +def test_validate_config_rejects_start_after_effective_end_when_end_omitted(): + collector = CppaYoutubeScriptTrackerCollector( + cmd=Command(stdout=StringIO(), stderr=StringIO()), + options=_collector_options(start_time="2099-01-01T00:00:00Z", end_time=""), + ) + with pytest.raises(CommandError, match="effective end"): + collector.validate_config() + + +def test_validate_config_accepts_valid_explicit_range(): + collector = CppaYoutubeScriptTrackerCollector( + cmd=Command(stdout=StringIO(), stderr=StringIO()), + options=_collector_options( + start_time="2024-01-01T00:00:00Z", + end_time="2024-02-01T00:00:00Z", + ), + ) + collector.validate_config() + + @pytest.mark.django_db def test_persist_video_skips_empty_id(): created, skipped = _persist_video({"video_id": ""}) diff --git a/discord_activity_tracker/management/commands/backfill_discord_activity_tracker.py b/discord_activity_tracker/management/commands/backfill_discord_activity_tracker.py index 5a4000de..cefe9e1a 100644 --- a/discord_activity_tracker/management/commands/backfill_discord_activity_tracker.py +++ b/discord_activity_tracker/management/commands/backfill_discord_activity_tracker.py @@ -18,7 +18,7 @@ success; Pinecone sync when enabled. Raises: - Per-file parse/validation failures are caught inside ``DiscordBackfillCollector.run`` + Per-file parse/validation failures are caught inside ``DiscordBackfillCollector.collect`` (logged and reported on stdout); they do not abort the whole command. Uncaught exceptions from ``sync_pinecone`` or the base command layer may still propagate. """ @@ -32,8 +32,7 @@ from asgiref.sync import sync_to_async -from core.collectors.base import CollectorBase -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from discord_activity_tracker.pinecone_runner import task_discord_pinecone_sync from discord_activity_tracker.services import ( get_or_create_discord_channel, @@ -63,10 +62,10 @@ def _json_display_path(import_dir: Path, json_path: Path) -> str: return json_path.name -class DiscordBackfillCollector(CollectorBase): +class DiscordBackfillCollector(AbstractCollector): """Backfill collector: scan drop folder, import each JSON, delete on success. - ``run()`` lists JSON under ``get_cpp_discussion_import_dir()``, optionally + ``collect()`` lists JSON under ``get_cpp_discussion_import_dir()``, optionally dry-run prints paths, else for each file parses, validates staging schema, upserts messages in batches, unlinks the file on success, or logs failure and keeps the file. @@ -83,7 +82,14 @@ def __init__(self, *, stdout, style, **opts: Any) -> None: self.dry_run: bool = opts["dry_run"] self.skip_pinecone: bool = bool(opts.get("skip_pinecone")) - def run(self) -> None: + @property + def name(self) -> str: + return "discord_activity_tracker_backfill" + + def validate_config(self) -> None: + return None + + def collect(self) -> None: import_dir = get_cpp_discussion_import_dir() json_files = sorted( filter_discord_export_json_paths(import_dir.rglob("*.json")) @@ -226,7 +232,7 @@ def add_arguments(self, parser): help="List JSON files that would be imported without writing or deleting", ) - def get_collector(self, **options: Any) -> CollectorBase: + def get_collector(self, **options: Any) -> AbstractCollector: opts = dict(options) if opts.get("skip_pinecone") is None: opts["skip_pinecone"] = False diff --git a/discord_activity_tracker/management/commands/run_discord_activity_tracker.py b/discord_activity_tracker/management/commands/run_discord_activity_tracker.py index 122d47cb..f96d520a 100644 --- a/discord_activity_tracker/management/commands/run_discord_activity_tracker.py +++ b/discord_activity_tracker/management/commands/run_discord_activity_tracker.py @@ -43,8 +43,7 @@ from django.conf import settings from django.core.management.base import CommandError -from core.collectors.base import CollectorBase -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from core.utils.datetime_parsing import parse_iso_datetime from discord_activity_tracker.models import DiscordServer from discord_activity_tracker.pinecone_runner import task_discord_pinecone_sync @@ -333,13 +332,13 @@ def task_markdown_export_and_push( collector.stdout.write(collector.style.WARNING("No markdown files exported")) -class DiscordActivityCollector(CollectorBase): +class DiscordActivityCollector(AbstractCollector): """Collector implementation for ``run_discord_activity_tracker``. Holds stdout/style, resolved ``channel_ids`` (from ``--channels`` or ``settings.DISCORD_CHANNEL_IDS``), and delegates to ``Command._handle_core``. - ``run()`` drives fetch → Markdown → Pinecone according to options. + ``collect()`` drives fetch → Markdown → Pinecone according to options. ``sync_pinecone()`` runs ``task_discord_pinecone_sync`` when not dry-run and not skipping Pinecone. @@ -359,7 +358,14 @@ def __init__(self, cmd: "Command", options: dict) -> None: else: self.channel_ids = list(getattr(settings, "DISCORD_CHANNEL_IDS", [])) - def run(self) -> None: + @property + def name(self) -> str: + return "discord_activity_tracker" + + def validate_config(self) -> None: + return None + + def collect(self) -> None: self.cmd._handle_core(self.options, collector=self) def sync_pinecone(self) -> None: @@ -512,7 +518,7 @@ def add_arguments(self, parser): help="Deprecated: prefer --skip-*. sync=fetch only; export=markdown only; all=all phases.", ) - def get_collector(self, **options: Any) -> CollectorBase: + def get_collector(self, **options: Any) -> AbstractCollector: opts = dict(options) if opts.get("skip_pinecone") is None: opts["skip_pinecone"] = False diff --git a/docs/Core_public_API.md b/docs/Core_public_API.md index 67fda01a..3007db6d 100644 --- a/docs/Core_public_API.md +++ b/docs/Core_public_API.md @@ -12,6 +12,28 @@ The `core` Django app holds shared infrastructure. Treat the following as the ** | `core.collectors.BaseCollectorCommand` | Thin `BaseCommand` adapter: runs `get_collector(**opts).run()` then `sync_pinecone()`. | | `core.collectors.DjangoCommandCollector` | Wraps `call_command(name)` for tests or glue code. | +### Collector base class usage (migration status) + +All **application** collectors listed below subclass **`AbstractCollector`** (`name`, `validate_config()`, `collect()`). **`BaseCollectorCommand`** runs `run()` (validate then collect) and `sync_pinecone()` for each. + +| Management command | Collector class | Primary module | +|--------|-----------------|----------------| +| `run_boost_usage_tracker` | `BoostUsageTrackerCollector` | `boost_usage_tracker.management.commands.run_boost_usage_tracker` | +| `run_boost_github_activity_tracker` | `BoostGithubActivityCollector` | `boost_library_tracker.management.commands.run_boost_github_activity_tracker` | +| `run_clang_github_tracker` | `ClangGithubTrackerCollector` | `clang_github_tracker.collectors` | +| `run_boost_library_usage_dashboard` | `BoostLibraryUsageDashboardCollector` | `boost_library_usage_dashboard.collectors` | +| `run_boost_library_docs_tracker` | `BoostLibraryDocsTrackerCollector` | `boost_library_docs_tracker.management.commands.run_boost_library_docs_tracker` | +| `run_boost_mailing_list_tracker` | `BoostMailingListTrackerCollector` | `boost_mailing_list_tracker.management.commands.run_boost_mailing_list_tracker` | +| `run_cppa_user_tracker` | `CppaUserTrackerCollector` | `cppa_user_tracker.management.commands.run_cppa_user_tracker` | +| `run_cppa_pinecone_sync` | `CppaPineconeSyncCollector` | `cppa_pinecone_sync.management.commands.run_cppa_pinecone_sync` | +| `run_cppa_slack_tracker` | `CppaSlackTrackerCollector` | `cppa_slack_tracker.management.commands.run_cppa_slack_tracker` | +| `run_cppa_youtube_script_tracker` | `CppaYoutubeScriptTrackerCollector` | `cppa_youtube_script_tracker.management.commands.run_cppa_youtube_script_tracker` | +| `run_wg21_paper_tracker` | `Wg21PaperTrackerCollector` | `wg21_paper_tracker.collectors` | +| `run_discord_activity_tracker` | `DiscordActivityCollector` | `discord_activity_tracker.management.commands.run_discord_activity_tracker` | +| `backfill_discord_activity_tracker` | `DiscordBackfillCollector` | `discord_activity_tracker.management.commands.backfill_discord_activity_tracker` | + +**Still on `CollectorBase` (framework only):** `DjangoCommandCollector` in `core.collectors.base` subclasses the legacy abstract base for `call_command` glue. New app collectors should **not** subclass `CollectorBase`. + ## Failure classification | Import | Purpose | diff --git a/pyrightconfig.json b/pyrightconfig.json index 109b1c4c..389b3f54 100644 --- a/pyrightconfig.json +++ b/pyrightconfig.json @@ -11,6 +11,5 @@ ], "pythonVersion": "3.11", "typeCheckingMode": "basic", - "reportMissingImports": true, - "stubPath": "" + "reportMissingImports": true } diff --git a/wg21_paper_tracker/collectors.py b/wg21_paper_tracker/collectors.py index db59cb40..6ad12608 100644 --- a/wg21_paper_tracker/collectors.py +++ b/wg21_paper_tracker/collectors.py @@ -8,8 +8,11 @@ from django.conf import settings from django.core.management.base import CommandError -from core.collectors.base import CollectorBase -from wg21_paper_tracker.pipeline import run_tracker_pipeline +from core.collectors import AbstractCollector +from wg21_paper_tracker.pipeline import ( + _normalize_mailing_date_label, + run_tracker_pipeline, +) logger = logging.getLogger(__name__) @@ -49,7 +52,7 @@ def trigger_github_repository_dispatch( response.raise_for_status() -class Wg21PaperTrackerCollector(CollectorBase): +class Wg21PaperTrackerCollector(AbstractCollector): """Fetch mailings, update DB, optionally dispatch to GitHub.""" def __init__( @@ -63,7 +66,48 @@ def __init__( self.from_date = from_date self.to_date = to_date - def run(self) -> None: + @property + def name(self) -> str: + return "wg21_paper_tracker" + + def validate_config(self) -> None: + def _validated_bound( + value: str, *, field_for_normalize: str, cli_flag: str + ) -> str: + try: + normalized = _normalize_mailing_date_label( + value, field_name=field_for_normalize + ) + except ValueError as e: + raise CommandError(str(e)) from e + month = int(normalized[5:7]) + if month < 1 or month > 12: + raise CommandError( + f"Invalid --{cli_flag} {value!r}; month must be 01-12 (YYYY-MM)." + ) + return normalized + + from_norm: str | None = None + if self.from_date: + from_norm = _validated_bound( + self.from_date, + field_for_normalize="from_mailing_date", + cli_flag="from-date", + ) + to_norm: str | None = None + if self.to_date: + to_norm = _validated_bound( + self.to_date, + field_for_normalize="to_mailing_date", + cli_flag="to-date", + ) + if from_norm is not None and to_norm is not None and from_norm > to_norm: + raise CommandError( + f"--from-date {self.from_date!r} must be earlier than or equal to " + f"--to-date {self.to_date!r}." + ) + + def collect(self) -> None: if self.dry_run: if self.from_date or self.to_date: logger.info( diff --git a/wg21_paper_tracker/management/commands/run_wg21_paper_tracker.py b/wg21_paper_tracker/management/commands/run_wg21_paper_tracker.py index 8ddbefa1..f882a0f1 100644 --- a/wg21_paper_tracker/management/commands/run_wg21_paper_tracker.py +++ b/wg21_paper_tracker/management/commands/run_wg21_paper_tracker.py @@ -4,7 +4,7 @@ trigger a GitHub repository_dispatch so another repo can download and convert documents. """ -from core.collectors.command_base import BaseCollectorCommand +from core.collectors import AbstractCollector, BaseCollectorCommand from wg21_paper_tracker.collectors import Wg21PaperTrackerCollector @@ -43,7 +43,7 @@ def add_arguments(self, parser): ), ) - def get_collector(self, **options): + def get_collector(self, **options) -> AbstractCollector: dry_run = options.get("dry_run", False) from_date = options.get("from_date") to_date = options.get("to_date") diff --git a/wg21_paper_tracker/tests/test_collectors.py b/wg21_paper_tracker/tests/test_collectors.py index e465f2c6..f36a6f78 100644 --- a/wg21_paper_tracker/tests/test_collectors.py +++ b/wg21_paper_tracker/tests/test_collectors.py @@ -1,4 +1,4 @@ -"""Tests for WG21 collector helpers (repository_dispatch + CollectorBase impl).""" +"""Tests for WG21 collector helpers (repository_dispatch + AbstractCollector impl).""" import logging from unittest.mock import MagicMock, patch @@ -34,6 +34,36 @@ def test_trigger_github_repository_dispatch_raises_on_http_error(): ) +def test_wg21_collector_validate_config_rejects_invalid_month(): + collector = Wg21PaperTrackerCollector( + dry_run=True, + from_date="2025-13", + to_date=None, + ) + with pytest.raises(CommandError, match="month must be"): + collector.run() + + +def test_wg21_collector_validate_config_rejects_from_after_to_dry_run(): + collector = Wg21PaperTrackerCollector( + dry_run=True, + from_date="2025-03", + to_date="2025-01", + ) + with pytest.raises(CommandError, match="--from-date"): + collector.run() + + +def test_wg21_collector_validate_config_rejects_bad_from_dry_run(): + collector = Wg21PaperTrackerCollector( + dry_run=True, + from_date="bad", + to_date=None, + ) + with pytest.raises(CommandError, match="Invalid from_mailing_date"): + collector.run() + + @pytest.mark.parametrize( ("dry_run", "from_date", "to_date", "expect_dates_in_message"), [