Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@

from django.core.management.base import CommandError

from core.collectors.base import CollectorBase
from core.collectors.command_base import BaseCollectorCommand
from core.collectors import AbstractCollector, BaseCollectorCommand

from boost_library_docs_tracker import fetcher, services, workspace
from boost_library_docs_tracker.preprocessor import preprocess_for_pinecone
Expand All @@ -57,14 +56,23 @@
DEFAULT_MAX_PAGES = 10


class BoostLibraryDocsTrackerCollector(CollectorBase):
class BoostLibraryDocsTrackerCollector(AbstractCollector):
"""Scrape docs to DB/workspace; Pinecone upsert in ``sync_pinecone``."""

def __init__(self, cmd: "Command", options: dict) -> None:
self.cmd = cmd
self.options = options

def run(self) -> None:
@property
def name(self) -> str:
return "boost_library_docs_tracker"

def validate_config(self) -> None:
max_pages = self.options.get("max_pages")
if max_pages is not None and max_pages < 1:
raise CommandError("--max-pages must be at least 1.")

def collect(self) -> None:
o = self.options
try:
self.cmd._run(
Expand Down Expand Up @@ -145,7 +153,7 @@ def add_arguments(self, parser):
),
)

def get_collector(self, **options):
def get_collector(self, **options) -> AbstractCollector:
return BoostLibraryDocsTrackerCollector(cmd=self, options=dict(options))

# Top-level flow
Expand Down
13 changes: 10 additions & 3 deletions boost_library_usage_dashboard/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.conf import settings
from django.core.management.base import CommandError

from core.collectors.base import CollectorBase
from core.collectors import AbstractCollector
from boost_library_usage_dashboard.analyzer import BoostUsageDashboardAnalyzer
from boost_library_usage_dashboard.publisher import publish_dashboard
from boost_library_usage_dashboard.renderer import render_dashboard_html
Expand All @@ -15,7 +15,7 @@
logger = logging.getLogger(__name__)


class BoostLibraryUsageDashboardCollector(CollectorBase):
class BoostLibraryUsageDashboardCollector(AbstractCollector):
"""Collect metrics, render HTML, optionally publish to GitHub."""

def __init__(
Expand All @@ -35,7 +35,14 @@ def __init__(
self.repo = repo
self.branch = branch

def run(self) -> None:
@property
def name(self) -> str:
return "boost_library_usage_dashboard"

def validate_config(self) -> None:
return None

def collect(self) -> None:
output_dir = get_workspace_path("boost_library_usage_dashboard").resolve()
output_dir.mkdir(parents=True, exist_ok=True)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Build the Boost library usage dashboard from DB data and optionally publish to GitHub."""

from core.collectors.command_base import BaseCollectorCommand
from core.collectors import AbstractCollector, BaseCollectorCommand
from boost_library_usage_dashboard.collectors import (
BoostLibraryUsageDashboardCollector,
)
Expand Down Expand Up @@ -49,7 +49,7 @@ def add_arguments(self, parser):
help="Branch to publish to (overrides BOOST_LIBRARY_USAGE_DASHBOARD_PUBLISH_BRANCH; default main).",
)

def get_collector(self, **options):
def get_collector(self, **options) -> AbstractCollector:
return BoostLibraryUsageDashboardCollector(
skip_collect=options["skip_collect"],
skip_render=options["skip_render"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
from django.core.management import call_command
from django.utils.dateparse import parse_datetime

from core.collectors.base import CollectorBase
from core.collectors.command_base import BaseCollectorCommand
from core.collectors import AbstractCollector, BaseCollectorCommand

from boost_mailing_list_tracker.email_formatter import format_email
from boost_mailing_list_tracker.fetcher import (
Expand Down Expand Up @@ -179,7 +178,7 @@ def _process_existing_workspace_json(list_name: str) -> tuple[int, int]:
return processed, skipped


class BoostMailingListTrackerCollector(CollectorBase):
class BoostMailingListTrackerCollector(AbstractCollector):
"""Fetch mailing lists via workspace pipeline."""

def __init__(
Expand All @@ -201,7 +200,14 @@ def __init__(
self.pinecone_app_type = pinecone_app_type
self.pinecone_namespace = pinecone_namespace

def run(self) -> None:
@property
def name(self) -> str:
return "boost_mailing_list_tracker"

def validate_config(self) -> None:
return None

def collect(self) -> None:
start_date = self.start_date
end_date = self.end_date
dry_run = self.dry_run
Expand Down Expand Up @@ -360,7 +366,7 @@ def add_arguments(self, parser):
help=f"Pinecone namespace for sync. Default from env {PINECONE_NAMESPACE_ENV_KEY}.",
)

def get_collector(self, **options):
def get_collector(self, **options) -> AbstractCollector:
start_date = options["start_date"]
end_date = options["end_date"]
dry_run = options["dry_run"]
Expand Down
3 changes: 2 additions & 1 deletion core/apps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import ClassVar

from django.apps import AppConfig
from django.conf import settings
Expand All @@ -7,7 +8,7 @@


class CoreConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
default_auto_field: ClassVar[str] = "django.db.models.BigAutoField"
name = "core"
verbose_name = "Core"

Expand Down
2 changes: 1 addition & 1 deletion core/collectors/command_base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Django management command base class for CollectorBase-backed collectors."""
"""Django management command base class for CollectorRunnable-backed collectors."""

# Design notes (review summary):
# - Template method: handle() -> get_collector(**options) -> phase(run) -> phase(sync_pinecone).
Expand Down
20 changes: 11 additions & 9 deletions core/management/commands/cleanup_workspace_orphans.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import time
from pathlib import Path
from typing import Any, cast

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
Expand Down Expand Up @@ -56,13 +57,12 @@ def handle(self, *args, **options):
)
execute = options["execute"]
root = Path(getattr(settings, "WORKSPACE_DIR", ""))
style = cast(Any, self.style)
if not root.is_dir():
self.stderr.write(
self.style.ERROR(f"WORKSPACE_DIR is not a directory: {root}")
)
self.stderr.write(style.ERROR(f"WORKSPACE_DIR is not a directory: {root}"))
return

suffix_found = self._run_suffix_scan(root, max_age, execute)
suffix_found = self._run_suffix_scan(root, max_age, execute, style)

gh_stats = None
if options["github_json_cache"]:
Expand All @@ -79,7 +79,7 @@ def handle(self, *args, **options):
)
rel = "Removed" if execute else "Would remove / logged"
self.stdout.write(
self.style.NOTICE(
style.NOTICE(
f"{rel} github_activity_tracker invalid JSON: scanned={gh_stats.scanned} "
f"removed_invalid={gh_stats.removed_invalid} "
f"quarantined={gh_stats.quarantined_invalid} "
Expand All @@ -89,13 +89,15 @@ def handle(self, *args, **options):
)

self.stdout.write(
self.style.NOTICE(
style.NOTICE(
f"{'Removed' if execute else 'Found'} {suffix_found} orphan suffix candidate(s) "
f"(suffix in {_ORPHAN_SUFFIXES}, older than {max_age}h)."
)
)

def _run_suffix_scan(self, root: Path, max_age: float, execute: bool) -> int:
def _run_suffix_scan(
self, root: Path, max_age: float, execute: bool, style: Any
) -> int:
cutoff = time.time() - max_age * 3600.0
found: list[Path] = []
for path in root.rglob("*"):
Expand All @@ -116,10 +118,10 @@ def _run_suffix_scan(self, root: Path, max_age: float, execute: bool) -> int:
if execute:
try:
p.unlink()
self.stdout.write(self.style.SUCCESS(f"deleted {rel}"))
self.stdout.write(style.SUCCESS(f"deleted {rel}"))
except OSError as e:
logger.warning("Could not delete %s: %s", p, e)
self.stderr.write(self.style.WARNING(f"skip {rel}: {e}"))
self.stderr.write(style.WARNING(f"skip {rel}: {e}"))
else:
self.stdout.write(f"would delete (dry-run): {rel}")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@

from django.core.management.base import CommandError

from core.collectors.base import CollectorBase
from core.collectors.command_base import BaseCollectorCommand
from core.collectors import AbstractCollector, BaseCollectorCommand

from cppa_pinecone_sync.ingestion import PineconeInstance
from cppa_pinecone_sync.sync import sync_to_pinecone
Expand All @@ -42,7 +41,7 @@ def _resolve_preprocessor(dotted_path: str):
return fn


class CppaPineconeSyncCollector(CollectorBase):
class CppaPineconeSyncCollector(AbstractCollector):
"""Run sync_to_pinecone for one (app_type, namespace, preprocessor)."""

def __init__(
Expand All @@ -57,20 +56,30 @@ def __init__(
self.namespace = namespace
self.preprocessor_path = preprocessor_path
self.instance = instance
self._preprocess_fn: Any = None

def run(self) -> None:
@property
def name(self) -> str:
return "cppa_pinecone_sync"

def validate_config(self) -> None:
try:
self._preprocess_fn = _resolve_preprocessor(self.preprocessor_path)
except (ValueError, ImportError) as e:
raise CommandError(str(e)) from e
Comment thread
snowfox1003 marked this conversation as resolved.

def collect(self) -> None:
logger.info(
"run_cppa_pinecone_sync: starting app_type=%s namespace=%s preprocessor=%s",
self.app_type,
self.namespace,
self.preprocessor_path,
)

preprocess_fn = _resolve_preprocessor(self.preprocessor_path)
result = sync_to_pinecone(
self.app_type,
self.namespace,
preprocess_fn,
self._preprocess_fn,
instance=self.instance,
)
logger.info(
Expand Down Expand Up @@ -118,7 +127,7 @@ def add_arguments(self, parser):
help="Pinecone API key instance to use: 'public' (default) or 'private'.",
)

def get_collector(self, **options: Any) -> CollectorBase:
def get_collector(self, **options: Any) -> AbstractCollector:
app_type = (options.get("app_type") or "").strip() or None
namespace = (options.get("namespace") or "").strip() or None
preprocessor_path = (options.get("preprocessor") or "").strip() or None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
from django.conf import settings
from django.core.management.base import CommandError

from core.collectors.base import CollectorBase
from core.collectors.command_base import BaseCollectorCommand
from core.collectors import AbstractCollector, BaseCollectorCommand

from cppa_slack_tracker.models import SlackTeam
from cppa_slack_tracker.services import save_slack_message
Expand Down Expand Up @@ -52,7 +51,7 @@ def _parse_date(date_str: Optional[str]) -> Optional[datetime]:
return None


class CppaSlackTrackerCollector(CollectorBase):
class CppaSlackTrackerCollector(AbstractCollector):
"""Sync Slack teams, users, channels, memberships, and messages; optional Pinecone upsert."""

def __init__(
Expand All @@ -65,7 +64,14 @@ def __init__(
self.options = options
self._team: SlackTeam | None = None

def run(self) -> None:
@property
def name(self) -> str:
return "cppa_slack_tracker"

def validate_config(self) -> None:
return None

def collect(self) -> None:
dry_run = self.options.get("dry_run", False)
if dry_run:
self._print_dry_run()
Expand Down Expand Up @@ -434,7 +440,7 @@ def add_arguments(self, parser):
help="Skip Pinecone sync after message sync (default: sync to Pinecone)",
)

def get_collector(self, **options: Any) -> CollectorBase:
def get_collector(self, **options: Any) -> AbstractCollector:
team_id = (options.get("team_id") or "").strip()
if not team_id:
team_id = (getattr(settings, "SLACK_TEAM_ID", "") or "").strip()
Expand Down
16 changes: 11 additions & 5 deletions cppa_user_tracker/management/commands/run_cppa_user_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,26 @@
import logging
from typing import Any

from core.collectors.base import CollectorBase
from core.collectors.command_base import BaseCollectorCommand
from core.collectors import AbstractCollector, BaseCollectorCommand

logger = logging.getLogger(__name__)


class CppaUserTrackerCollector(CollectorBase):
class CppaUserTrackerCollector(AbstractCollector):
"""Identity/profile staging (stub until merge logic is implemented)."""

def __init__(self, *, stdout: Any, style: Any) -> None:
self.stdout = stdout
self.style = style

def run(self) -> None:
@property
def name(self) -> str:
return "cppa_user_tracker"

def validate_config(self) -> None:
return None

def collect(self) -> None:
logger.info("run_cppa_user_tracker: starting")
# Stub: add logic (stage relations, merge into Identity/BaseProfile, etc.)
self.stdout.write(self.style.SUCCESS("CPPA User Tracker completed (stub)."))
Expand All @@ -32,5 +38,5 @@ def run(self) -> None:
class Command(BaseCollectorCommand):
help = "Run the CPPA User Tracker (identity/profile staging and merge)."

def get_collector(self, **_options: Any) -> CollectorBase:
def get_collector(self, **_options: Any) -> AbstractCollector:
return CppaUserTrackerCollector(stdout=self.stdout, style=self.style)
Loading
Loading