From f4751b60b03b31ad821bf642abd7fa710afe60bb Mon Sep 17 00:00:00 2001 From: snowfox1003 Date: Tue, 19 May 2026 13:34:30 -0400 Subject: [PATCH 1/2] chore: commit Celery Beat schedule YAML, verify it in CI and make health, and update docs --- .github/workflows/actions.yml | 4 ++ .gitignore | 3 -- Makefile | 3 +- .../tests/test_schedule_config.py | 26 ++++++++++ config/boost_collector_schedule.yaml | 50 +++++++++++++++++++ docs/Deployment.md | 6 +-- docs/How_to_add_a_collector.md | 2 +- 7 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 config/boost_collector_schedule.yaml diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index a1a4afb4..b448f813 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -207,6 +207,10 @@ jobs: - name: Run migrations run: docker compose exec -T web python manage.py migrate --noinput + - name: Verify Celery Beat schedule loaded + run: | + docker compose exec -T web python manage.py shell -c "from django.conf import settings; n = len(settings.CELERY_BEAT_SCHEDULE); assert n > 0, 'CELERY_BEAT_SCHEDULE is empty'; print('Beat schedule entries:', n)" + - name: Health check run: make health diff --git a/.gitignore b/.gitignore index 5e07b92c..4d5c116b 100644 --- a/.gitignore +++ b/.gitignore @@ -49,9 +49,6 @@ discord_activity_tracker/tools/ .DS_Store ._* - -# Config -config/boost_collector_schedule.yaml # temp files temp/ nul diff --git a/Makefile b/Makefile index 5ead9d52..28667613 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ help: @echo "" @echo " Logs & status" @echo " ps Show running containers" - @echo " health Verify DB, Redis, Selenium, and Celery containers" + @echo " health Verify DB, Redis, Selenium, Celery Beat schedule, and Celery containers" @echo " notify Send Slack/Discord startup notification (celery_beat; optional DEPLOY_BRANCH)" @echo " logs Follow logs for all services" @echo " logs-web Follow logs for the web service" @@ -98,6 +98,7 @@ ps: .PHONY: health health: 
$(COMPOSE) exec -T $(APP) python manage.py check --database default + $(COMPOSE) exec -T $(APP) python manage.py shell -c "from django.conf import settings; n = len(settings.CELERY_BEAT_SCHEDULE); assert n > 0, 'CELERY_BEAT_SCHEDULE is empty'; print('Beat schedule entries:', n)" $(COMPOSE) exec -T redis redis-cli ping | grep -q PONG $(COMPOSE) exec -T selenium curl -sf http://localhost:4444/status | grep -qE '"ready"[[:space:]]*:[[:space:]]*true' $(COMPOSE) ps --status running celery_worker | grep -q celery_worker diff --git a/boost_collector_runner/tests/test_schedule_config.py b/boost_collector_runner/tests/test_schedule_config.py index 52af630a..d5c9d12d 100644 --- a/boost_collector_runner/tests/test_schedule_config.py +++ b/boost_collector_runner/tests/test_schedule_config.py @@ -1,8 +1,11 @@ """Tests for boost_collector_runner.schedule_config: load_config, validation, get_tasks_for_schedule, get_beat_schedule.""" import calendar +from pathlib import Path + import pytest import yaml +from django.core.management import get_commands from boost_collector_runner.schedule_config import ( DEFAULT_GROUP_BATCH_SCHEDULE_KIND, @@ -729,3 +732,26 @@ def test_get_tasks_for_schedule_interval_scoped_by_group_id(tmp_path): assert len(tasks_all) == 2 commands = {t[1]["command"] for t in tasks_all} assert commands == {"interval_g1", "interval_g2"} + + +@pytest.mark.django_db +def test_committed_schedule_yaml_loads_non_empty_beat_schedule(settings): + """Repo ships config/boost_collector_schedule.yaml; Beat must not be empty on clone.""" + repo_yaml = Path(settings.BASE_DIR) / "config" / "boost_collector_schedule.yaml" + assert ( + repo_yaml.is_file() + ), "committed schedule missing; add config/boost_collector_schedule.yaml" + settings.BOOST_COLLECTOR_SCHEDULE_YAML = repo_yaml + data = load_config(repo_yaml) + registered = get_commands() + for _group_id, group_data in (data.get("groups") or {}).items(): + if not isinstance(group_data, dict): + continue + task_list = 
group_data.get("tasks") or [] + for task in task_list: + if not isinstance(task, dict) or task.get("enabled") is False: + continue + cmd = task.get("command") + assert cmd in registered, f"unknown management command in YAML: {cmd!r}" + schedule = get_beat_schedule() + assert schedule, "CELERY_BEAT_SCHEDULE must not be empty when committed YAML exists" diff --git a/config/boost_collector_schedule.yaml b/config/boost_collector_schedule.yaml new file mode 100644 index 00000000..fffee998 --- /dev/null +++ b/config/boost_collector_schedule.yaml @@ -0,0 +1,50 @@ +# Boost collector schedule (see docs/Workflow.md) +# default_time is in UTC (24h "HH:MM"). Beat runs at that UTC time. The default batch +# uses the UTC date for weekly/monthly eligibility so it matches the run time. +# +# Weekly tasks are supported by the runner but none are configured here yet. +# +# Schedule types: +# daily - runs at group default_time every day (part of group batch). +# weekly - runs on the given weekday at default_time (use "on" or "day_of_week": monday..sunday or mon..sun). +# monthly - runs on the given day of month at default_time (use "on" or "day_of_month": 1-31). +# on_release - runs at default_time only when a new Boost release is detected (part of group batch). +# interval - runs every N minutes (use "minutes": 1-180); one Beat entry per distinct interval_minutes. 
+ +groups: + github: + default_time: "00:05" + tasks: + - command: run_boost_usage_tracker + schedule: daily + args: ["--task", "monitor_content"] + - command: run_update_created_repos_by_language + schedule: daily + - command: run_boost_github_activity_tracker + schedule: daily + - command: run_boost_library_usage_dashboard + schedule: daily + - command: run_boost_usage_tracker + schedule: monthly + "on": 1 + args: ["--task", "monitor_stars"] + - command: collect_boost_libraries + schedule: on_release + + boost_library_docs: + default_time: "16:20" + tasks: + - command: run_boost_library_docs_tracker + schedule: on_release + + slack: + default_time: "16:30" + tasks: + - command: run_cppa_slack_tracker + schedule: daily + + mailing_list: + default_time: "00:10" + tasks: + - command: run_boost_mailing_list_tracker + schedule: daily diff --git a/docs/Deployment.md b/docs/Deployment.md index 8c00872e..8c558bc7 100644 --- a/docs/Deployment.md +++ b/docs/Deployment.md @@ -107,11 +107,11 @@ SSH into the server as the same user GitHub Actions uses (the account named in ` cd /opt/boost-data-collector cp .env.example .env # edit .env -cp config/boost_collector_schedule.yaml.example config/boost_collector_schedule.yaml -# edit config/boost_collector_schedule.yaml ``` -Use `.env.example` and the YAML example as references for required variables and schedule entries. +The collector schedule ships in the repo as `config/boost_collector_schedule.yaml`. Adjust it via pull request if you need different times or groups; see [Workflow.md](Workflow.md). + +Use `.env.example` as a reference for required environment variables. If you create or edit `.env` with `sudo` (e.g. `sudo nano`), the file is often owned by **root** with mode `600`. **Docker Compose reads `.env` as the user running `make build` / `make up`** (your deploy user), which causes `permission denied`. 
Fix ownership after saving: diff --git a/docs/How_to_add_a_collector.md b/docs/How_to_add_a_collector.md index b86cabd7..4dd5ef94 100644 --- a/docs/How_to_add_a_collector.md +++ b/docs/How_to_add_a_collector.md @@ -9,7 +9,7 @@ This checklist assumes you already have a Django app (or are creating one) with ## 2. Register the command in YAML -Add a task under the right group in `config/boost_collector_schedule.yaml` (see [Workflow.md](Workflow.md#2-boost-collector-runner-and-yaml-schedule)). That file is often **local-only** (gitignored); copy from [`config/boost_collector_schedule.yaml.example`](../config/boost_collector_schedule.yaml.example) if you do not have it yet. Celery Beat runs **`boost_collector_runner.tasks.run_scheduled_collectors_task`** per group and schedule. +Add a task under the right group in `config/boost_collector_schedule.yaml` (see [Workflow.md](Workflow.md#2-boost-collector-runner-and-yaml-schedule)). That file is **committed** to the repository; [`config/boost_collector_schedule.yaml.example`](../config/boost_collector_schedule.yaml.example) only points to it. Celery Beat runs **`boost_collector_runner.tasks.run_scheduled_collectors_task`** per group and schedule. ## 3. 
Shared abstractions (recommended) From 68570673c1f262d096765c4e08a79473b5b9e3de Mon Sep 17 00:00:00 2001 From: snowfox1003 Date: Tue, 19 May 2026 13:46:08 -0400 Subject: [PATCH 2/2] fix: make health check and CI verification exit non-zero via sys.exit instead of assert when the Celery Beat schedule is empty --- .github/workflows/actions.yml | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index b448f813..b05da1b3 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -209,7 +209,7 @@ jobs: - name: Verify Celery Beat schedule loaded run: | - docker compose exec -T web python manage.py shell -c "from django.conf import settings; n = len(settings.CELERY_BEAT_SCHEDULE); assert n > 0, 'CELERY_BEAT_SCHEDULE is empty'; print('Beat schedule entries:', n)" + docker compose exec -T web python manage.py shell -c "from django.conf import settings; import sys; n = len(settings.CELERY_BEAT_SCHEDULE); print('Beat schedule entries:', n); sys.exit(1 if n <= 0 else 0)" - name: Health check run: make health diff --git a/Makefile b/Makefile index 28667613..52d5efaa 100644 --- a/Makefile +++ b/Makefile @@ -98,7 +98,7 @@ ps: .PHONY: health health: $(COMPOSE) exec -T $(APP) python manage.py check --database default - $(COMPOSE) exec -T $(APP) python manage.py shell -c "from django.conf import settings; n = len(settings.CELERY_BEAT_SCHEDULE); assert n > 0, 'CELERY_BEAT_SCHEDULE is empty'; print('Beat schedule entries:', n)" + $(COMPOSE) exec -T $(APP) python manage.py shell -c "from django.conf import settings; import sys; n = len(settings.CELERY_BEAT_SCHEDULE); print('Beat schedule entries:', n); sys.exit(1 if n <= 0 else 0)" $(COMPOSE) exec -T redis redis-cli ping | grep -q PONG $(COMPOSE) exec -T selenium curl -sf http://localhost:4444/status | grep -qE '"ready"[[:space:]]*:[[:space:]]*true' $(COMPOSE) ps --status running celery_worker | grep -q celery_worker