diff --git a/.github/workflows/issue-monitor.yml b/.github/workflows/issue-monitor.yml
new file mode 100644
index 0000000000..5a04a4ea6b
--- /dev/null
+++ b/.github/workflows/issue-monitor.yml
@@ -0,0 +1,63 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: ADK Issue Monitoring Agent
+
+on:
+  schedule:
+    # Runs daily at 6:00 AM UTC
+    - cron: '0 6 * * *'
+
+  # Allows manual triggering from the GitHub Actions tab
+  workflow_dispatch:
+    inputs:
+      full_scan:
+        description: 'Run an Initial Full Scan of ALL open issues'
+        required: false
+        type: boolean
+        default: false
+
+jobs:
+  sweep-spam:
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    permissions:
+      issues: write
+      contents: read
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests google-adk python-dotenv
+
+      - name: Run Issue Monitoring Agent
+        env:
+          GITHUB_TOKEN: ${{ secrets.ADK_TRIAGE_AGENT }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          OWNER: ${{ github.repository_owner }}
+          REPO: ${{ github.event.repository.name }}
+          CONCURRENCY_LIMIT: 3
+          INITIAL_FULL_SCAN: ${{ github.event.inputs.full_scan == 'true' }}
+          # The agent package lives in contributing/samples; expose it to `python -m`.
+          PYTHONPATH: contributing/samples
+        run: python -m adk_issue_monitoring_agent.main
diff --git a/contributing/samples/adk_issue_monitoring_agent/PROMPT_INSTRUCTION.txt
b/contributing/samples/adk_issue_monitoring_agent/PROMPT_INSTRUCTION.txt new file mode 100644 index 0000000000..5beacb11be --- /dev/null +++ b/contributing/samples/adk_issue_monitoring_agent/PROMPT_INSTRUCTION.txt @@ -0,0 +1,18 @@ +You are the automated security and moderation agent for the {OWNER}/{REPO} repository. + +You will be provided with an Issue Number and a list of comments made by non-maintainers. +Your job is to read through these comments and identify if any of them contain SPAM, promotional content for 3rd-party websites, SEO links, or objectionable material. + +CRITERIA FOR SPAM: +- The comment is completely unrelated to the repository or the specific issue. +- The comment promotes a 3rd party product, service, or website. +- The comment is generic "SEO spam" (e.g., "Great post! Check out my site at [link]"). + +INSTRUCTIONS: +1. Evaluate the provided comments. +2. If you identify spam, call the `flag_issue_as_spam` tool. + - Pass the `item_number`. + - Pass a brief `detection_reason` explaining which comment is spam and why (e.g., "@spammer_bot posted an irrelevant link to a shoe store"). +3. If NONE of the comments contain spam, do NOT call any tools. Just respond with "No spam detected." + +Remember: Do not flag comments that are merely unhelpful, off-topic, or from beginners asking legitimate questions. Only flag actual spam, endorsements, or objectionable material. \ No newline at end of file diff --git a/contributing/samples/adk_issue_monitoring_agent/README.md b/contributing/samples/adk_issue_monitoring_agent/README.md new file mode 100644 index 0000000000..da1a5d7d4f --- /dev/null +++ b/contributing/samples/adk_issue_monitoring_agent/README.md @@ -0,0 +1,62 @@ +# ADK Issue Monitoring Agent 🛡️ + +An intelligent, cost-optimized, automated moderation agent built with the **Google Agent Development Kit (ADK)**. 
+ +This agent automatically audits GitHub repository issues to detect SEO spam, unsolicited promotional links, and irrelevant third-party endorsements. If spam is detected, it automatically applies a `spam` label and alerts the repository maintainers. + +## ✨ Key Features & Optimizations + +* **Zero-Waste LLM Invocations:** Fetches issue comments via REST APIs and pre-filters them in Python. It automatically ignores comments from maintainers, `[bot]` accounts, and the official `adk-bot`. The Gemini LLM is never invoked for safe threads, saving 100% of the token cost. +* **Dual-Mode Scanning:** Can perform a **Deep Clean** (auditing the entire history of all open issues) or a **Daily Sweep** (only fetching issues updated within the last 24 hours). +* **Token Truncation:** Uses Regular Expressions to strip out Markdown code blocks (` ``` `) replacing them with `[CODE BLOCK REMOVED]`, and truncates unusually long text to 1,500 characters before sending it to the AI. +* **Idempotency (Anti-Double-Posting):** The bot reads the comment history for its own signature. If it has already flagged an issue, it instantly skips it, preventing infinite feedback loops. + +--- + +## Configuration + +The agent is configured via environment variables, typically set as secrets in GitHub Actions. + +### Required Secrets + +| Secret Name | Description | +| :--- | :--- | +| `GITHUB_TOKEN` | A GitHub Personal Access Token (PAT) or Service Account Token with `repo` and `issues: write` scope. | +| `GOOGLE_API_KEY` | An API key for the Google AI (Gemini) model used for reasoning. | + +### Optional Configuration + +These variables control the scanning behavior, thresholds, and model selection. + +| Variable Name | Description | Default | +| :--- | :--- | :--- | +| `INITIAL_FULL_SCAN` | If `true`, audits every open issue in the repository. If `false`, only audits issues updated in the last 24 hours. | `false` | +| `SPAM_LABEL_NAME` | The exact text of the label applied to flagged issues. 
| `spam` | +| `BOT_NAME` | The GitHub username of your official bot to ensure its comments are ignored. | `adk-bot` | +| `CONCURRENCY_LIMIT` | The number of issues to process concurrently. | `3` | +| `SLEEP_BETWEEN_CHUNKS` | Time in seconds to sleep between batches to respect GitHub API rate limits. | `1.5` | +| `LLM_MODEL_NAME` | The specific Gemini model version to use. | `gemini-2.5-flash` | +| `OWNER` | Repository owner (set from the workflow context in Actions). | `google` | +| `REPO` | Repository name (set from the workflow context in Actions). | `adk-python` | + +--- + +## Deployment + +To deploy this agent, a GitHub Actions workflow file (`.github/workflows/issue-monitor.yml`) is recommended. + +### Directory Structure Note +Because this agent resides within the `adk-python` package structure, the workflow must ensure the script is executed correctly to handle imports. It must be run as a module (`python -m`) with its parent directory, `contributing/samples`, on `PYTHONPATH`. + +### Example Workflow Execution +```yaml + - name: Run ADK Issue Monitoring Agent + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + OWNER: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + # Mapped to the manual trigger checkbox in the GitHub UI + INITIAL_FULL_SCAN: ${{ github.event.inputs.full_scan == 'true' }} + PYTHONPATH: contributing/samples + run: python -m adk_issue_monitoring_agent.main +``` \ No newline at end of file diff --git a/contributing/samples/adk_issue_monitoring_agent/__init__.py b/contributing/samples/adk_issue_monitoring_agent/__init__.py new file mode 100644 index 0000000000..4015e47d6e --- /dev/null +++ b/contributing/samples/adk_issue_monitoring_agent/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import agent
diff --git a/contributing/samples/adk_issue_monitoring_agent/agent.py b/contributing/samples/adk_issue_monitoring_agent/agent.py
new file mode 100644
index 0000000000..956f5c0c27
--- /dev/null
+++ b/contributing/samples/adk_issue_monitoring_agent/agent.py
@@ -0,0 +1,101 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+from typing import Any
+
+from adk_issue_monitoring_agent.settings import BOT_ALERT_SIGNATURE
+from adk_issue_monitoring_agent.settings import GITHUB_BASE_URL
+from adk_issue_monitoring_agent.settings import LLM_MODEL_NAME
+from adk_issue_monitoring_agent.settings import OWNER
+from adk_issue_monitoring_agent.settings import REPO
+from adk_issue_monitoring_agent.settings import SPAM_LABEL_NAME
+from adk_issue_monitoring_agent.utils import error_response
+from adk_issue_monitoring_agent.utils import post_request
+from google.adk.agents.llm_agent import Agent
+from requests.exceptions import RequestException
+
+logger = logging.getLogger("google_adk." + __name__)
+
+
+def load_prompt_template(filename: str) -> str:
+  """Reads a prompt template stored next to this module.
+
+  Args:
+    filename: Name of the template file inside this module's directory.
+
+  Returns:
+    The full text of the file.
+  """
+  file_path = os.path.join(os.path.dirname(__file__), filename)
+  # Decode explicitly as UTF-8 instead of relying on the locale's default
+  # encoding.
+  with open(file_path, "r", encoding="utf-8") as f:
+    return f.read()
+
+
+PROMPT_TEMPLATE = load_prompt_template("PROMPT_INSTRUCTION.txt")
+
+# --- Tools ---
+
+
+def flag_issue_as_spam(
+    item_number: int, detection_reason: str
+) -> dict[str, Any]:
+  """
+  Flags an issue as spam by adding a label and leaving a comment for maintainers.
+
+  Args:
+      item_number (int): The GitHub issue number.
+      detection_reason (str): The explanation of what the spam is.
+  """
+  logger.info(f"Flagging #{item_number} as SPAM. Reason: {detection_reason}")
+
+  label_url = (
+      f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/labels"
+  )
+  comment_url = (
+      f"{GITHUB_BASE_URL}/repos/{OWNER}/{REPO}/issues/{item_number}/comments"
+  )
+
+  alert_body = (
+      f"{BOT_ALERT_SIGNATURE}\n"
+      "@maintainers, a suspected spam comment was detected in this thread.\n\n"
+      f"**Reason:** {detection_reason}"
+  )
+
+  try:
+    # 1. Post Alert Comment first. If the label request below fails, the
+    # thread still carries a visible alert with BOT_ALERT_SIGNATURE instead of
+    # ending up labeled without any explanation.
+    post_request(comment_url, {"body": alert_body})
+    # 2. Add Label
+    post_request(label_url, {"labels": [SPAM_LABEL_NAME]})
+    return {"status": "success", "message": "Maintainers alerted successfully."}
+  except RequestException as e:
+    # Reported back to the caller as data rather than raised.
+    return error_response(f"Error flagging issue: {e}")
+
+
+root_agent = Agent(
+    model=LLM_MODEL_NAME,
+    name="spam_auditor_agent",
+    description="Audits issue comments for spam.",
+    instruction=PROMPT_TEMPLATE.format(
+        OWNER=OWNER,
+        REPO=REPO,
+    ),
+    tools=[flag_issue_as_spam],
+)
diff --git a/contributing/samples/adk_issue_monitoring_agent/main.py b/contributing/samples/adk_issue_monitoring_agent/main.py
new file mode 100644
index 0000000000..cbde3a6516
--- /dev/null
+++ b/contributing/samples/adk_issue_monitoring_agent/main.py
@@ -0,0 +1,203 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import logging
+import re
+import time
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Tuple
+
+from adk_issue_monitoring_agent.agent import root_agent
+from adk_issue_monitoring_agent.settings import BOT_ALERT_SIGNATURE
+from adk_issue_monitoring_agent.settings import BOT_NAME
+from adk_issue_monitoring_agent.settings import CONCURRENCY_LIMIT
+from adk_issue_monitoring_agent.settings import OWNER
+from adk_issue_monitoring_agent.settings import REPO
+from adk_issue_monitoring_agent.settings import SLEEP_BETWEEN_CHUNKS
+from adk_issue_monitoring_agent.utils import get_api_call_count
+from adk_issue_monitoring_agent.utils import get_issue_comments
+from adk_issue_monitoring_agent.utils import get_issue_details
+from adk_issue_monitoring_agent.utils import get_repository_maintainers
+from adk_issue_monitoring_agent.utils import get_target_issues
+from adk_issue_monitoring_agent.utils import reset_api_call_count
+from google.adk.cli.utils import logs
+from google.adk.runners import InMemoryRunner
+from google.genai import types
+
+logs.setup_adk_logger(level=logging.INFO)
+logger = logging.getLogger("google_adk." + __name__)
+
+APP_NAME = "spam_bot_app"
+USER_ID = "spam_bot_user"
+
+# Matches fenced Markdown code blocks. Compiled once because it is applied to
+# every issue body and comment.
+_CODE_BLOCK_RE = re.compile(r"```.*?```", flags=re.DOTALL)
+# Longest cleaned text forwarded to the model for a single body or comment.
+_MAX_TEXT_CHARS = 1500
+
+
+def _author_login(item: Dict[str, Any]) -> str:
+  """Returns the author's login, or "" when the payload carries no user."""
+  # Guards against a null `user` value, not just a missing key.
+  return (item.get("user") or {}).get("login") or ""
+
+
+def _is_trusted_author(login: str, maintainers: List[str]) -> bool:
+  """Returns True for maintainers, `[bot]` accounts and the configured bot."""
+  return (
+      login in maintainers or login.endswith("[bot]") or login == BOT_NAME
+  )
+
+
+def _clean_text(text: str) -> str:
+  """Replaces fenced code blocks with a placeholder and caps the length."""
+  cleaned = _CODE_BLOCK_RE.sub("\n[CODE BLOCK REMOVED]\n", text)
+  if len(cleaned) > _MAX_TEXT_CHARS:
+    cleaned = cleaned[:_MAX_TEXT_CHARS] + "\n...[TRUNCATED]"
+  return cleaned
+
+
+def _collect_user_text(
+    issue_number: int,
+    issue: Dict[str, Any],
+    comments: List[Dict[str, Any]],
+    maintainers: List[str],
+) -> Optional[List[str]]:
+  """Builds the list of non-maintainer text snippets to audit.
+
+  Args:
+    issue_number: Issue number, used only for logging.
+    issue: The issue object; its `user` and `body` fields are read.
+    comments: The issue's comment objects, in the order they were fetched.
+    maintainers: Logins whose text is never audited.
+
+  Returns:
+    The formatted snippets (possibly empty), or None when a trusted account
+    has already posted the spam alert on this issue.
+  """
+  user_comments = []
+
+  # The original issue description is audited like any other comment.
+  issue_author = _author_login(issue)
+  if not _is_trusted_author(issue_author, maintainers):
+    cleaned_issue_body = _clean_text(issue.get("body") or "")
+    user_comments.append(
+        f"Author (Original Issue): @{issue_author}\nText:"
+        f" {cleaned_issue_body}\n---"
+    )
+
+  for c in comments:
+    author = _author_login(c)
+    body = c.get("body") or ""
+
+    if _is_trusted_author(author, maintainers):
+      # Only a trusted account can mark the thread as already handled.
+      # Honoring the signature from anyone would let a spammer paste it into
+      # a comment to switch the audit off. This relies on the alert being
+      # posted by BOT_NAME, a `[bot]` account or a maintainer.
+      if BOT_ALERT_SIGNATURE in body:
+        logger.info(
+            f"#{issue_number}: Spam bot already alerted maintainers previously."
+            " Skipping."
+        )
+        return None
+      continue
+
+    cleaned_body = _clean_text(body)
+    user_comments.append(f"Author: @{author}\nComment: {cleaned_body}\n---")
+
+  return user_comments
+
+
+async def _run_audit(issue_number: int, user_comments: List[str]) -> None:
+  """Sends the collected text to the agent and logs its textual replies."""
+  compiled_comments = "\n".join(user_comments)
+  prompt_text = (
+      "Please review the following text for issue"
+      f" #{issue_number}:\n\n{compiled_comments}"
+  )
+
+  runner = InMemoryRunner(agent=root_agent, app_name=APP_NAME)
+  session = await runner.session_service.create_session(
+      user_id=USER_ID, app_name=APP_NAME
+  )
+  prompt_message = types.Content(
+      role="user", parts=[types.Part(text=prompt_text)]
+  )
+
+  async for event in runner.run_async(
+      user_id=USER_ID, session_id=session.id, new_message=prompt_message
+  ):
+    if (
+        event.content
+        and event.content.parts
+        and hasattr(event.content.parts[0], "text")
+    ):
+      text = event.content.parts[0].text
+      if text:
+        # Keep the log line short and on a single line.
+        clean_text = text[:100].replace("\n", " ")
+        logger.info(f"#{issue_number} Decision: {clean_text}...")
+
+
+async def process_single_issue(
+    issue_number: int, maintainers: List[str]
+) -> Tuple[float, int]:
+  """Audits one issue and reports how long it took.
+
+  Any exception raised while processing is logged and swallowed so that one
+  failing issue does not abort the rest of the chunk.
+
+  Args:
+    issue_number: Number of the issue to audit.
+    maintainers: Logins whose text is never sent to the model.
+
+  Returns:
+    A (duration_seconds, api_calls) tuple. api_calls is the change in the
+    global API call counter while this coroutine ran, so it can include calls
+    made by issues processed concurrently.
+  """
+  start_time = time.perf_counter()
+  start_api_calls = get_api_call_count()
+
+  try:
+    # 1. Fetch the main issue AND the comments. The GitHub helpers are
+    # blocking calls, so they run in worker threads; otherwise the coroutines
+    # gathered in one chunk would stall the event loop and run one by one.
+    issue = await asyncio.to_thread(
+        get_issue_details, OWNER, REPO, issue_number
+    )
+    comments = await asyncio.to_thread(
+        get_issue_comments, OWNER, REPO, issue_number
+    )
+
+    # 2. Keep only text written by non-maintainers.
+    user_comments = _collect_user_text(
+        issue_number, issue, comments, maintainers
+    )
+
+    if user_comments is None:
+      # Already alerted; _collect_user_text has logged the reason.
+      pass
+    elif not user_comments:
+      # 3. Skip LLM if no user text exists
+      logger.debug(f"#{issue_number}: No non-maintainer text found. Skipping.")
+    else:
+      logger.info(
+          f"Processing Issue #{issue_number} (Found {len(user_comments)} items to"
+          " review)..."
+      )
+      # 4. Format prompt and invoke LLM
+      await _run_audit(issue_number, user_comments)
+
+  except Exception as e:
+    logger.error(f"Error processing issue #{issue_number}: {e}", exc_info=True)
+
+  # Calculate duration and API calls regardless of success or failure
+  duration = time.perf_counter() - start_time
+  issue_api_calls = get_api_call_count() - start_api_calls
+  return duration, issue_api_calls
+
+
+async def main():
+  """Fetches maintainers and target issues, then audits the issues in chunks."""
+  logger.info(f"--- Starting Spam Bot for {OWNER}/{REPO} ---")
+  reset_api_call_count()
+
+  # Step 1: Fetch Maintainers
+  try:
+    maintainers = get_repository_maintainers(OWNER, REPO)
+    logger.info(f"Found {len(maintainers)} maintainers.")
+  except Exception as e:
+    logger.critical(f"Failed to fetch maintainers: {e}")
+    return
+
+  # Step 2: Fetch target issues
+  try:
+    all_issues = get_target_issues(OWNER, REPO)
+  except Exception as e:
+    logger.critical(f"Failed to fetch issue list: {e}")
+    return
+
+  total_count = len(all_issues)
+  if total_count == 0:
+    logger.info("No issues matched criteria. Run finished.")
+    return
+
+  logger.info(f"Found {total_count} issues to process.")
+
+  # Step 3: Iterate through issues, CONCURRENCY_LIMIT at a time. A limit of 0
+  # would make range() raise and a negative one would skip every issue, so
+  # the chunk size is never allowed below 1.
+  chunk_size = max(1, CONCURRENCY_LIMIT)
+  for i in range(0, total_count, chunk_size):
+    chunk = all_issues[i : i + chunk_size]
+    logger.info(f"Processing chunk: {chunk}")
+
+    tasks = [
+        process_single_issue(issue_num, maintainers) for issue_num in chunk
+    ]
+    await asyncio.gather(*tasks)
+
+    # No pause is needed after the final chunk.
+    if (i + chunk_size) < total_count:
+      await asyncio.sleep(SLEEP_BETWEEN_CHUNKS)
+
+  logger.info(f"--- Run Finished. Total API calls: {get_api_call_count()} ---")
+
+
+if __name__ == "__main__":
+  asyncio.run(main())
diff --git a/contributing/samples/adk_issue_monitoring_agent/settings.py b/contributing/samples/adk_issue_monitoring_agent/settings.py
new file mode 100644
index 0000000000..fbba22f904
--- /dev/null
+++ b/contributing/samples/adk_issue_monitoring_agent/settings.py
@@ -0,0 +1,43 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+CURRENT_DIR = Path(__file__).resolve().parent
+ENV_PATH = CURRENT_DIR / ".env"
+# override=True: values from a .env file next to this module replace variables
+# that are already set in the process environment.
+load_dotenv(dotenv_path=ENV_PATH, override=True)
+
+
+def _env(name: str, default: str) -> str:
+  """Returns the environment variable, or `default` when unset or blank.
+
+  os.getenv() only falls back to its default when the variable is missing, so
+  a variable that is set to an empty string (for example from an empty
+  workflow expression) would otherwise yield "".
+  """
+  value = os.getenv(name)
+  if value is None or not value.strip():
+    return default
+  return value
+
+
+GITHUB_BASE_URL = "https://api.github.com"
+GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+if not GITHUB_TOKEN:
+  raise ValueError("GITHUB_TOKEN environment variable not set")
+
+OWNER = _env("OWNER", "google")
+REPO = _env("REPO", "adk-python")
+LLM_MODEL_NAME = _env("LLM_MODEL_NAME", "gemini-2.5-flash")
+
+SPAM_LABEL_NAME = _env("SPAM_LABEL_NAME", "spam")
+# Used as the step of a range() over the issue list, so it must be at least 1.
+CONCURRENCY_LIMIT = max(1, int(_env("CONCURRENCY_LIMIT", "3")))
+BOT_NAME = _env("BOT_NAME", "adk-bot")
+# Must never be empty: an empty string is a substring of every comment body,
+# which would make every thread look like it had already been flagged.
+BOT_ALERT_SIGNATURE = _env(
+    "BOT_ALERT_SIGNATURE", "🚨 **Automated Spam Detection Alert** 🚨"
+)
+# A negative pause makes no sense; treat it as "no pause".
+SLEEP_BETWEEN_CHUNKS = max(0.0, float(_env("SLEEP_BETWEEN_CHUNKS", "1.5")))
+
+
+# Toggle for the initial run
+INITIAL_FULL_SCAN = _env("INITIAL_FULL_SCAN", "false").lower() == "true"
diff --git a/contributing/samples/adk_issue_monitoring_agent/utils.py b/contributing/samples/adk_issue_monitoring_agent/utils.py
new file mode 100644
index 0000000000..88b9590417
--- /dev/null
+++ b/contributing/samples/adk_issue_monitoring_agent/utils.py
@@ -0,0 +1,178 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone
+import logging
+import threading
+from typing import Any
+from typing import Dict
+from typing import Iterator
+from typing import List
+from typing import Optional
+
+from adk_issue_monitoring_agent.settings import GITHUB_BASE_URL
+from adk_issue_monitoring_agent.settings import GITHUB_TOKEN
+from adk_issue_monitoring_agent.settings import INITIAL_FULL_SCAN
+from adk_issue_monitoring_agent.settings import SPAM_LABEL_NAME
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+logger = logging.getLogger("google_adk." + __name__)
+
+# Page size requested from paginated GitHub list endpoints.
+_PER_PAGE = 100
+
+_api_call_count = 0
+_counter_lock = threading.Lock()
+
+
+def get_api_call_count() -> int:
+  """Returns how many get_request/post_request calls ran since the last reset."""
+  with _counter_lock:
+    return _api_call_count
+
+
+def reset_api_call_count() -> None:
+  """Resets the request counter to zero."""
+  global _api_call_count
+  with _counter_lock:
+    _api_call_count = 0
+
+
+def _increment_api_call_count() -> None:
+  """Adds one to the request counter."""
+  global _api_call_count
+  with _counter_lock:
+    _api_call_count += 1
+
+
+# NOTE(review): POST is listed in allowed_methods, so a POST answered with one
+# of the status codes below is sent again. Confirm that a duplicated comment
+# or label request is acceptable.
+retry_strategy = Retry(
+    total=6,
+    backoff_factor=2,
+    status_forcelist=[429, 500, 502, 503, 504],
+    allowed_methods=["GET", "POST", "PATCH", "DELETE"],
+)
+adapter = HTTPAdapter(max_retries=retry_strategy)
+_session = requests.Session()
+_session.mount("https://", adapter)
+_session.headers.update({
+    "Authorization": f"token {GITHUB_TOKEN}",
+    "Accept": "application/vnd.github.v3+json",
+})
+
+
+def get_request(url: str, params: Optional[Dict[str, Any]] = None) -> Any:
+  """Sends a GET request and returns the decoded JSON body.
+
+  Args:
+    url: Absolute URL to request.
+    params: Optional query parameters.
+
+  Returns:
+    The parsed JSON response.
+
+  Raises:
+    requests.exceptions.RequestException: If the request fails or the
+      response status is an error.
+  """
+  _increment_api_call_count()
+  response = _session.get(url, params=params or {}, timeout=60)
+  response.raise_for_status()
+  return response.json()
+
+
+def post_request(url: str, payload: Any) -> Any:
+  """Sends `payload` as a JSON POST request and returns the decoded JSON body.
+
+  Raises:
+    requests.exceptions.RequestException: If the request fails or the
+      response status is an error.
+  """
+  _increment_api_call_count()
+  response = _session.post(url, json=payload, timeout=60)
+  response.raise_for_status()
+  return response.json()
+
+
+def error_response(error_message: str) -> Dict[str, Any]:
+  """Wraps an error message in the status dict returned by tools."""
+  return {"status": "error", "message": error_message}
+
+
+def _iter_pages(
+    url: str, params: Optional[Dict[str, Any]] = None
+) -> Iterator[List[Dict[str, Any]]]:
+  """Yields each non-empty page of a paginated list endpoint.
+
+  Pages are requested with `per_page` set to _PER_PAGE, starting at page 1.
+  Iteration stops after an empty page or a page shorter than _PER_PAGE.
+
+  Args:
+    url: Absolute URL of the list endpoint.
+    params: Extra query parameters sent with every page request.
+
+  Raises:
+    requests.exceptions.RequestException: If any page request fails.
+  """
+  query = dict(params or {})
+  query["per_page"] = _PER_PAGE
+  page = 1
+
+  while True:
+    query["page"] = page
+    # Pass a copy so the caller of get_request never sees later mutations.
+    items = get_request(url, params=dict(query))
+    if not items:
+      return
+
+    yield items
+
+    if len(items) < _PER_PAGE:
+      return  # Reached the last page
+    page += 1
+
+
+def get_repository_maintainers(owner: str, repo: str) -> List[str]:
+  """Fetches the logins of all users with push/maintain access.
+
+  The collaborators endpoint is paginated, so every page is read; reading only
+  the first page would leave some maintainers out of the list.
+  """
+  url = f"{GITHUB_BASE_URL}/repos/{owner}/{repo}/collaborators"
+  return [
+      user["login"]
+      for users in _iter_pages(url, {"permission": "push"})
+      for user in users
+  ]
+
+
+def get_issue_details(
+    owner: str, repo: str, issue_number: int
+) -> Dict[str, Any]:
+  """Fetches the main issue object to get the original description (body)."""
+  url = f"{GITHUB_BASE_URL}/repos/{owner}/{repo}/issues/{issue_number}"
+  return get_request(url)
+
+
+def get_issue_comments(owner: str, repo: str, issue_number: int) -> List[Dict]:
+  """Fetches ALL comments for a specific issue, handling pagination."""
+  url = f"{GITHUB_BASE_URL}/repos/{owner}/{repo}/issues/{issue_number}/comments"
+  all_comments = []
+  for comments in _iter_pages(url):
+    all_comments.extend(comments)
+  return all_comments
+
+
+def get_target_issues(owner: str, repo: str) -> List[int]:
+  """Fetches the numbers of the open issues that should be audited.
+
+  If INITIAL_FULL_SCAN is True, all open issues are listed. Otherwise only
+  issues updated in the last 24 hours are listed, using the 'since' parameter.
+  Pull requests and issues that already carry SPAM_LABEL_NAME are left out.
+
+  A request failure is logged and ends the listing early; the issue numbers
+  collected up to that point are still returned.
+  """
+  url = f"{GITHUB_BASE_URL}/repos/{owner}/{repo}/issues"
+  params = {"state": "open"}
+
+  if INITIAL_FULL_SCAN:
+    logger.info("INITIAL_FULL_SCAN is True. Fetching ALL open issues...")
+  else:
+    yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).strftime(
+        "%Y-%m-%dT%H:%M:%SZ"
+    )
+    params["since"] = yesterday
+    logger.info(f"Daily mode: Fetching issues updated since {yesterday}...")
+
+  issue_numbers = []
+  # Number of the page currently being fetched; only used in the error log.
+  page = 1
+
+  try:
+    for items in _iter_pages(url, params):
+      for item in items:
+        # The issues endpoint also returns pull requests; skip them.
+        if "pull_request" in item:
+          continue
+
+        # Extract all the label names on this issue
+        current_labels = [label["name"] for label in item.get("labels", [])]
+
+        # Only add the issue if it DOES NOT already have the spam label
+        if SPAM_LABEL_NAME not in current_labels:
+          issue_numbers.append(item["number"])
+        else:
+          logger.debug(f"Skipping #{item['number']} - already marked as spam.")
+
+      page += 1
+  except requests.exceptions.RequestException as e:
+    logger.error(f"Failed to fetch issues on page {page}: {e}")
+
+  return issue_numbers