From bd78e76698bca3618b38905896cca53b9e19aa87 Mon Sep 17 00:00:00 2001 From: iGufrankhan Date: Sat, 15 Nov 2025 23:33:31 +0530 Subject: [PATCH 001/389] Use configured Celery concurrency values instead of hardcoded ones (#3403) Signed-off-by: iGufrankhan --- augur/application/cli/tasks.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/augur/application/cli/tasks.py b/augur/application/cli/tasks.py index b0a1ac2e6..22c20d6bb 100644 --- a/augur/application/cli/tasks.py +++ b/augur/application/cli/tasks.py @@ -13,9 +13,12 @@ from augur import instance_id from augur.application.logs import AugurLogger +from augur.application.config import AugurConfig +from augur.application.db.session import DatabaseSession from augur.application.cli import test_connection, test_db_connection from augur.application.cli.backend import clear_rabbitmq_messages, raise_open_file_limit + logger = AugurLogger("augur", reset_logfiles=False).get_logger() @click.group('celery', short_help='Commands for controlling the backend API server & data collection workers') @@ -33,10 +36,18 @@ def start(): scheduling_worker_process = None core_worker_process = None secondary_worker_process = None + + + with DatabaseSession(logger) as session: + config = AugurConfig(logger, session) + core_count = config.get_value("Celery", "core_worker_count") + secondary_count = config.get_value("Celery", "secondary_worker_count") + scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling" - core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=90 -n core:{uuid.uuid4().hex}@%h" - secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=20 -n secondary:{uuid.uuid4().hex}@%h -Q secondary" + core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_count} -n core:{uuid.uuid4().hex}@%h" + secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_count} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" + scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" ")) core_worker_process = subprocess.Popen(core_worker.split(" ")) From cc7d9a8161a80086849c1cfd74aa94a6b1c61ae7 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Mon, 17 Nov 2025 00:22:02 -0500 Subject: [PATCH 002/389] refactor KeyClient and KeyOrchestrator for improved clarity and documentation; add README for project overview and usage instructions Signed-off-by: Shlok Gilda --- keyman/KeyClient.py | 320 +++++++++++++++++++++------------- keyman/KeyOrchestrationAPI.py | 22 ++- keyman/Orchestrator.py | 118 +++++++++---- keyman/README.md | 173 ++++++++++++++++++ 4 files changed, 472 insertions(+), 161 deletions(-) create mode 100644 keyman/README.md diff --git a/keyman/KeyClient.py b/keyman/KeyClient.py index 21f9daa9f..8b06cb473 100644 --- a/keyman/KeyClient.py +++ b/keyman/KeyClient.py @@ -1,22 +1,28 @@ -from augur.tasks.init.redis_connection import get_redis_connection -from redis.client import PubSub -from logging import Logger from os import getpid -import time, json +import time +import json +from logging import Logger +from redis.client import PubSub +from augur.tasks.init.redis_connection import get_redis_connection from keyman.KeyOrchestrationAPI import spec, WaitKeyTimeout class KeyClient: - """ NOT THREAD SAFE! 
- - Only one KeyClient can exist at a time per *process*, as - the process ID is used for async communication between - the client and the orchestrator. + """Worker-side interface for requesting API keys from the orchestrator. - All functions will block indefinitely if the orchestration - server is not running or not responding. - - param platform: The default platform to use for key requests + NOT THREAD SAFE! Only one KeyClient can exist at a time per *process* + as the process ID is used for async communication between the client + and the orchestrator. + + All functions will block indefinitely if the orchestration server is + not running or not responding. + + Args: + platform: Default platform for key requests (e.g., 'github_rest') + logger: Logger instance for debugging + + Raises: + ValueError: If platform is empty or None """ def __init__(self, platform: str, logger: Logger): self.id = getpid() @@ -37,37 +43,62 @@ def __init__(self, platform: str, logger: Logger): self.platform = platform self.logger = logger - def _send(self, req_type, **kwargs): + def _send(self, req_type: str, **kwargs) -> None: + """Send a request message to the orchestrator via Redis pub/sub. + + Args: + req_type: Request type ('NEW', 'EXPIRE', 'INVALIDATE') + **kwargs: Additional message parameters (e.g., key_platform, key_str) + + Returns: + None + """ kwargs["type"] = req_type kwargs["requester_id"] = self.id - + self.stdout.publish(self.REQUEST, json.dumps(kwargs)) - def _recv(self, timeout = None): + def _recv(self, timeout: int | None = None) -> dict: + """Receive a response message from the orchestrator. + + Args: + timeout: Optional timeout in seconds for get_message() + + Returns: + dict: Parsed JSON response message from orchestrator + + Raises: + WaitKeyTimeout: If orchestrator requests client to wait + """ if timeout is not None: return self.stdin.get_message(timeout = timeout) - + stream = self.stdin.listen() - + reply = next(stream) msg = json.loads(reply["data"]) - + if "wait" in msg: raise WaitKeyTimeout(msg["wait"]) else: return msg - def request(self, platform = None) -> str: - """ Request a new key from the Orchestrator - - Will block until a key is available. Will block - *indefinitely* if no keys are available for the - requested platform. - - Optionally supply a platform string, if the default - one provided during initialization does not match - the desired platform for this request. + def request(self, platform: str | None = None) -> str: + """Request a new key from the orchestrator. + + Will block until a key is available. Will block *indefinitely* + if no keys are available for the requested platform. + + Args: + platform: Optional platform override. If None, uses the default + platform provided during initialization. 
+ + Returns: + str: A fresh API key for the requested platform + + Raises: + Exception: If orchestrator returns invalid response format """ while True: self._send("NEW", key_platform = platform or self.platform) @@ -79,30 +110,34 @@ def request(self, platform = None) -> str: else: raise Exception(f"Invalid response type: {msg}") except WaitKeyTimeout as e: - self.logger.debug(f"NO FRESH KEYS: sleeping for {e.tiemout_seconds} seconds") - time.sleep(e.tiemout_seconds) + self.logger.debug(f"NO FRESH KEYS: sleeping for {e.timeout_seconds} seconds") + time.sleep(e.timeout_seconds) except Exception as e: - self.logger.exception("Error during key request") + self.logger.exception(f"Error during key request: {e}") time.sleep(20) - def expire(self, key: str, refresh_timestamp: int, platform: str = None) -> str: - """ Expire a key, and get a new key in return + def expire(self, key: str, refresh_timestamp: int, platform: str | None = None) -> str: + """Expire a key and get a new key in return. - Multiple expiration messages can be sent for the - same key simultaneously. The final expiration - message to be received will take precedence. - - Will block until a key is available. Will block - *indefinitely* if no keys are available for the - requested platform. + Multiple expiration messages can be sent for the same key + simultaneously. The final expiration message to be received + will take precedence. - param refresh_timestamp: The Unix timestamp denoting - when the key will become available again for new requests - - Optionally supply a platform string, if the default - one provided during initialization does not match - the desired platform for this request. The platform - given *must* match the old key, and also the new key. + Will block until a key is available. Will block *indefinitely* + if no keys are available for the requested platform. + + Args: + key: The API key to mark as temporarily expired + refresh_timestamp: Unix timestamp when the key becomes available again + platform: Optional platform override. If None, uses the default + platform. The platform given *must* match the old key and + also the new key. + + Returns: + str: A fresh API key for the requested platform + + Raises: + Exception: If orchestrator returns invalid response format """ message = { "type": "EXPIRE", @@ -116,23 +151,28 @@ def expire(self, key: str, refresh_timestamp: int, platform: str = None) -> str: time.sleep(0.1) return self.request(platform) - def invalidate(self, key: str, platform: str = None) -> str: - """ Notify the orchestration server that the given key is - no longer valid, IE: cannot be used to service any - future requests, and will not refresh. - - Multiple invalidation messages can be sent for the - same key simultaneously. The initial invalidation - message to be received will take precedence. - - Will block until a key is available. Will block - *indefinitely* if less than two remaining valid keys - were available for the given platform before invalidation. - - Optionally supply a platform string, if the default - one provided during initialization does not match - the desired platform for this request. The platform - given *must* match the old key, and also the new key. + def invalidate(self, key: str, platform: str | None = None) -> str: + """Notify the orchestration server that the given key is permanently invalid. + + The key cannot be used to service any future requests and will not refresh. + Multiple invalidation messages can be sent for the same key simultaneously. 
+ The initial invalidation message to be received will take precedence. + + Will block until a key is available. Will block *indefinitely* if less than + two remaining valid keys were available for the given platform before + invalidation. + + Args: + key: The API key to mark as permanently invalid + platform: Optional platform override. If None, uses the default + platform. The platform given *must* match the old key and + also the new key. + + Returns: + str: A fresh API key for the requested platform + + Raises: + Exception: If orchestrator returns invalid response format """ message = { "type": "INVALIDATE", @@ -146,11 +186,13 @@ def invalidate(self, key: str, platform: str = None) -> str: return self.request(platform) class KeyPublisher: - """ NOT THREAD SAFE! - - Only one KeyPublisher can exist at a time per *process*, - as the process ID is used for async communication between - the publisher and the orchestrator. + """Admin interface for publishing/unpublishing keys to orchestrator. + + NOT THREAD SAFE! Only one KeyPublisher can exist at a time per *process* + as the process ID is used for async communication between the publisher + and the orchestrator. + + Typically used during Augur startup to load keys from database. """ def __init__(self) -> None: @@ -163,11 +205,17 @@ def __init__(self) -> None: self.stdin: PubSub = self.conn.pubsub(ignore_subscribe_messages = True) self.stdin.subscribe(f"{self.ANNOUNCE}-{self.id}") - def publish(self, key: str, platform: str): - """ Publish a key to the orchestration server - - No reply is sent, and keys are added or overwritten - silently. + def publish(self, key: str, platform: str) -> None: + """Publish a key to the orchestration server. + + No reply is sent, and keys are added or overwritten silently. + + Args: + key: The API key to publish + platform: The platform type (e.g., 'github_rest', 'gitlab_rest') + + Returns: + None """ message = { "type": "PUBLISH", @@ -177,14 +225,20 @@ def publish(self, key: str, platform: str): self.conn.publish(self.ANNOUNCE, json.dumps(message)) - def unpublish(self, key: str, platform: str): - """ Unpublish a key, and remove it from orchestration - - They key will remain in use by any workers that are currently - using it, but it will not be assigned to any new requests. - - No reply is sent, and non-existent keys or platforms are - ignored silently. + def unpublish(self, key: str, platform: str) -> None: + """Unpublish a key and remove it from orchestration. + + The key will remain in use by workers currently using it, + but won't be assigned to new requests. + + No reply is sent, and non-existent keys or platforms are ignored silently. + + Args: + key: The API key to unpublish + platform: The platform type (e.g., 'github_rest', 'gitlab_rest') + + Returns: + None """ message = { "type": "UNPUBLISH", @@ -194,20 +248,24 @@ def unpublish(self, key: str, platform: str): self.conn.publish(self.ANNOUNCE, json.dumps(message)) - def wait(self, timeout_seconds = 30, republish = False): - """ Wait for ACK from the orchestrator - - If a lot of publish or unpublish messages are waiting to - be processed, this will block until all of them have been - read. If the timeout is reached, this returns False, or if - the orchestration server acknkowledges within the time - limit, this returns True. - - If republish is True, the initial ACK request will be resent - 10 times per second until the orchestrator responds. 
This - should only be used to wait for the orchestrator to come - online, as it could put a lot of unnecessary messages on the - queue if the orchestrator is running, but very busy. + def wait(self, timeout_seconds: int = 30, republish: bool = False) -> bool: + """Wait for ACK from the orchestrator. + + If a lot of publish or unpublish messages are waiting to be processed, + this will block until all of them have been read. + + Args: + timeout_seconds: Maximum time to wait for ACK (default: 30) + republish: If True, resend ACK request 10 times per second until + response received. Should only be used to wait for the orchestrator + to come online, as it could put unnecessary messages on the queue + if the orchestrator is running but very busy. + + Returns: + bool: True if orchestrator acknowledged within time limit, False if timeout + + Raises: + ValueError: If timeout_seconds is negative """ if timeout_seconds < 0: raise ValueError("timeout cannot be negative") @@ -237,11 +295,14 @@ def wait(self, timeout_seconds = 30, republish = False): return False - def list_platforms(self): - """ Get a list of currently loaded orchestration platforms - - Will raise a ValueError if the orchestration server - returns a malformed response. + def list_platforms(self) -> list[str]: + """Get a list of currently loaded orchestration platforms. + + Returns: + list[str]: List of platform names (e.g., ['github_rest', 'gitlab_rest']) + + Raises: + ValueError: If the orchestration server returns a malformed response """ message = { "type": "LIST_PLATFORMS", @@ -254,7 +315,7 @@ def list_platforms(self): try: reply = json.loads(reply["data"]) - except Exception as e: + except Exception: raise ValueError("Exception during platform list decoding") if isinstance(reply, list): @@ -262,12 +323,18 @@ def list_platforms(self): raise ValueError(f"Unexpected reply during list operation: {reply}") - def list_keys(self, platform): - """ Get a list of currently loaded keys for the given platform - - Will raise a ValueError if the orchestration server - returns a malformed response, or if the platform does - not exist. + def list_keys(self, platform: str) -> list[str]: + """Get a list of currently loaded keys for the given platform. + + Args: + platform: The platform type (e.g., 'github_rest', 'gitlab_rest') + + Returns: + list[str]: List of API keys for the platform + + Raises: + ValueError: If the orchestration server returns a malformed response + or if the platform does not exist """ message = { "type": "LIST_KEYS", @@ -281,7 +348,7 @@ def list_keys(self, platform): try: reply = json.loads(reply["data"]) - except Exception as e: + except Exception: raise ValueError("Exception during key list decoding") if isinstance(reply, list): @@ -291,13 +358,18 @@ def list_keys(self, platform): else: raise ValueError(f"Unexpected reply during list operation: {reply}") - def list_invalid_keys(self, platform): - """ Get a list of currently loaded keys for the given platform, - which have been marked as invalid during runtime - - Will raise a ValueError if the orchestration server - returns a malformed response, or if the platform does - not exist. + def list_invalid_keys(self, platform: str) -> list[str]: + """Get a list of invalid keys for the given platform. 
+ + Args: + platform: The platform type (e.g., 'github_rest', 'gitlab_rest') + + Returns: + list[str]: List of permanently invalid API keys for the platform + + Raises: + ValueError: If the orchestration server returns a malformed response + or if the platform does not exist """ message = { "type": "LIST_INVALID_KEYS", @@ -311,7 +383,7 @@ def list_invalid_keys(self, platform): try: reply = json.loads(reply["data"]) - except Exception as e: + except Exception: raise ValueError("Exception during key list decoding") if isinstance(reply, list): @@ -321,11 +393,13 @@ def list_invalid_keys(self, platform): else: raise ValueError(f"Unexpected reply during list operation: {reply}") - def shutdown(self): - """ Instruct the orchestration server to shutdown + def shutdown(self) -> None: + """Instruct the orchestration server to shutdown. + + The orchestration server will process any requests sent prior to + this message, then shut down immediately. - The orchestration server will process any requests that - were sent prior to this message, and will then shut down - immediately upon processing of the shutdown command + Returns: + None """ self.conn.publish(self.ANNOUNCE, json.dumps({"type": "SHUTDOWN"})) diff --git a/keyman/KeyOrchestrationAPI.py b/keyman/KeyOrchestrationAPI.py index 68c4ec28a..31915b7a1 100644 --- a/keyman/KeyOrchestrationAPI.py +++ b/keyman/KeyOrchestrationAPI.py @@ -1,8 +1,7 @@ -""" This is a hybrid-fixed specification +"""This is a hybrid-fixed specification -The names of the channels *MUST NOT* change, -but the channel IDs are free to +The names of the channels *MUST NOT* change, but the channel IDs are free to change. """ spec = { "channels": [ @@ -92,8 +91,21 @@ } class WaitKeyTimeout(Exception): - def __init__(self, timeout_seconds) -> None: - self.tiemout_seconds = timeout_seconds + """Raised when waiting for a key exceeds the specified timeout. + + This typically occurs when all keys for a platform are expired and none + are available within the wait period. + + Args: + timeout_seconds: Maximum wait time before raising this exception + """ + def __init__(self, timeout_seconds: int) -> None: + self.timeout_seconds = timeout_seconds class InvalidRequest(Exception): + """Raised when a request doesn't conform to the KeyOrchestrationAPI spec. + + This can occur due to unknown message type, missing required fields, + invalid field types, or malformed JSON payload. + """ pass \ No newline at end of file diff --git a/keyman/Orchestrator.py b/keyman/Orchestrator.py index ed29397a9..638e35ae7 100644 --- a/keyman/Orchestrator.py +++ b/keyman/Orchestrator.py @@ -1,5 +1,7 @@ import os -import json, random, time +import json +import random +import time from keyman.KeyOrchestrationAPI import spec, WaitKeyTimeout, InvalidRequest @@ -28,11 +30,24 @@ conn = get_redis_connection() class KeyOrchestrator: + """Central API key management server for distributed workers. + + Manages three key pools per platform: + - fresh_keys: Available for assignment to workers + - expired_keys: Rate-limited keys with refresh timestamps + - invalid_keys: Permanently bad keys (never refreshed) + + Listens on two Redis pub/sub channels: + - ANNOUNCE: Admin operations (PUBLISH, UNPUBLISH, LIST_*, SHUTDOWN) + - REQUEST: Worker operations (NEW, EXPIRE, INVALIDATE) + + Single-threaded process that handles all key requests synchronously. 
+ """ def __init__(self) -> None: self.stdin = conn.pubsub(ignore_subscribe_messages = True) self.logger = logger - + # Load channel names and IDs from the spec for channel in spec["channels"]: # IE: self.ANNOUNCE = "augur-oauth-announce" @@ -43,35 +58,61 @@ def __init__(self) -> None: self.expired_keys: dict[str, dict[str, int]] = {} self.invalid_keys: dict[str, set[str]] = {} - def publish_key(self, key, platform): + def publish_key(self, key: str, platform: str) -> None: + """Add a key to the fresh pool for the given platform. + + Args: + key: API key string + platform: Platform identifier (e.g., 'github_rest') + """ if platform not in self.fresh_keys: self.fresh_keys[platform] = [key] self.expired_keys[platform] = {} self.invalid_keys[platform] = set() else: - self.fresh_keys[platform].append(key) + # Prevent duplicate keys from increasing selection probability + if key not in self.fresh_keys[platform]: + self.fresh_keys[platform].append(key) - def unpublish_key(self, key, platform): + def unpublish_key(self, key: str, platform: str) -> None: + """Remove a key from circulation (fresh or expired pool). + + Args: + key: API key string + platform: Platform identifier + """ if platform not in self.fresh_keys: return - + if key in self.fresh_keys[platform]: self.fresh_keys[platform].remove(key) elif key in self.expired_keys[platform]: self.expired_keys[platform].pop(key) - def expire_key(self, key, platform, timeout): - if not platform in self.fresh_keys or not key in self.fresh_keys[platform]: + def expire_key(self, key: str, platform: str, timeout: int) -> None: + """Move key from fresh to expired pool with refresh timestamp. + + Args: + key: API key string + platform: Platform identifier + timeout: Unix timestamp when key becomes fresh again + """ + if platform not in self.fresh_keys or key not in self.fresh_keys[platform]: return - - self.fresh_keys[platform].remove(key) + self.fresh_keys[platform].remove(key) self.expired_keys[platform][key] = timeout - def invalidate_key(self, key, platform): - if not platform in self.fresh_keys: + def invalidate_key(self, key: str, platform: str) -> None: + """Permanently invalidate a key (typically due to 401 response). + + Args: + key: API key string + platform: Platform identifier + """ + if platform not in self.fresh_keys: return - + if key in self.fresh_keys[platform]: self.fresh_keys[platform].remove(key) self.logger.debug("Invalidating fresh key") @@ -83,7 +124,8 @@ def invalidate_key(self, key, platform): self.invalid_keys[platform].add(key) - def refresh_keys(self): + def refresh_keys(self) -> None: + """Move expired keys back to fresh pool if their timeout has passed.""" curr_time = time.time() for platform in self.expired_keys: @@ -97,27 +139,40 @@ def refresh_keys(self): self.fresh_keys[platform].append(key) self.expired_keys[platform].pop(key) - def new_key(self, platform): - if not platform in self.fresh_keys: + def new_key(self, platform: str) -> str | None: + """Get a random fresh key for the platform, or raise WaitKeyTimeout. 
+ + Args: + platform: Platform identifier + + Returns: + Random key from fresh pool, or None if no keys published + + Raises: + InvalidRequest: If platform doesn't exist + WaitKeyTimeout: If no fresh keys available (includes wait duration) + """ + if platform not in self.fresh_keys: raise InvalidRequest(f"Invalid platform: {platform}") - + if not len(self.fresh_keys[platform]): if not len(self.expired_keys[platform]): self.logger.warning(f"Key was requested for {platform}, but none are published") return - - min = 0 + + min_timeout = 0 for key, timeout in self.expired_keys[platform].items(): - if not min or timeout < min: - min = timeout + if not min_timeout or timeout < min_timeout: + min_timeout = timeout - delta = int(min - time.time()) + delta = int(min_timeout - time.time()) raise WaitKeyTimeout(delta + 5 if delta > 0 else 5) - + return random.choice(self.fresh_keys[platform]) - def run(self): + def run(self) -> None: + """Main event loop - listens for Redis pub/sub messages and processes requests.""" self.logger.info("Ready") for msg in self.stdin.listen(): try: @@ -140,12 +195,9 @@ def run(self): self.logger.exception(e) continue - """ For performance reasons: - - Instead of dynamically checking that the - given channel matches one that we're - listening for, just check against each - channel that we have actions prepared for. + """For performance reasons: Instead of dynamically checking that the + given channel matches one that we're listening for, just check against each + channel that we have actions prepared for. """ if channel == self.ANNOUNCE: if "requester_id" in request: @@ -174,7 +226,7 @@ def run(self): return except KeyboardInterrupt: break - except Exception as e: + except Exception: # This is a bare exception, because we don't really care why failure happened self.logger.exception("Error during ANNOUNCE") continue @@ -197,12 +249,12 @@ def run(self): except KeyboardInterrupt: break except WaitKeyTimeout as w: - timeout = w.tiemout_seconds + timeout = w.timeout_seconds conn.publish(stdout, json.dumps({ "wait": timeout })) continue - except Exception as e: + except Exception: # This is a bare exception, because we don't really care why failure happened self.logger.exception("Error during REQUEST") continue diff --git a/keyman/README.md b/keyman/README.md new file mode 100644 index 000000000..bf29ca25b --- /dev/null +++ b/keyman/README.md @@ -0,0 +1,173 @@ +# keyman + +Centralized API key orchestration system for managing rate-limited API keys across distributed Celery workers. + +## Overview + +keyman coordinates API key distribution and rate limit tracking between a central orchestrator and multiple worker processes via Redis pub/sub. 
+ +**Key features:** +- Round-robin key distribution (random selection) +- Automatic rate limit tracking and key refresh +- Support for multiple platforms (GitHub REST/GraphQL/Search, GitLab) +- Duplicate key prevention + +## Architecture + +``` +┌──────────────┐ +│ Orchestrator │ ← Single process managing all keys +└──────┬───────┘ + │ Redis pub/sub + ↓ +┌──────────────┐ +│ KeyClient │ ← One per worker per platform +└──────────────┘ +``` + +### Components + +**KeyOrchestrator** (`Orchestrator.py`) +- Central key manager (single process) +- Maintains fresh, expired, and invalid key pools +- Listens on Redis channels for key requests + +**KeyClient** (`KeyClient.py`) +- Worker-side interface for requesting keys +- NOT thread-safe (uses process ID for channels) +- Blocks until keys are available + +**KeyPublisher** (`KeyClient.py`) +- Admin interface for publishing/unpublishing keys +- Used during Augur startup to load keys from database + +## Usage + +### Request a key (worker) + +```python +from keyman.KeyClient import KeyClient + +# Initialize once per process +client = KeyClient("github_rest", logger) + +# Request a key (blocks if none available) +key = client.request() + +# Use key for API call... + +# Expire key when rate limited +epoch_reset = int(response.headers["X-RateLimit-Reset"]) +new_key = client.expire(key, epoch_reset) + +# Invalidate key on 401 +new_key = client.invalidate(key) +``` + +### Publish keys (startup) + +```python +from keyman.KeyClient import KeyPublisher + +pub = KeyPublisher() + +# Add key to orchestrator +pub.publish("ghp_abc123", "github_rest") + +# Check health +if pub.wait(timeout_seconds=30, republish=True): + print("Orchestrator ready") +``` + +## Supported Platforms + +| Platform | Use Case | Rate Limit | +|----------|----------|------------| +| `github_rest` | GitHub REST API v3 | 5000 req/hour | +| `github_graphql` | GitHub GraphQL API v4 | 5000 points/hour | +| `github_search` | GitHub Search API | 30 req/min | +| `gitlab_rest` | GitLab REST API | Varies | + +**Note**: Same GitHub token is published to all three `github_*` platforms because GitHub enforces separate rate limits for each API type. + +## Key States + +**Fresh** → Available for assignment to workers +**Expired** → Rate limited, will refresh when timeout passes +**Invalid** → Permanently bad (401), never refreshed + +## Redis Channels + +**`augur-oauth-announce`** - Admin operations (PUBLISH, UNPUBLISH, SHUTDOWN) +**`worker-oath-request`** - Worker operations (NEW, EXPIRE, INVALIDATE) + +Responses sent to `{channel}-{process_id}` + +## Starting the Orchestrator + +The orchestrator is started automatically by Augur backend: + +```python +# In augur/application/cli/backend.py +orchestrator = subprocess.Popen("python keyman/Orchestrator.py".split()) +``` + +For manual testing: +```bash +python keyman/Orchestrator.py +``` + +## Adding Keys + +### Database (recommended) + +```sql +INSERT INTO augur_operations.worker_oauth +(name, consumer_key, consumer_secret, access_token, access_token_secret, platform) +VALUES +('My GitHub Key', 'not_used', 'not_used', 'ghp_YOURTOKEN', 'not_used', 'github'); +``` + +Keys are loaded on Augur startup and published to orchestrator. 
+ +### Config file (single key only) + +```json +{ + "Keys": { + "github_api_key": "ghp_YOURTOKEN" + } +} +``` + +## Troubleshooting + +**Workers hang indefinitely** +- Check orchestrator is running: `ps aux | grep Orchestrator.py` +- Check Redis connectivity +- Verify keys exist: `pub.list_keys("github_rest")` + +**All keys expired** +- Check rate limit reset times in GitHub response headers +- Add more keys to database +- Wait for keys to refresh automatically + +**Keys not loading on startup** +- Verify `worker_oauth` table has keys +- Check `GithubApiKeyHandler` logs for validation errors +- Ensure Redis is accessible + +## Files + +| File | Purpose | +|------|---------| +| `Orchestrator.py` | Central key manager | +| `KeyClient.py` | Worker + admin interfaces | +| `KeyOrchestrationAPI.py` | Protocol specification | + +## Limitations + +- NOT thread-safe (uses process IDs) +- No persistence (state lost on orchestrator restart) +- Blocks indefinitely if no keys available +- Single orchestrator (no clustering/HA) From 09bee792bfce1621a16d4fcf46b56cafe321c6b3 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Mon, 17 Nov 2025 00:31:04 -0500 Subject: [PATCH 003/389] streamline keyman documentation Signed-off-by: Shlok Gilda --- keyman/README.md | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/keyman/README.md b/keyman/README.md index bf29ca25b..108d13e3b 100644 --- a/keyman/README.md +++ b/keyman/README.md @@ -119,8 +119,6 @@ python keyman/Orchestrator.py ## Adding Keys -### Database (recommended) - ```sql INSERT INTO augur_operations.worker_oauth (name, consumer_key, consumer_secret, access_token, access_token_secret, platform) @@ -130,16 +128,6 @@ VALUES Keys are loaded on Augur startup and published to orchestrator. -### Config file (single key only) - -```json -{ - "Keys": { - "github_api_key": "ghp_YOURTOKEN" - } -} -``` - ## Troubleshooting **Workers hang indefinitely** From 29848c6edcc57cf253f58f9d09479f29f47828cc Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Mon, 17 Nov 2025 00:35:54 -0500 Subject: [PATCH 004/389] refactor KeyOrchestrator: simplify loop variable in expire_key method Signed-off-by: Shlok Gilda --- keyman/Orchestrator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keyman/Orchestrator.py b/keyman/Orchestrator.py index 638e35ae7..3c9a68ff9 100644 --- a/keyman/Orchestrator.py +++ b/keyman/Orchestrator.py @@ -161,7 +161,7 @@ def new_key(self, platform: str) -> str | None: return min_timeout = 0 - for key, timeout in self.expired_keys[platform].items(): + for _, timeout in self.expired_keys[platform].items(): if not min_timeout or timeout < min_timeout: min_timeout = timeout From 4209044a3d85cbd45890f83426834c496f6cabf0 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Mon, 17 Nov 2025 09:49:43 -0500 Subject: [PATCH 005/389] fix README: update GitHub key entry to specify 'github_rest' platform Signed-off-by: Shlok Gilda --- keyman/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keyman/README.md b/keyman/README.md index 108d13e3b..77f8a5130 100644 --- a/keyman/README.md +++ b/keyman/README.md @@ -123,7 +123,7 @@ python keyman/Orchestrator.py INSERT INTO augur_operations.worker_oauth (name, consumer_key, consumer_secret, access_token, access_token_secret, platform) VALUES -('My GitHub Key', 'not_used', 'not_used', 'ghp_YOURTOKEN', 'not_used', 'github'); +('My GitHub Key', 'not_used', 'not_used', 'ghp_YOURTOKEN', 'not_used', 'github_rest'); ``` Keys are loaded on Augur startup and published to orchestrator. 
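With the platform value corrected above, a startup loader that follows the keyman README's guidance would publish each token once per `github_*` pool, since GitHub enforces a separate rate limit for each API type. A minimal sketch under that assumption — `load_github_keys` is an illustrative name, not Augur's actual loader (the `GithubApiKeyHandler` mentioned in the README's troubleshooting section), and it assumes the tokens were already read from `worker_oauth`:

```python
from keyman.KeyClient import KeyPublisher

# Hypothetical startup loader -- illustrative only, not Augur's actual
# key-loading code. Assumes `tokens` was already read from worker_oauth.
def load_github_keys(tokens):
    pub = KeyPublisher()
    for token in tokens:
        # GitHub enforces a separate rate limit per API type, so the same
        # token goes into each github_* platform pool independently.
        for platform in ("github_rest", "github_graphql", "github_search"):
            pub.publish(token, platform)
    # Publishes are fire-and-forget; block until the orchestrator has
    # drained the queue (or give up after 30 seconds).
    if not pub.wait(timeout_seconds=30, republish=True):
        raise RuntimeError("key orchestrator did not acknowledge")
```

Publishing first and calling `wait()` afterwards mirrors the README's startup example; `republish=True` fits this case because, per the `wait()` docstring, the loader may run before the orchestrator has come online.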
From 0a230241809cc9ade1f7867a7dacec60918366fd Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 18 Dec 2025 12:50:35 -0500 Subject: [PATCH 006/389] revert simplify key addition logic and remove duplicate key prevention note from README Signed-off-by: Shlok Gilda --- keyman/Orchestrator.py | 4 +--- keyman/README.md | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/keyman/Orchestrator.py b/keyman/Orchestrator.py index 3c9a68ff9..220776063 100644 --- a/keyman/Orchestrator.py +++ b/keyman/Orchestrator.py @@ -70,9 +70,7 @@ def publish_key(self, key: str, platform: str) -> None: self.expired_keys[platform] = {} self.invalid_keys[platform] = set() else: - # Prevent duplicate keys from increasing selection probability - if key not in self.fresh_keys[platform]: - self.fresh_keys[platform].append(key) + self.fresh_keys[platform].append(key) def unpublish_key(self, key: str, platform: str) -> None: """Remove a key from circulation (fresh or expired pool). diff --git a/keyman/README.md b/keyman/README.md index 77f8a5130..d864fee4f 100644 --- a/keyman/README.md +++ b/keyman/README.md @@ -10,7 +10,6 @@ keyman coordinates API key distribution and rate limit tracking between a centra - Round-robin key distribution (random selection) - Automatic rate limit tracking and key refresh - Support for multiple platforms (GitHub REST/GraphQL/Search, GitLab) -- Duplicate key prevention ## Architecture From 7b7277e3606c541149c4a37edf8ab8f3f340b23f Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Fri, 19 Dec 2025 21:35:15 -0500 Subject: [PATCH 007/389] refactor: simplify message handling in KeyClient Signed-off-by: Shlok Gilda --- keyman/KeyClient.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/keyman/KeyClient.py b/keyman/KeyClient.py index 8b06cb473..68249f386 100644 --- a/keyman/KeyClient.py +++ b/keyman/KeyClient.py @@ -81,8 +81,7 @@ def _recv(self, timeout: int | None = None) -> dict: if "wait" in msg: raise WaitKeyTimeout(msg["wait"]) - else: - return msg + return msg def request(self, platform: str | None = None) -> str: """Request a new key from the orchestrator. @@ -106,9 +105,7 @@ def request(self, platform: str | None = None) -> str: msg = self._recv() if "key" in msg: return msg["key"] - - else: - raise Exception(f"Invalid response type: {msg}") + raise Exception(f"Invalid response type: {msg}") except WaitKeyTimeout as e: self.logger.debug(f"NO FRESH KEYS: sleeping for {e.timeout_seconds} seconds") time.sleep(e.timeout_seconds) From dd72320f4174b216036dd8d3795e349404cf72af Mon Sep 17 00:00:00 2001 From: Dhanesh Kolu Date: Wed, 31 Dec 2025 06:47:39 +0530 Subject: [PATCH 008/389] docs: minor README cleanup and clarification Signed-off-by: Dhanesh Kolu --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e59180de0..bd6198673 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Augur is now releasing a dramatically improved new version. It is also available - The `release` branch is a stable version of our new architecture, which features: - Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks. - A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard. - - Materialized views to increase the snappiness of API’s and Frontends on large scale data. 
+ - Materialized views to increase the snappiness of APIs and Frontends on large scale data. - Changes to primary keys, which now employ a UUID strategy that ensures unique keys across all Augur instances. - Support for [8knot](https://github.com/oss-aspen/8knot) dashboards (view a sample [here](https://eightknot.osci.io/)). *beautification coming soon!* @@ -43,14 +43,15 @@ For more information on [how to get involved on the CHAOSS website](https://chao ## Collecting Data -Augur supports ```Python3.7``` through ```Python3.11``` on all platforms. ```Python3.12``` and above do not yet work because of machine learning worker dependencies. On OSX, you can create a ```Python3.11``` environment, by running: -``` -$ python3.11 -m venv path/to/venv +Augur supports ```Python 3.7``` through ```Python 3.11``` on all platforms. ```Python 3.12``` and above do not yet work because of machine learning worker dependencies. On OSX, you can create a ```Python 3.11``` environment by running: + +```bash +$ python3.11 -m venv path/to/venv ``` Augur's main focus is to measure the overall health and sustainability of open source projects. -Augur collects more data about open source software projects than any other available software. Augur's main focus is to measure the overall health and sustainability of open source projects. +Augur collects more data about open source software projects than any other available software. One of Augur's core tenets is a desire to openly gather data that people can trust, and then provide useful and well-defined metrics that help give important context to the larger stories being told by that data. From 080f01d905f75b993a51ac683ded9468fac2ddcc Mon Sep 17 00:00:00 2001 From: Dhanesh Kolu Date: Sat, 3 Jan 2026 07:43:48 +0530 Subject: [PATCH 009/389] docs: fix README markdown and revert python formatting Signed-off-by: Dhanesh Kolu --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bd6198673..6db9812d0 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,12 @@ For more information on [how to get involved on the CHAOSS website](https://chao ## Collecting Data -Augur supports ```Python 3.7``` through ```Python 3.11``` on all platforms. ```Python 3.12``` and above do not yet work because of machine learning worker dependencies. On OSX, you can create a ```Python 3.11``` environment by running: +Augur supports ```Python3.7``` through ```Python3.11``` on all platforms. +```Python3.12``` and above do not yet work because of machine learning worker dependencies. +On OSX, you can create a ```Python3.11``` environment by running: ```bash -$ python3.11 -m venv path/to/venv +python3.11 -m venv path/to/venv ``` Augur's main focus is to measure the overall health and sustainability of open source projects. From 099ddeea3943e62ca0f2f9464ba8e7d398bcaced Mon Sep 17 00:00:00 2001 From: Dhanesh Kolu Date: Tue, 6 Jan 2026 07:25:56 +0530 Subject: [PATCH 010/389] docs: revert python line wrapping to avoid diff noise Signed-off-by: Dhanesh Kolu --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 6db9812d0..009d8d8c7 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,7 @@ For more information on [how to get involved on the CHAOSS website](https://chao ## Collecting Data -Augur supports ```Python3.7``` through ```Python3.11``` on all platforms. -```Python3.12``` and above do not yet work because of machine learning worker dependencies. 
-On OSX, you can create a ```Python3.11``` environment by running: +Augur supports ```Python3.7``` through ```Python3.11``` on all platforms. ```Python3.12``` and above do not yet work because of machine learning worker dependencies. On OSX, you can create a ```Python3.11``` environment, by running: ```bash python3.11 -m venv path/to/venv From 7f95617c02fbb3c0784799c8b5e08ec3e5e58d38 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 7 Jan 2026 01:40:34 -0500 Subject: [PATCH 011/389] update WaitKeyTimeout exception docstring Signed-off-by: Shlok Gilda --- keyman/KeyOrchestrationAPI.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/keyman/KeyOrchestrationAPI.py b/keyman/KeyOrchestrationAPI.py index 31915b7a1..f4b905df4 100644 --- a/keyman/KeyOrchestrationAPI.py +++ b/keyman/KeyOrchestrationAPI.py @@ -91,13 +91,14 @@ } class WaitKeyTimeout(Exception): - """Raised when waiting for a key exceeds the specified timeout. + """Raised when the Key Orchestrator returns a 'wait' message. - This typically occurs when all keys for a platform are expired and none - are available within the wait period. + This indicates that there are no fresh keys available for the requested + platform. Args: - timeout_seconds: Maximum wait time before raising this exception + timeout_seconds: How long the client needs to sleep (in seconds) + before a fresh key will become available. """ def __init__(self, timeout_seconds: int) -> None: self.timeout_seconds = timeout_seconds From b02b5858511ccac2c9e3d9d1d55c724d34362a68 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 7 Jan 2026 17:19:14 -0500 Subject: [PATCH 012/389] Normalize hardcoded batch sizes to named constants Convert inline magic numbers to named constants at top of files: - ISSUE_BATCH_SIZE = 1000 (issues.py) - EVENT_BATCH_SIZE = 500 (events.py) - MESSAGE_BATCH_SIZE = 20 (messages.py) - PR_BATCH_SIZE, PR_REVIEW_COMMENT_BATCH_SIZE, PR_REVIEW_BATCH_SIZE = 1000 (pull_requests/tasks.py) - PR_COMMIT_BATCH_SIZE = 1000 (pull_requests/commits_model/core.py) - PR_FILE_BATCH_SIZE = 1000 (pull_requests/files_model/core.py) - FACADE_CONTRIBUTOR_BATCH_SIZE = 1000 (facade_github/tasks.py) No functional changes - same values as before. This prepares for adding configurable batch sizes via config. 
Refs: #3515 Signed-off-by: Shlok Gilda --- augur/tasks/github/events.py | 9 ++++++--- augur/tasks/github/facade_github/tasks.py | 10 ++++++---- augur/tasks/github/issues.py | 6 ++++-- augur/tasks/github/messages.py | 8 ++++++-- .../github/pull_requests/commits_model/core.py | 7 +++++-- .../github/pull_requests/files_model/core.py | 7 +++++-- augur/tasks/github/pull_requests/tasks.py | 15 +++++++++------ 7 files changed, 41 insertions(+), 21 deletions(-) diff --git a/augur/tasks/github/events.py b/augur/tasks/github/events.py index 38a5e9e9c..94b823fc8 100644 --- a/augur/tasks/github/events.py +++ b/augur/tasks/github/events.py @@ -17,6 +17,9 @@ from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_issues_by_repo_id, get_pull_requests_by_repo_id, update_issue_closed_cntrbs_by_repo_id, get_session, get_engine, get_core_data_last_collected, batch_insert_contributors +# Batch size for processing events - smaller than issues/PRs due to higher processing overhead per event +EVENT_BATCH_SIZE = 500 + platform_id = 1 @celery.task(base=AugurCoreRepoCollectionTask) @@ -120,7 +123,7 @@ def collect(self, repo_git, key_auth, since): events.append(event) # making this a decent size since process_events retrieves all the issues and prs each time - if len(events) >= 500: + if len(events) >= EVENT_BATCH_SIZE: self._process_events(events, repo_id) events.clear() @@ -327,7 +330,7 @@ def _collect_and_process_issue_events(self, owner, repo, repo_id, key_auth, sinc except UrlNotFoundException as e: self._logger.info(f"{self.repo_identifier}: Issue with number of {issue_number} returned 404 on event data. Skipping.") - if len(events) > 500: + if len(events) >= EVENT_BATCH_SIZE: self._insert_contributors(contributors) self._insert_issue_events(events) events.clear() @@ -389,7 +392,7 @@ def _collect_and_process_pr_events(self, owner, repo, repo_id, key_auth, since): self._logger.info(f"{self.repo_identifier}: PR with number of {pr_number} returned 404 on event data. 
Skipping.") continue - if len(events) > 500: + if len(events) >= EVENT_BATCH_SIZE: self._insert_contributors(contributors) self._insert_pr_events(events) events.clear() diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 53a3d6648..ca099a423 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -11,6 +11,10 @@ from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import * +# Batch size for facade contributor processing +FACADE_CONTRIBUTOR_BATCH_SIZE = 1000 + + def process_commit_metadata(logger, auth, contributorQueue, repo_id, platform_id): github_data_access = GithubDataAccess(auth, logger) @@ -266,12 +270,11 @@ def insert_facade_contributors(self, repo_git): # Process results in batches to reduce memory usage batch = [] - BATCH_SIZE = 1000 for row in rows: batch.append(dict(row)) - if len(batch) >= BATCH_SIZE: + if len(batch) >= FACADE_CONTRIBUTOR_BATCH_SIZE: process_commit_metadata(logger, key_auth, batch, repo_id, platform_id) batch.clear() @@ -322,12 +325,11 @@ def insert_facade_contributors(self, repo_git): # Process results in batches to reduce memory usage batch = [] - BATCH_SIZE = 1000 for row in rows: batch.append(dict(row)) - if len(batch) >= BATCH_SIZE: + if len(batch) >= FACADE_CONTRIBUTOR_BATCH_SIZE: link_commits_to_contributor(logger, facade_helper, batch) batch.clear() diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 91e56deaf..adaef4d7b 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -17,6 +17,9 @@ from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors +# Batch size for processing issues - controls memory usage during collection +ISSUE_BATCH_SIZE = 1000 + development = get_development_flag() @celery.task(base=AugurCoreRepoCollectionTask) @@ -56,12 +59,11 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: # Process issues in batches to avoid memory spikes batch = [] total_issues = 0 - batch_size = 1000 for issue in issue_data_generator: batch.append(issue) - if len(batch) >= batch_size: + if len(batch) >= ISSUE_BATCH_SIZE: logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues + len(batch)})") process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) total_issues += len(batch) diff --git a/augur/tasks/github/messages.py b/augur/tasks/github/messages.py index 812af0fad..1916cf3d1 100644 --- a/augur/tasks/github/messages.py +++ b/augur/tasks/github/messages.py @@ -13,6 +13,10 @@ from augur.application.db.lib import get_core_data_last_collected from sqlalchemy.sql import text + +# Batch size for processing messages - smaller due to large text content per message +MESSAGE_BATCH_SIZE = 20 + platform_id = 1 @celery.task(base=AugurCoreRepoCollectionTask) @@ -123,8 +127,8 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger except UrlNotFoundException: logger.info(f"{task_name}: PR or issue comment url of {comment_url} returned 404. 
Skipping.") skipped_urls += 1 - - if len(all_data) >= 20: + + if len(all_data) >= MESSAGE_BATCH_SIZE: process_messages(all_data, task_name, repo_id, logger, augur_db) all_data.clear() diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 83b283bb6..0f39190b9 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -6,6 +6,10 @@ from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs +# Batch size for PR commit collection +PR_COMMIT_BATCH_SIZE = 1000 + + def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collection=False): if full_collection: @@ -44,7 +48,6 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti github_data_access = GithubDataAccess(key_auth, logger) - BATCH_SIZE = 1000 pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] all_data = [] for index,pr_info in enumerate(pr_urls): @@ -73,7 +76,7 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti } all_data.append(pr_commit_row) - if len(all_data) >= BATCH_SIZE: + if len(all_data) >= PR_COMMIT_BATCH_SIZE: logger.info(f"{task_name}: Inserting {len(all_data)} rows") augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) all_data.clear() diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index 60222a3bc..510025c9b 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -6,6 +6,10 @@ from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs +# Batch size for PR file collection +PR_FILE_BATCH_SIZE = 1000 + + def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection=False): if full_collection: @@ -40,7 +44,6 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) - BATCH_SIZE = 1000 pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] pr_file_rows = [] logger.info(f"Getting pull request files for repo: {repo.repo_git}") @@ -95,7 +98,7 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection pr_file_rows.append(data) - if len(pr_file_rows) >= BATCH_SIZE: + if len(pr_file_rows) >= PR_FILE_BATCH_SIZE: logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) pr_file_rows.clear() diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 3d9f0a4a2..8322690c9 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -19,6 +19,11 @@ from typing import List +# Batch sizes for PR-related data collection +PR_BATCH_SIZE = 1000 +PR_REVIEW_COMMENT_BATCH_SIZE = 1000 +PR_REVIEW_BATCH_SIZE = 1000 + platform_id = 1 @celery.task(base=AugurCoreRepoCollectionTask) @@ -44,10 +49,10 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: total_count = 0 all_data = [] for pr in retrieve_all_pr_data(repo_git, logger, manifest.key_auth, core_data_last_collected): - + all_data.append(pr) - if len(all_data) >= 1000: + if len(all_data) >= PR_BATCH_SIZE: process_pull_requests(all_data, f"{owner}/{repo}: Github Pr task", repo_id, logger, augur_db) 
total_count += len(all_data) all_data.clear() @@ -249,7 +254,6 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - github_data_access = GithubDataAccess(key_auth, logger) # Batch processing: accumulate comments until batch size reached, then flush - COMMENT_BATCH_SIZE = 1000 contributors = [] pr_review_comment_dicts = [] pr_review_msg_mapping_data = {} @@ -271,7 +275,7 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - pr_review_msg_mapping_data[comment["id"]] = comment # Flush batch when threshold reached (check both to prevent unbounded growth) - if len(pr_review_comment_dicts) >= COMMENT_BATCH_SIZE or len(contributors) >= COMMENT_BATCH_SIZE: + if len(pr_review_comment_dicts) >= PR_REVIEW_COMMENT_BATCH_SIZE or len(contributors) >= PR_REVIEW_COMMENT_BATCH_SIZE: refs_inserted = _flush_pr_review_comment_batch( logger, contributors, pr_review_comment_dicts, pr_review_msg_mapping_data, pr_review_id_mapping, repo_id, tool_version, data_source, owner, repo @@ -485,7 +489,6 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: github_data_access = GithubDataAccess(manifest.key_auth, logger) # Batch processing: accumulate reviews until batch size reached, then flush - REVIEW_BATCH_SIZE = 1000 contributors = [] pr_review_dicts = [] total_reviews_collected = 0 @@ -520,7 +523,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: ) # Flush batch when threshold reached - if len(pr_review_dicts) >= REVIEW_BATCH_SIZE: + if len(pr_review_dicts) >= PR_REVIEW_BATCH_SIZE: _flush_pr_review_batch(augur_db, contributors, pr_review_dicts, logger, owner, repo) total_reviews_collected += len(pr_review_dicts) contributors.clear() From 0f2c3e24256b05bc9e1dc3bc6b8d655ec9841d68 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 7 Jan 2026 17:27:07 -0500 Subject: [PATCH 013/389] Add configurable batch sizes with hybrid approach Add 3 config values to Tasks section: - default_batch_size: 1000 (used by most tasks) - github_event_batch_size: 500 (events need more processing per item) - github_message_batch_size: 20 (messages have large text content) Add get_batch_size() helper in db/lib.py that: - Returns specific override if task_type matches a config key - Falls back to default_batch_size otherwise - Hardcoded fallback to 1000 if config unavailable Update all task files to use config-based batch sizes: - issues.py, pull_requests/tasks.py, commits_model/core.py, files_model/core.py, facade_github/tasks.py: use default - events.py: uses github_event_batch_size (500) - messages.py: uses github_message_batch_size (20) Users can now tune batch sizes via: augur config set-value Tasks default_batch_size 500 Refs: #3515 Signed-off-by: Shlok Gilda --- augur/application/config.py | 5 +++- augur/application/db/lib.py | 25 +++++++++++++++++-- augur/tasks/github/events.py | 5 ++-- augur/tasks/github/facade_github/tasks.py | 5 ++-- augur/tasks/github/issues.py | 5 ++-- augur/tasks/github/messages.py | 5 ++-- .../pull_requests/commits_model/core.py | 5 ++-- .../github/pull_requests/files_model/core.py | 5 ++-- augur/tasks/github/pull_requests/tasks.py | 9 ++++--- 9 files changed, 50 insertions(+), 19 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 41a729020..d9eb12bc3 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -90,7 +90,10 @@ def redact_setting_value(section_name, setting_name, value): "core_collection_interval_days": 15, 
"secondary_collection_interval_days": 10, "facade_collection_interval_days": 10, - "ml_collection_interval_days": 40 + "ml_collection_interval_days": 40, + "default_batch_size": 1000, + "github_event_batch_size": 500, + "github_message_batch_size": 20 }, "Message_Insights": { "insight_days": 30, diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 4f106b0a7..f1c27f92e 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -48,8 +48,29 @@ def get_value(section_name: str, setting_name: str) -> Optional[Any]: setting_dict = convert_type_of_value(setting_dict, logger) return setting_dict["value"] - - + + +def get_batch_size(task_type: str = None) -> int: + """Get batch size for a task, with fallback to default. + + Args: + task_type: Optional task type (e.g., "event", "message"). + If provided and a specific config exists for it, + that value is used. Otherwise falls back to default_batch_size. + + Returns: + Batch size integer (default: 1000) + """ + if task_type: + specific_key = f"github_{task_type}_batch_size" + value = get_value("Tasks", specific_key) + if value is not None: + return int(value) + + default_value = get_value("Tasks", "default_batch_size") + return int(default_value) if default_value is not None else 1000 + + def execute_sql(sql_text): engine = get_engine() diff --git a/augur/tasks/github/events.py b/augur/tasks/github/events.py index 94b823fc8..b7d301e88 100644 --- a/augur/tasks/github/events.py +++ b/augur/tasks/github/events.py @@ -14,11 +14,12 @@ from augur.tasks.github.util.util import get_owner_repo from augur.tasks.util.worker_util import remove_duplicate_dicts from augur.application.db.models import PullRequestEvent, IssueEvent, Contributor, Repo -from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_issues_by_repo_id, get_pull_requests_by_repo_id, update_issue_closed_cntrbs_by_repo_id, get_session, get_engine, get_core_data_last_collected, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_issues_by_repo_id, get_pull_requests_by_repo_id, update_issue_closed_cntrbs_by_repo_id, get_session, get_engine, get_core_data_last_collected, batch_insert_contributors, get_batch_size # Batch size for processing events - smaller than issues/PRs due to higher processing overhead per event -EVENT_BATCH_SIZE = 500 +# Uses github_event_batch_size from config (default: 500) +EVENT_BATCH_SIZE = get_batch_size("event") platform_id = 1 diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index ca099a423..6103b3e8e 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -6,13 +6,14 @@ from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.tasks.github.facade_github.core import * -from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors +from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors, get_batch_size from augur.application.db.lib import get_session, execute_session_query from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram 
import * # Batch size for facade contributor processing -FACADE_CONTRIBUTOR_BATCH_SIZE = 1000 +# Uses default_batch_size from config (default: 1000) +FACADE_CONTRIBUTOR_BATCH_SIZE = get_batch_size() def process_commit_metadata(logger, auth, contributorQueue, repo_id, platform_id): diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index adaef4d7b..09e1626d0 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -14,11 +14,12 @@ from augur.tasks.util.worker_util import remove_duplicate_dicts from augur.application.db.models import Issue, IssueLabel, IssueAssignee from augur.application.config import get_development_flag -from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors, get_batch_size # Batch size for processing issues - controls memory usage during collection -ISSUE_BATCH_SIZE = 1000 +# Uses default_batch_size from config (default: 1000) +ISSUE_BATCH_SIZE = get_batch_size() development = get_development_flag() diff --git a/augur/tasks/github/messages.py b/augur/tasks/github/messages.py index 1916cf3d1..1f05b0444 100644 --- a/augur/tasks/github/messages.py +++ b/augur/tasks/github/messages.py @@ -10,12 +10,13 @@ from augur.tasks.github.util.util import get_owner_repo from augur.application.db.models import PullRequest, Message, Issue, PullRequestMessageRef, IssueMessageRef, Contributor, Repo, CollectionStatus from augur.application.db import get_engine, get_session -from augur.application.db.lib import get_core_data_last_collected +from augur.application.db.lib import get_core_data_last_collected, get_batch_size from sqlalchemy.sql import text # Batch size for processing messages - smaller due to large text content per message -MESSAGE_BATCH_SIZE = 20 +# Uses github_message_batch_size from config (default: 20) +MESSAGE_BATCH_SIZE = get_batch_size("message") platform_id = 1 diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 0f39190b9..58ddc1854 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -3,11 +3,12 @@ from augur.application.db.models import * from augur.tasks.github.util.util import get_owner_repo from augur.application.db.util import execute_session_query -from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs +from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_batch_size # Batch size for PR commit collection -PR_COMMIT_BATCH_SIZE = 1000 +# Uses default_batch_size from config (default: 1000) +PR_COMMIT_BATCH_SIZE = get_batch_size() def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collection=False): diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index 510025c9b..653880af8 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -3,11 +3,12 @@ from augur.application.db.models import * from augur.tasks.github.util.util import get_owner_repo from augur.application.db.util import execute_session_query -from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs +from augur.application.db.lib import 
get_secondary_data_last_collected, get_updated_prs, get_batch_size # Batch size for PR file collection -PR_FILE_BATCH_SIZE = 1000 +# Uses default_batch_size from config (default: 1000) +PR_FILE_BATCH_SIZE = get_batch_size() def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection=False): diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 8322690c9..950c31fed 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -11,7 +11,7 @@ from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors, get_batch_size from augur.application.db.util import execute_session_query from ..messages import process_github_comment_contributors from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected @@ -20,9 +20,10 @@ # Batch sizes for PR-related data collection -PR_BATCH_SIZE = 1000 -PR_REVIEW_COMMENT_BATCH_SIZE = 1000 -PR_REVIEW_BATCH_SIZE = 1000 +# All use default_batch_size from config (default: 1000) +PR_BATCH_SIZE = get_batch_size() +PR_REVIEW_COMMENT_BATCH_SIZE = get_batch_size() +PR_REVIEW_BATCH_SIZE = get_batch_size() platform_id = 1 From b7e7af548751016661accff589312d884d97a202 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Fri, 9 Jan 2026 23:41:15 +0530 Subject: [PATCH 014/389] Replace print statements with logging in contributor_breadth_worker Signed-off-by: PredictiveManish --- .../contributor_breadth_worker.py | 292 +++++++++--------- augur/tasks/test.py | 22 +- 2 files changed, 157 insertions(+), 157 deletions(-) diff --git a/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py b/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py index 896ccd61d..8af0e54a1 100644 --- a/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py +++ b/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py @@ -1,146 +1,146 @@ -#SPDX-License-Identifier: MIT -import logging -import sqlalchemy as s -from datetime import datetime - -from augur.tasks.init.celery_app import celery_app as celery -from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException -from augur.application.db.models import ContributorRepo -from augur.application.db.lib import bulk_insert_dicts -from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth - -### This worker scans all the platform users in Augur, and pulls their platform activity -### logs. Those are then used to analyze what repos each is working in (which will include repos not -### tracked in the Augur instance.) -### Logic: For each unique platform contributor, gather non duplicate events, using the GitHub "id" -### for the event API (GitLab coming!) 
- -@celery.task(bind=True) -def contributor_breadth_model(self) -> None: - - engine = self.app.engine - - logger = logging.getLogger(contributor_breadth_model.__name__) - - tool_source = 'Contributor Breadth Worker' - tool_version = '0.0.1' - data_source = 'GitHub API' - - key_auth = GithubRandomKeyAuth(logger) - - # This version of the query pulls contributors who have not had any data collected yet - # To the top of the list - cntrb_login_query = s.sql.text(""" - SELECT DISTINCT - gh_login, - cntrb_id - FROM - ( - SELECT DISTINCT - gh_login, - cntrb_id, - data_collection_date - FROM - ( - SELECT DISTINCT - contributors.gh_login, - contributors.cntrb_id, - contributor_repo.data_collection_date :: DATE - FROM - contributor_repo - RIGHT OUTER JOIN contributors ON contributors.cntrb_id = contributor_repo.cntrb_id - AND contributors.gh_login IS NOT NULL - ORDER BY - contributor_repo.data_collection_date :: DATE NULLS FIRST - ) A - ORDER BY - data_collection_date DESC NULLS FIRST - ) b - """) - - with engine.connect() as connection: - result = connection.execute(cntrb_login_query) - - current_cntrb_logins = [dict(row) for row in result.mappings()] - - cntrb_newest_events_query = s.sql.text(""" - SELECT c.gh_login, MAX(cr.created_at) as newest_event_date - FROM contributor_repo AS cr - JOIN contributors AS c ON cr.cntrb_id = c.cntrb_id - GROUP BY c.gh_login; - """) - - with engine.connect() as connection: - cntrb_newest_events_list = connection.execute(cntrb_newest_events_query) - - cntrb_newest_events_list = [dict(row) for row in cntrb_newest_events_list.mappings()] - - cntrb_newest_events_map = {} - for cntrb_event in cntrb_newest_events_list: - - gh_login = cntrb_event["gh_login"] - newest_event_date = cntrb_event["newest_event_date"] - - cntrb_newest_events_map[gh_login] = newest_event_date - - github_data_access = GithubDataAccess(key_auth, logger) - - index = 1 - total = len(current_cntrb_logins) - for cntrb in current_cntrb_logins: - - print(f"Processing cntrb {index} of {total}") - index += 1 - - repo_cntrb_url = f"https://api.github.com/users/{cntrb['gh_login']}/events" - - newest_event_in_db = datetime(1970, 1, 1) - if cntrb["gh_login"] in cntrb_newest_events_map: - newest_event_in_db = cntrb_newest_events_map[cntrb["gh_login"]] - - - cntrb_events = [] - try: - for event in github_data_access.paginate_resource(repo_cntrb_url): - - cntrb_events.append(event) - - event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ") - if event_age < newest_event_in_db: - logger.info("Found cntrb events we already have...skipping the rest") - break - - if len(cntrb_events) == 0: - logger.info("There are no cntrb events, or new events for this user.\n") - continue - - except UrlNotFoundException as e: - logger.warning(e) - continue - - events = process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source) - - logger.info(f"Inserting {len(events)} events") - natural_keys = ["event_id", "tool_version"] - bulk_insert_dicts(logger, events, ContributorRepo, natural_keys) - - -def process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source): - - cntrb_repos_insert = [] - for event_id_api in cntrb_events: - - cntrb_repos_insert.append({ - "cntrb_id": cntrb['cntrb_id'], - "repo_git": event_id_api['repo']['url'], - "tool_source": tool_source, - "tool_version": tool_version, - "data_source": data_source, - "repo_name": event_id_api['repo']['name'], - "gh_repo_id": event_id_api['repo']['id'], - "cntrb_category": 
event_id_api['type'], - "event_id": int(event_id_api['id']), - "created_at": event_id_api['created_at'] - }) - - return cntrb_repos_insert +#SPDX-License-Identifier: MIT +import logging +import sqlalchemy as s +from datetime import datetime + +from augur.tasks.init.celery_app import celery_app as celery +from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException +from augur.application.db.models import ContributorRepo +from augur.application.db.lib import bulk_insert_dicts +from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth + +### This worker scans all the platform users in Augur, and pulls their platform activity +### logs. Those are then used to analyze what repos each is working in (which will include repos not +### tracked in the Augur instance.) +### Logic: For each unique platform contributor, gather non duplicate events, using the GitHub "id" +### for the event API (GitLab coming!) + +@celery.task(bind=True) +def contributor_breadth_model(self) -> None: + + engine = self.app.engine + + logger = logging.getLogger(__name__) + + tool_source = 'Contributor Breadth Worker' + tool_version = '0.0.1' + data_source = 'GitHub API' + + key_auth = GithubRandomKeyAuth(logger) + + # This version of the query pulls contributors who have not had any data collected yet + # To the top of the list + cntrb_login_query = s.sql.text(""" + SELECT DISTINCT + gh_login, + cntrb_id + FROM + ( + SELECT DISTINCT + gh_login, + cntrb_id, + data_collection_date + FROM + ( + SELECT DISTINCT + contributors.gh_login, + contributors.cntrb_id, + contributor_repo.data_collection_date :: DATE + FROM + contributor_repo + RIGHT OUTER JOIN contributors ON contributors.cntrb_id = contributor_repo.cntrb_id + AND contributors.gh_login IS NOT NULL + ORDER BY + contributor_repo.data_collection_date :: DATE NULLS FIRST + ) A + ORDER BY + data_collection_date DESC NULLS FIRST + ) b + """) + + with engine.connect() as connection: + result = connection.execute(cntrb_login_query) + + current_cntrb_logins = [dict(row) for row in result.mappings()] + + cntrb_newest_events_query = s.sql.text(""" + SELECT c.gh_login, MAX(cr.created_at) as newest_event_date + FROM contributor_repo AS cr + JOIN contributors AS c ON cr.cntrb_id = c.cntrb_id + GROUP BY c.gh_login; + """) + + with engine.connect() as connection: + cntrb_newest_events_list = connection.execute(cntrb_newest_events_query) + + cntrb_newest_events_list = [dict(row) for row in cntrb_newest_events_list.mappings()] + + cntrb_newest_events_map = {} + for cntrb_event in cntrb_newest_events_list: + + gh_login = cntrb_event["gh_login"] + newest_event_date = cntrb_event["newest_event_date"] + + cntrb_newest_events_map[gh_login] = newest_event_date + + github_data_access = GithubDataAccess(key_auth, logger) + + index = 1 + total = len(current_cntrb_logins) + for cntrb in current_cntrb_logins: + + logger.info(f"Processing cntrb {index} of {total}") + index += 1 + + repo_cntrb_url = f"https://api.github.com/users/{cntrb['gh_login']}/events" + + newest_event_in_db = datetime(1970, 1, 1) + if cntrb["gh_login"] in cntrb_newest_events_map: + newest_event_in_db = cntrb_newest_events_map[cntrb["gh_login"]] + + + cntrb_events = [] + try: + for event in github_data_access.paginate_resource(repo_cntrb_url): + + cntrb_events.append(event) + + event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ") + if event_age < newest_event_in_db: + logger.info("Found cntrb events we already have...skipping the rest") + break + + if 
len(cntrb_events) == 0: + logger.info("There are no cntrb events, or new events for this user.\n") + continue + + except UrlNotFoundException as e: + logger.warning(e) + continue + + events = process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source) + + logger.info(f"Inserting {len(events)} events") + natural_keys = ["event_id", "tool_version"] + bulk_insert_dicts(logger, events, ContributorRepo, natural_keys) + + +def process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source): + + cntrb_repos_insert = [] + for event_id_api in cntrb_events: + + cntrb_repos_insert.append({ + "cntrb_id": cntrb['cntrb_id'], + "repo_git": event_id_api['repo']['url'], + "tool_source": tool_source, + "tool_version": tool_version, + "data_source": data_source, + "repo_name": event_id_api['repo']['name'], + "gh_repo_id": event_id_api['repo']['id'], + "cntrb_category": event_id_api['type'], + "event_id": int(event_id_api['id']), + "created_at": event_id_api['created_at'] + }) + + return cntrb_repos_insert diff --git a/augur/tasks/test.py b/augur/tasks/test.py index efdacb77f..7858c25d8 100644 --- a/augur/tasks/test.py +++ b/augur/tasks/test.py @@ -1,11 +1,11 @@ -from augur.tasks.init.celery_app import celery_app as celery - -@celery.task() -def successful_task(): - pass - -@celery.task() -def failure_task(): - raise Exception("ERROR") - - +from augur.tasks.init.celery_app import celery_app as celery +from augur.tasks.data_analysis.contributor_breadth_worker import +@celery.task() +def successful_task(): + pass + +@celery.task() +def failure_task(): + raise Exception("ERROR") + + From e0a16471fc4202f98ab1afafb50bc5a4758b9ecc Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Fri, 9 Jan 2026 23:50:45 +0530 Subject: [PATCH 015/389] Removed unnecessary import Signed-off-by: PredictiveManish --- augur/tasks/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/test.py b/augur/tasks/test.py index 7858c25d8..53950c448 100644 --- a/augur/tasks/test.py +++ b/augur/tasks/test.py @@ -1,5 +1,5 @@ from augur.tasks.init.celery_app import celery_app as celery -from augur.tasks.data_analysis.contributor_breadth_worker import + @celery.task() def successful_task(): pass From bc5c30cd59b84e001df34e933dfe518c0274524b Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Sat, 10 Jan 2026 01:59:16 +0530 Subject: [PATCH 016/389] removing whole file update Signed-off-by: PredictiveManish --- .../contributor_breadth_worker.py | 292 +++++++++--------- 1 file changed, 146 insertions(+), 146 deletions(-) diff --git a/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py b/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py index 8af0e54a1..0244b2446 100644 --- a/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py +++ b/augur/tasks/data_analysis/contributor_breadth_worker/contributor_breadth_worker.py @@ -1,146 +1,146 @@ -#SPDX-License-Identifier: MIT -import logging -import sqlalchemy as s -from datetime import datetime - -from augur.tasks.init.celery_app import celery_app as celery -from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException -from augur.application.db.models import ContributorRepo -from augur.application.db.lib import bulk_insert_dicts -from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth - -### This worker scans all the platform users in Augur, and pulls their 
platform activity -### logs. Those are then used to analyze what repos each is working in (which will include repos not -### tracked in the Augur instance.) -### Logic: For each unique platform contributor, gather non duplicate events, using the GitHub "id" -### for the event API (GitLab coming!) - -@celery.task(bind=True) -def contributor_breadth_model(self) -> None: - - engine = self.app.engine - - logger = logging.getLogger(__name__) - - tool_source = 'Contributor Breadth Worker' - tool_version = '0.0.1' - data_source = 'GitHub API' - - key_auth = GithubRandomKeyAuth(logger) - - # This version of the query pulls contributors who have not had any data collected yet - # To the top of the list - cntrb_login_query = s.sql.text(""" - SELECT DISTINCT - gh_login, - cntrb_id - FROM - ( - SELECT DISTINCT - gh_login, - cntrb_id, - data_collection_date - FROM - ( - SELECT DISTINCT - contributors.gh_login, - contributors.cntrb_id, - contributor_repo.data_collection_date :: DATE - FROM - contributor_repo - RIGHT OUTER JOIN contributors ON contributors.cntrb_id = contributor_repo.cntrb_id - AND contributors.gh_login IS NOT NULL - ORDER BY - contributor_repo.data_collection_date :: DATE NULLS FIRST - ) A - ORDER BY - data_collection_date DESC NULLS FIRST - ) b - """) - - with engine.connect() as connection: - result = connection.execute(cntrb_login_query) - - current_cntrb_logins = [dict(row) for row in result.mappings()] - - cntrb_newest_events_query = s.sql.text(""" - SELECT c.gh_login, MAX(cr.created_at) as newest_event_date - FROM contributor_repo AS cr - JOIN contributors AS c ON cr.cntrb_id = c.cntrb_id - GROUP BY c.gh_login; - """) - - with engine.connect() as connection: - cntrb_newest_events_list = connection.execute(cntrb_newest_events_query) - - cntrb_newest_events_list = [dict(row) for row in cntrb_newest_events_list.mappings()] - - cntrb_newest_events_map = {} - for cntrb_event in cntrb_newest_events_list: - - gh_login = cntrb_event["gh_login"] - newest_event_date = cntrb_event["newest_event_date"] - - cntrb_newest_events_map[gh_login] = newest_event_date - - github_data_access = GithubDataAccess(key_auth, logger) - - index = 1 - total = len(current_cntrb_logins) - for cntrb in current_cntrb_logins: - - logger.info(f"Processing cntrb {index} of {total}") - index += 1 - - repo_cntrb_url = f"https://api.github.com/users/{cntrb['gh_login']}/events" - - newest_event_in_db = datetime(1970, 1, 1) - if cntrb["gh_login"] in cntrb_newest_events_map: - newest_event_in_db = cntrb_newest_events_map[cntrb["gh_login"]] - - - cntrb_events = [] - try: - for event in github_data_access.paginate_resource(repo_cntrb_url): - - cntrb_events.append(event) - - event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ") - if event_age < newest_event_in_db: - logger.info("Found cntrb events we already have...skipping the rest") - break - - if len(cntrb_events) == 0: - logger.info("There are no cntrb events, or new events for this user.\n") - continue - - except UrlNotFoundException as e: - logger.warning(e) - continue - - events = process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source) - - logger.info(f"Inserting {len(events)} events") - natural_keys = ["event_id", "tool_version"] - bulk_insert_dicts(logger, events, ContributorRepo, natural_keys) - - -def process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source): - - cntrb_repos_insert = [] - for event_id_api in cntrb_events: - - cntrb_repos_insert.append({ - "cntrb_id": 
cntrb['cntrb_id'], - "repo_git": event_id_api['repo']['url'], - "tool_source": tool_source, - "tool_version": tool_version, - "data_source": data_source, - "repo_name": event_id_api['repo']['name'], - "gh_repo_id": event_id_api['repo']['id'], - "cntrb_category": event_id_api['type'], - "event_id": int(event_id_api['id']), - "created_at": event_id_api['created_at'] - }) - - return cntrb_repos_insert +#SPDX-License-Identifier: MIT +import logging +import sqlalchemy as s +from datetime import datetime + +from augur.tasks.init.celery_app import celery_app as celery +from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException +from augur.application.db.models import ContributorRepo +from augur.application.db.lib import bulk_insert_dicts +from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth + +### This worker scans all the platform users in Augur, and pulls their platform activity +### logs. Those are then used to analyze what repos each is working in (which will include repos not +### tracked in the Augur instance.) +### Logic: For each unique platform contributor, gather non duplicate events, using the GitHub "id" +### for the event API (GitLab coming!) + +@celery.task(bind=True) +def contributor_breadth_model(self) -> None: + + engine = self.app.engine + + logger = logging.getLogger(__name__) + + tool_source = 'Contributor Breadth Worker' + tool_version = '0.0.1' + data_source = 'GitHub API' + + key_auth = GithubRandomKeyAuth(logger) + + # This version of the query pulls contributors who have not had any data collected yet + # To the top of the list + cntrb_login_query = s.sql.text(""" + SELECT DISTINCT + gh_login, + cntrb_id + FROM + ( + SELECT DISTINCT + gh_login, + cntrb_id, + data_collection_date + FROM + ( + SELECT DISTINCT + contributors.gh_login, + contributors.cntrb_id, + contributor_repo.data_collection_date :: DATE + FROM + contributor_repo + RIGHT OUTER JOIN contributors ON contributors.cntrb_id = contributor_repo.cntrb_id + AND contributors.gh_login IS NOT NULL + ORDER BY + contributor_repo.data_collection_date :: DATE NULLS FIRST + ) A + ORDER BY + data_collection_date DESC NULLS FIRST + ) b + """) + + with engine.connect() as connection: + result = connection.execute(cntrb_login_query) + + current_cntrb_logins = [dict(row) for row in result.mappings()] + + cntrb_newest_events_query = s.sql.text(""" + SELECT c.gh_login, MAX(cr.created_at) as newest_event_date + FROM contributor_repo AS cr + JOIN contributors AS c ON cr.cntrb_id = c.cntrb_id + GROUP BY c.gh_login; + """) + + with engine.connect() as connection: + cntrb_newest_events_list = connection.execute(cntrb_newest_events_query) + + cntrb_newest_events_list = [dict(row) for row in cntrb_newest_events_list.mappings()] + + cntrb_newest_events_map = {} + for cntrb_event in cntrb_newest_events_list: + + gh_login = cntrb_event["gh_login"] + newest_event_date = cntrb_event["newest_event_date"] + + cntrb_newest_events_map[gh_login] = newest_event_date + + github_data_access = GithubDataAccess(key_auth, logger) + + index = 1 + total = len(current_cntrb_logins) + for cntrb in current_cntrb_logins: + + logger.info(f"Processing cntrb {index} of {total}") + index += 1 + + repo_cntrb_url = f"https://api.github.com/users/{cntrb['gh_login']}/events" + + newest_event_in_db = datetime(1970, 1, 1) + if cntrb["gh_login"] in cntrb_newest_events_map: + newest_event_in_db = cntrb_newest_events_map[cntrb["gh_login"]] + + + cntrb_events = [] + try: + for event in 
github_data_access.paginate_resource(repo_cntrb_url):
+
+                cntrb_events.append(event)
+
+                event_age = datetime.strptime(event["created_at"], "%Y-%m-%dT%H:%M:%SZ")
+                if event_age < newest_event_in_db:
+                    logger.info("Found cntrb events we already have...skipping the rest")
+                    break
+
+            if len(cntrb_events) == 0:
+                logger.info("There are no cntrb events, or new events for this user.\n")
+                continue
+
+        except UrlNotFoundException as e:
+            logger.warning(e)
+            continue
+
+        events = process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source)
+
+        logger.info(f"Inserting {len(events)} events")
+        natural_keys = ["event_id", "tool_version"]
+        bulk_insert_dicts(logger, events, ContributorRepo, natural_keys)
+
+
+def process_contributor_events(cntrb, cntrb_events, logger, tool_source, tool_version, data_source):
+
+    cntrb_repos_insert = []
+    for event_id_api in cntrb_events:
+
+        cntrb_repos_insert.append({
+            "cntrb_id": cntrb['cntrb_id'],
+            "repo_git": event_id_api['repo']['url'],
+            "tool_source": tool_source,
+            "tool_version": tool_version,
+            "data_source": data_source,
+            "repo_name": event_id_api['repo']['name'],
+            "gh_repo_id": event_id_api['repo']['id'],
+            "cntrb_category": event_id_api['type'],
+            "event_id": int(event_id_api['id']),
+            "created_at": event_id_api['created_at']
+        })
+
+    return cntrb_repos_insert

From 0e567d0d10e39cb30e5b389d7c7bc3c671935012 Mon Sep 17 00:00:00 2001
From: PredictiveManish
Date: Sat, 10 Jan 2026 02:03:26 +0530
Subject: [PATCH 017/389] Removed unnecessary files

Signed-off-by: PredictiveManish
---
 augur/tasks/test.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/augur/tasks/test.py b/augur/tasks/test.py
index 53950c448..efdacb77f 100644
--- a/augur/tasks/test.py
+++ b/augur/tasks/test.py
@@ -1,11 +1,11 @@
-from augur.tasks.init.celery_app import celery_app as celery
-
-@celery.task()
-def successful_task():
-    pass
-
-@celery.task()
-def failure_task():
-    raise Exception("ERROR")
-
-
+from augur.tasks.init.celery_app import celery_app as celery
+
+@celery.task()
+def successful_task():
+    pass
+
+@celery.task()
+def failure_task():
+    raise Exception("ERROR")
+
+

From 61b992804cf0c3c67ecb87822972a7624beafeac Mon Sep 17 00:00:00 2001
From: Dhanesh Kolu
Date: Sat, 10 Jan 2026 13:59:24 +0530
Subject: [PATCH 022/389] docs: clarify GitHub and GitLab token permissions for
 data collection

Signed-off-by: Dhanesh Kolu
---
 .../getting-started/collecting-data.rst       | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst
index 7c75097fd..745415ccd 100644
--- a/docs/source/getting-started/collecting-data.rst
+++ b/docs/source/getting-started/collecting-data.rst
@@ -26,6 +26,49 @@ Now, here's a ton of brain-splitting detail about celery collection. There are 2

 Since the default setup will work for most use cases, we'll first cover how to configure some specific data collection jobs and then briefly touch on the celery configuration options, after which we'll cover how to add repos and repo groups to the database.
+
+Authentication and API Tokens
+=============================
+
+Augur collects data from hosted source control platforms such as GitHub and GitLab using
+their respective REST APIs. To avoid the strict rate limits applied to unauthenticated
+API requests, and to enable access to private repositories, Augur requires Personal
+Access Tokens (PATs) with the scopes described below.
+
+GitHub Authentication
+---------------------
+
+Augur uses the GitHub REST API to collect repository metadata, issues, pull requests,
+releases, and contributor information.
+
+A GitHub Personal Access Token (PAT) is required. The minimum recommended permissions are:
+
+- **Classic Personal Access Token (recommended)**
+
+  - ``repo`` — required for private repositories
+  - ``read:org`` — required when collecting data from repositories owned by an organization
+  - ``read:user`` — required for contributor and user metadata returned by the GitHub API
+
+For public repositories, a token without the ``repo`` scope is sufficient; however,
+requests made without any token are unauthenticated and subject to significantly lower
+API rate limits.
+
+GitHub tokens should be treated as secrets and supplied to Augur using environment
+variables or the configuration options described during installation.
+
+GitLab Authentication
+---------------------
+
+Augur collects data from the GitLab API using a GitLab Personal Access Token.
+
+The token must include the following scopes:
+
+- ``read_api`` — required for accessing repository metadata, issues, and merge requests
+- ``read_repository`` — required for repository and commit data
+
+These scopes apply to both GitLab.com and self-hosted GitLab instances. When using a
+self-hosted GitLab deployment, ensure the API base URL is configured correctly.
+
+As with GitHub tokens, GitLab tokens should be stored securely and provided to Augur
+through environment variables or configuration files.
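+
+For example, tokens can be exported as environment variables before Augur starts, or
+recorded in the configuration with the ``augur config`` CLI. The variable, section, and
+key names below are illustrative; confirm the exact names against the installation
+documentation for your deployment::
+
+    # Hypothetical variable names, shown for illustration only
+    export AUGUR_GITHUB_API_KEY="<your GitHub PAT>"
+    export AUGUR_GITLAB_API_KEY="<your GitLab PAT>"
+
+    # Alternatively, store the tokens in the Augur configuration
+    # (section and key names assumed; verify before use)
+    augur config set-value Keys github_api_key "<your GitHub PAT>"
+    augur config set-value Keys gitlab_api_key "<your GitLab PAT>"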
+
 Configuring Collection
 ----------------------
 

From e0526ff0d626656fb9c4d9bcc4a8f60a8722eb86 Mon Sep 17 00:00:00 2001
From: Dhanesh Kolu
Date: Sun, 11 Jan 2026 14:11:27 +0530
Subject: [PATCH 023/389] docs: clarify GitHub token requirement for public
 repositories

Signed-off-by: Dhanesh Kolu
--chop-long-lines + Chop (truncate) long lines rather than wrapping. + -t _t_a_g .... --tag=[_t_a_g] + Find a tag. + -T [_t_a_g_s_f_i_l_e] --tag-file=[_t_a_g_s_f_i_l_e] + Use an alternate tags file. + -u -U .... --underline-special --UNDERLINE-SPECIAL + Change handling of backspaces, tabs and carriage returns. + -V ........ --version + Display the version number of "less". + -w ........ --hilite-unread + Highlight first new line after forward-screen. + -W ........ --HILITE-UNREAD + Highlight first new line after any forward movement. + -x [_N[,...]] --tabs=[_N[,...]] + Set tab stops. + -X ........ --no-init + Don't use termcap init/deinit strings. + -y [_N] .... --max-forw-scroll=[_N] + Forward scroll limit. + -z [_N] .... --window=[_N] + Set size of window. + -" [_c[_c]] . --quotes=[_c[_c]] + Set shell quote characters. + -~ ........ --tilde + Don't display tildes after end of file. + -# [_N] .... --shift=[_N] + Set horizontal scroll amount (0 = one half screen width). + + --exit-follow-on-close + Exit F command on a pipe when writer closes pipe. + --file-size + Automatically determine the size of the input file. + --follow-name + The F command changes files if the input file is renamed. + --form-feed + Stop scrolling when a form feed character is reached. + --header=[_L[,_C[,_N]]] + Use _L lines (starting at line _N) and _C columns as headers. + --incsearch + Search file as each pattern character is typed in. + --intr=[_C] + Use _C instead of ^X to interrupt a read. + --lesskey-context=_t_e_x_t + Use lesskey source file contents. + --lesskey-src=_f_i_l_e + Use a lesskey source file. + --line-num-width=[_N] + Set the width of the -N line number field to _N characters. + --match-shift=[_N] + Show at least _N characters to the left of a search match. + --modelines=[_N] + Read _N lines from the input file and look for vim modelines. + --mouse + Enable mouse input. + --no-edit-warn + Don't warn when using v command on a file opened via LESSOPEN. + --no-keypad + Don't send termcap keypad init/deinit strings. + --no-histdups + Remove duplicates from command history. + --no-number-headers + Don't give line numbers to header lines. + --no-paste + Ignore pasted input. + --no-search-header-lines + Searches do not include header lines. + --no-search-header-columns + Searches do not include header columns. + --no-search-headers + Searches do not include header lines or columns. + --no-vbell + Disable the terminal's visual bell. + --redraw-on-quit + Redraw final screen when quitting. + --rscroll=[_C] + Set the character used to mark truncated lines. + --save-marks + Retain marks across invocations of less. + --search-options=[EFKNRW-] + Set default options for every search. + --show-preproc-errors + Display a message if preprocessor exits with an error status. + --proc-backspace + Process backspaces for bold/underline. + --PROC-BACKSPACE + Treat backspaces as control characters. + --proc-return + Delete carriage returns before newline. + --PROC-RETURN + Treat carriage returns as control characters. + --proc-tab + Expand tabs to spaces. + --PROC-TAB + Treat tabs as control characters. + --status-col-width=[_N] + Set the width of the -J status column to _N characters. + --status-line + Highlight or color the entire line containing a mark. + --use-backslash + Subsequent options use backslash as escape char. + --use-color + Enables colored text. + --wheel-lines=[_N] + Each click of the mouse wheel moves _N lines. + --wordwrap + Wrap lines at spaces. 
+ + + --------------------------------------------------------------------------- + + LLIINNEE EEDDIITTIINNGG + + These keys can be used to edit text being entered + on the "command line" at the bottom of the screen. + + RightArrow ..................... ESC-l ... Move cursor right one character. + LeftArrow ...................... ESC-h ... Move cursor left one character. + ctrl-RightArrow ESC-RightArrow ESC-w ... Move cursor right one word. + ctrl-LeftArrow ESC-LeftArrow ESC-b ... Move cursor left one word. + HOME ........................... ESC-0 ... Move cursor to start of line. + END ............................ ESC-$ ... Move cursor to end of line. + BACKSPACE ................................ Delete char to left of cursor. + DELETE ......................... ESC-x ... Delete char under cursor. + ctrl-BACKSPACE ESC-BACKSPACE ........... Delete word to left of cursor. + ctrl-DELETE .... ESC-DELETE .... ESC-X ... Delete word under cursor. + ctrl-U ......... ESC (MS-DOS only) ....... Delete entire line. + UpArrow ........................ ESC-k ... Retrieve previous command line. + DownArrow ...................... ESC-j ... Retrieve next command line. + TAB ...................................... Complete filename & cycle. + SHIFT-TAB ...................... ESC-TAB Complete filename & reverse cycle. + ctrl-L ................................... Complete filename, list all. From f11b4236449e29b5b5e8c6dcfde9c7ec11904f03 Mon Sep 17 00:00:00 2001 From: Dhanesh Kolu Date: Sun, 11 Jan 2026 14:16:35 +0530 Subject: [PATCH 024/389] docs: remove accidental files and clarify GitHub token requirement Signed-off-by: Dhanesh Kolu --- .../getting-started/collecting-data.rst | 4 +- t | 101 ------ ...72 minor README cleanup and clarification" | 324 ------------------ 3 files changed, 2 insertions(+), 427 deletions(-) delete mode 100644 t delete mode 100644 "\357\200\272 minor README cleanup and clarification" diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index 745415ccd..8fc4aff89 100644 --- a/docs/source/getting-started/collecting-data.rst +++ b/docs/source/getting-started/collecting-data.rst @@ -47,8 +47,8 @@ A GitHub Personal Access Token (PAT) is required. The minimum recommended permis - ``read:org`` — required when collecting data from repositories owned by an organization - ``read:user`` — required for contributor and user metadata returned by the GitHub API -For public repositories only, a token without ``repo`` scope may be sufficient, but API -rate limits will be significantly lower when requests are unauthenticated or made without a token. +For public repositories only, a token without ``repo`` scope may be sufficient, though a +GitHub Personal Access Token is still required for Augur to authenticate API requests. GitHub tokens should be treated as secrets and supplied to Augur using environment variables or configuration options described during installation. 
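
As context for the token guidance above, a minimal sketch of supplying a PAT through an environment variable. The variable name GITHUB_TOKEN and the header format are illustrative assumptions here, not Augur's actual configuration surface:

    import os

    # Hypothetical illustration: read a GitHub PAT from the environment rather
    # than hardcoding it. GITHUB_TOKEN is an assumed name, not necessarily the
    # variable Augur itself reads.
    token = os.environ.get("GITHUB_TOKEN")
    if token is None:
        raise SystemExit("GITHUB_TOKEN is not set; export it before starting collection")

    # Classic PATs are sent as a token credential on GitHub API requests.
    headers = {"Authorization": f"token {token}"}
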
diff --git a/t b/t
deleted file mode 100644
index b59033adf..000000000
--- a/t
+++ /dev/null
@@ -1,101 +0,0 @@
-[... the 101-line `git log --oneline` dump added by the previous accidental commit, deleted here in full ...]
-4732c8090 (tag: v0.91.0) updated the version in README.md
\ No newline at end of file
diff --git "a/\357\200\272 minor README cleanup and clarification" "b/\357\200\272 minor README cleanup and clarification"
deleted file mode 100644
index 74570f663..000000000
--- "a/\357\200\272 minor README cleanup and clarification"
+++ /dev/null
@@ -1,324 +0,0 @@
-[... the 324-line `less` help screen captured in the accidental file, deleted here in full ...]
From aeb02fe6ebb9ed78b74e395df6944681eccbaa2a Mon Sep 17 00:00:00 2001
From: Sukuna0007Abhi
Date: Sun, 11 Jan 2026 12:07:26 +0000
Subject: [PATCH 025/389] Remove recursive flag from chmod in docker-compose

Fixes #3306
Related to #3212

The recursive chmod was causing all files in cloned facade repositories to
have their permissions changed to 777 on every restart, putting git
repositories in a dirty state. Removing the -R flag ensures only the
directory permissions are changed, not all files within them.
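
As an illustration of the behavioral difference (a sketch, not part of the patch), the non-recursive form touches only the directory entries themselves, which is what keeps the cloned working trees clean:

    import os

    # Non-recursive equivalent of `chmod 777 <dir>` for the container's mount
    # points: only the directories' own modes change; files inside them (the
    # cloned git repositories) keep their permissions, so `git status` stays
    # clean. Assumes these paths exist, as they do inside the compose service.
    for mount in ("/facade", "/logs", "/config", "/cache"):
        os.chmod(mount, 0o777)
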
Signed-off-by: Sukuna0007Abhi
---
 docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index f0ef41015..7f4f1fc70 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -87,7 +87,7 @@ services:
     user: 2345:2345 # Run as an arbitrary non-root user
     post_start: # Make sure the user has access to the volumes
-      - command: chmod -R 777 /facade /logs /config /cache
+      - command: chmod 777 /facade /logs /config /cache
         user: root
 
 # Flower is a UI that helps more easily monitor running tasks for celery workers.
From 1971210012ffd05cf31442b26b18537918052297 Mon Sep 17 00:00:00 2001
From: iGufrankhan
Date: Sun, 11 Jan 2026 19:30:03 +0000
Subject: [PATCH 026/389] Fix crash in /requests/report/wait by removing missing requestReports

Signed-off-by: iGufrankhan
---
 augur/api/view/api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/augur/api/view/api.py b/augur/api/view/api.py
index eee99c93c..be7996b29 100644
--- a/augur/api/view/api.py
+++ b/augur/api/view/api.py
@@ -1,6 +1,7 @@
 import logging
 import re
 
+from augur.api.view.init import report_requests
 from flask import flash, current_app, jsonify, redirect, request, url_for
 from flask_login import current_user, login_required
 
@@ -234,5 +235,4 @@ def user_app_create():
 """
 @app.route('/requests/report/wait/<id>')
 def wait_for_report_request(id):
-    requestReports(id)
-    return jsonify(report_requests[id])
+    return jsonify(report_requests.get(id, {}))
From 2f2bfb9eb300b0a8802b3c5b06d63c6c7d350d20 Mon Sep 17 00:00:00 2001
From: guptapratykshh
Date: Sun, 11 Jan 2026 23:39:48 +0530
Subject: [PATCH 027/389] Add .gitattributes to force LF line endings on shell scripts

Signed-off-by: guptapratykshh
---
 .gitattributes | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..efdba8764
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+* text=auto
+*.sh text eol=lf
From 79675848ce6d15aa9f25ff3b214d51411932cf11 Mon Sep 17 00:00:00 2001
From: 1steve78
Date: Sat, 10 Jan 2026 13:57:36 +0530
Subject: [PATCH 028/389] Remove obsolete explorer_libyear_all matview refresh

Signed-off-by: 1steve78
---
 scripts/control/refresh-matviews.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/control/refresh-matviews.sh b/scripts/control/refresh-matviews.sh
index 1d1756031..444adb3a2 100644
--- a/scripts/control/refresh-matviews.sh
+++ b/scripts/control/refresh-matviews.sh
@@ -5,6 +5,5 @@ psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur
 psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_commits with data;'
 psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.augur_new_contributors with data;'
 psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_actions with data;'
-psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_libyear_all with data;'
 psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data;'
-psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data;'
\ No newline at end of file
+psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data;'
From
6d0cb25e7a0c372ba2b6d763039ea16ff17ae5a0 Mon Sep 17 00:00:00 2001 From: ANJAN672 Date: Sat, 10 Jan 2026 22:09:15 +0000 Subject: [PATCH 029/389] fix(docker): use custom database Dockerfile for schema initialization Signed-off-by: ANJAN672 --- docker-compose.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index f0ef41015..8f8869bb9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,9 @@ #SPDX-License-Identifier: MIT services: augur-db: - image: postgres:16 + build: + context: . + dockerfile: ./docker/database/Dockerfile restart: unless-stopped environment: - "POSTGRES_DB=augur" From 7b638cc446ab0e237e205f18758b68bf202f3bfe Mon Sep 17 00:00:00 2001 From: ANJAN672 Date: Mon, 12 Jan 2026 15:07:45 +0000 Subject: [PATCH 030/389] chore: trigger CI re-run Signed-off-by: ANJAN672 From e888bb99f05f30439fef41f0fc72a464fc75c811 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Tue, 13 Jan 2026 11:01:51 -0500 Subject: [PATCH 031/389] add pylint disable for no-member warnings in KeyClient and KeyPublisher classes Signed-off-by: Shlok Gilda --- keyman/KeyClient.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/keyman/KeyClient.py b/keyman/KeyClient.py index 68249f386..8e337d36d 100644 --- a/keyman/KeyClient.py +++ b/keyman/KeyClient.py @@ -24,6 +24,7 @@ class KeyClient: Raises: ValueError: If platform is empty or None """ + # pylint: disable=no-member def __init__(self, platform: str, logger: Logger): self.id = getpid() @@ -191,7 +192,8 @@ class KeyPublisher: Typically used during Augur startup to load keys from database. """ - + # pylint: disable=no-member + def __init__(self) -> None: # Load channel names and IDs from the spec for channel in spec["channels"]: From cb8f8cbfa6f4b6e85cc4c7f86916de63d416fea6 Mon Sep 17 00:00:00 2001 From: Noaman-Akhtar Date: Wed, 14 Jan 2026 02:40:17 +0530 Subject: [PATCH 032/389] fix: cleanup TODO comments in frontend.py (fixes #3574) Signed-off-by: Noaman-Akhtar --- augur/tasks/frontend.py | 42 ----------------------------------------- 1 file changed, 42 deletions(-) diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index d1a391814..0b87fd80e 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -111,7 +111,6 @@ def add_gitlab_repos(user_id, group_name, repo_urls): if existing_repo.repo_group_id != repo_group_id: update_existing_repos_repo_group_id(session, existing_repo.repo_id, repo_group_id) - # TODO: add logic to update the existing records repo_group_id if it isn't equal to the existing record add_existing_repo_to_group(logger, session, group_id, existing_repo.repo_id) continue @@ -150,7 +149,6 @@ def get_org_repo_data(orgs, session): return repo_data -# TODO: Do we need to check if the repo already exists in the user group? 
def add_new_github_repos(repo_data, group_id, session, logger): # get data for repos to determine type, src id, and if they exist @@ -198,7 +196,6 @@ def divide_list_into_chunks(data, size): yield data[i:i + size] -# TODO: Make it only get like 100 at a time def get_github_repos_data(repo_data, session, logger): repo_urls = [x[0] for x in repo_data] @@ -364,42 +361,3 @@ def update_existing_repos_repo_group_id(session, repo_id, new_repo_group_id): # invalid_urls.append(url) # return valid_orgs, valid_repos, invalid_urls - - - - - -# TODO: Change to github specific -# @celery.task -# def add_repo(user_id, group_name, repo_url): - -# logger = logging.getLogger(add_org.__name__) - -# with GithubTaskSession(logger) as session: -# result = UserRepo.add_github_repo(session, repo_url, user_id, group_name) - -# print(repo_url, result) - - -# # TODO: Change to github specific -# @celery.task -# def add_org(user_id, group_name, org_url): - -# logger = logging.getLogger(add_org.__name__) - -# with GithubTaskSession(logger) as session: -# result = UserRepo.add_github_org_repos(session, org_url, user_id, group_name) - -# print(org_url, result) - - - - - - - - - - - - From 0d15490dcc12e92d51145434b1d91b118c3fcad0 Mon Sep 17 00:00:00 2001 From: guptapratykshh Date: Thu, 15 Jan 2026 21:43:43 +0530 Subject: [PATCH 033/389] Fix #3579: Replace subprocess.call() with check_call() in db.py to prevent silent failures Signed-off-by: guptapratykshh --- augur/application/cli/db.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index 8d5408eae..d650f0f32 100644 --- a/augur/application/cli/db.py +++ b/augur/application/cli/db.py @@ -3,7 +3,7 @@ from os import environ, chmod, path, getenv import logging from sys import exit -from subprocess import call +from subprocess import check_call import random import string import click @@ -280,7 +280,7 @@ def print_db_version(): """ Get the version of the configured database """ - call(["alembic", "current"]) + check_call(["alembic", "current"]) @cli.command("upgrade-db-version") @@ -290,7 +290,7 @@ def upgrade_db_version(): """ Upgrade the configured database to the latest version """ - call(["alembic", "upgrade", "head"]) + check_call(["alembic", "upgrade", "head"]) @cli.command("check-for-upgrade") @@ -300,7 +300,7 @@ def check_for_upgrade(): """ Upgrade the configured database to the latest version """ - call(["alembic", "history", "-i"]) + check_call(["alembic", "history", "-i"]) @cli.command("create-schema") @@ -310,7 +310,7 @@ def create_schema(): """ Create schema in the configured database """ - call(["alembic", "upgrade", "head"]) + check_call(["alembic", "upgrade", "head"]) def generate_key(length): @@ -513,7 +513,7 @@ def run_psql_command_in_database(target_type, target): db_conn_string = f"postgresql+psycopg2://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['database_name']}" engine = s.create_engine(db_conn_string) - call( + check_call( [ "psql", "-h", From 7c7642a98879d563d906ba9d5bc59f31c71da68f Mon Sep 17 00:00:00 2001 From: guptapratykshh Date: Fri, 16 Jan 2026 10:55:19 +0530 Subject: [PATCH 034/389] Fix docstring for check_for_upgrade() to accurately describe its purpose Signed-off-by: guptapratykshh --- augur/application/cli/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index d650f0f32..dcf1c5c6c 100644 --- a/augur/application/cli/db.py 
+++ b/augur/application/cli/db.py @@ -298,7 +298,7 @@ def upgrade_db_version(): @test_db_connection def check_for_upgrade(): """ - Upgrade the configured database to the latest version + Show available database migration history """ check_call(["alembic", "history", "-i"]) From a2b6a66810ff4339d553260ec44b89ee034c5040 Mon Sep 17 00:00:00 2001 From: Sukuna0007Abhi Date: Fri, 16 Jan 2026 11:33:48 +0000 Subject: [PATCH 035/389] Fix(messages): set per-message tool_source for issue/PR comments (fixes #2545) Signed-off-by: Sukuna0007Abhi --- augur/tasks/github/messages.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/messages.py b/augur/tasks/github/messages.py index 812af0fad..e79cbaaea 100644 --- a/augur/tasks/github/messages.py +++ b/augur/tasks/github/messages.py @@ -136,7 +136,6 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger def process_messages(messages, task_name, repo_id, logger, augur_db): - tool_source = "Pr comment task" tool_version = "2.0" data_source = "Github API" @@ -175,6 +174,12 @@ def process_messages(messages, task_name, repo_id, logger, augur_db): related_pr_or_issue_found = False + # determine whether this is an issue or PR message so we can set the correct tool_source in metadata + if is_issue_message(message["html_url"]): + tool_source = "Issue comment task" + else: + tool_source = "Pr comment task" + # this adds the cntrb_id to the message data # the returned contributor will be added to the contributors list later, if the related issue or pr are found # this logic is used so we don't insert a contributor when the related message isn't inserted From 21b12d70ec4d44b9cc5a0576fe5392eb92c00286 Mon Sep 17 00:00:00 2001 From: guptapratykshh Date: Sat, 17 Jan 2026 16:55:22 +0530 Subject: [PATCH 036/389] fix: filter NULL comment URLs in github message task to prevent crash Signed-off-by: guptapratykshh --- augur/tasks/github/messages.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/augur/tasks/github/messages.py b/augur/tasks/github/messages.py index 812af0fad..6a888be5e 100644 --- a/augur/tasks/github/messages.py +++ b/augur/tasks/github/messages.py @@ -93,21 +93,21 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger if since: query = text(f""" - (select pr_comments_url from pull_requests WHERE repo_id={repo_id} AND pr_updated_at > timestamptz(timestamp '{since}') order by pr_created_at desc) + (select pr_comments_url from pull_requests WHERE repo_id={repo_id} AND pr_comments_url IS NOT NULL AND pr_updated_at > timestamptz(timestamp '{since}') order by pr_created_at desc) UNION - (select comments_url as comment_url from issues WHERE repo_id={repo_id} AND updated_at > timestamptz(timestamp '{since}') order by created_at desc); + (select comments_url as comment_url from issues WHERE repo_id={repo_id} AND comments_url IS NOT NULL AND updated_at > timestamptz(timestamp '{since}') order by created_at desc); """) else: query = text(f""" - (select pr_comments_url from pull_requests WHERE repo_id={repo_id} order by pr_created_at desc) + (select pr_comments_url from pull_requests WHERE repo_id={repo_id} AND pr_comments_url IS NOT NULL order by pr_created_at desc) UNION - (select comments_url as comment_url from issues WHERE repo_id={repo_id} order by created_at desc); + (select comments_url as comment_url from issues WHERE repo_id={repo_id} AND comments_url IS NOT NULL order by created_at desc); """) result = 
connection.execute(query).fetchall() - comment_urls = [x[0] for x in result] + comment_urls = [x[0] for x in result if x[0] is not None] github_data_access = GithubDataAccess(key_auth, logger) From 03894ce992a6db9eb83b34eff15ca57bc8b42eee Mon Sep 17 00:00:00 2001 From: atheendre130505 Date: Sun, 18 Jan 2026 18:30:41 +0530 Subject: [PATCH 037/389] [tasks] fix: undefined variable 's' in start_tasks.py The variable 's' (sqlalchemy) was undefined in start_tasks.py, causing a NameError in cleanup tasks. Imported 'sqlalchemy as s' to fix. Signed-off-by: atheendre130505 --- augur/tasks/start_tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 91e05c6fc..8387e6746 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -4,6 +4,7 @@ #from celery.result import AsyncResult from celery import group, chain from sqlalchemy import and_,update +import sqlalchemy as s from augur.tasks.github import * From 0704486fd1a2befea1a53827a101cb8f08796298 Mon Sep 17 00:00:00 2001 From: atheendre130505 Date: Mon, 19 Jan 2026 09:57:36 +0530 Subject: [PATCH 038/389] [tasks] fix: skip dependencies with invalid/missing version info The process_libyear_dependency_metrics task was failing when encountering dependencies that couldn't be resolved on PyPI or NPM (e.g., the 'python' runtime requirement or versions like '2.9.0.0' not found in release history). This fix adds validation checks in get_deps_libyear_data: - Skips dependencies if the package cannot be found on the registry. - Skips dependencies if the current version requirement cannot be resolved. - Skips dependencies if release date information is missing for the current or latest version. These encountered issues are logged as warnings instead of causing task failures, ensuring the overall collection process remains robust even when some metadata is unavailable. 
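
The skip-and-warn pattern this message describes, as a minimal self-contained Python sketch (the fetch callable and the dependency dict shape are assumptions for illustration, not the exact Augur helpers):

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger("libyear")

    def resolvable(dependencies, fetch):
        """Yield (dependency, metadata) pairs only when registry metadata resolves."""
        for dep in dependencies:
            data = fetch(dep["name"])  # e.g. a PyPI or NPM lookup; may return None
            if not data:
                # Warn and move on instead of failing the whole collection task.
                logger.warning("Skipping %s - package not found on registry", dep["name"])
                continue
            yield dep, data
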
Signed-off-by: atheendre130505 --- .../libyear_util/util.py | 88 ++++++++++--------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/util.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/util.py index cf40b9f73..a372576e4 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/util.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/util.py @@ -111,69 +111,73 @@ def get_deps_libyear_data(path, logger): #NOTE: Add new if for new package parser if dependency['package'] == 'PYPI': data = get_pypi_data(dependency['name']) + if not data: + logger.warning(f"Skipping dependency {dependency['name']} - could not find package on PYPI.") + continue + try: current_version = sort_dependency_requirement(dependency,data) except (KeyError, TypeError) as e: - logger.error(f"Could not get current version of dependency for path {path}.\n Dependency: {dependency}") - current_version = None + logger.warning(f"Skipping dependency {dependency['name']} - could not resolve current version requirement: {dependency['requirement']}") + continue + + if not current_version: + logger.warning(f"Skipping dependency {dependency['name']} - current version is null or unspecified.") + continue + try: latest_version = get_latest_version(data) - - except KeyError: - logger.error(f"Could not get current version of dependency for path {path}.\n Dependency: {dependency}") - latest_version = None + except (KeyError, TypeError): + logger.warning(f"Skipping dependency {dependency['name']} - could not get latest version from PYPI.") + continue + if not latest_version: + logger.warning(f"Skipping dependency {dependency['name']} - latest version is null.") + continue + try: - if latest_version: - latest_release_date = get_release_date(data, latest_version,logger) - else: - latest_release_date = None + latest_release_date = get_release_date(data, latest_version, logger) + current_release_date = get_release_date(data, current_version, logger) except KeyError: - logger.error(f"Could not get current date of dependency for path {path} with version {latest_version}.\n Dependency: {dependency}") - latest_release_date = None - - if current_version: - current_release_date = get_release_date(data, current_version,logger) + logger.warning(f"Skipping dependency {dependency['name']} - could not find release dates for version {current_version} or {latest_version}.") + continue + + if not current_release_date or not latest_release_date: + logger.warning(f"Skipping dependency {dependency['name']} - missing release date information.") + continue elif dependency['package'] == 'NPM': data = get_NPM_data(dependency['name']) + if not data: + logger.warning(f"Skipping dependency {dependency['name']} - could not find package on NPM.") + continue + current_version = get_npm_current_version(data, dependency['requirement']) + if not current_version: + logger.warning(f"Skipping dependency {dependency['name']} - could not resolve current version from requirement: {dependency['requirement']}") + continue + try: latest_version = get_npm_latest_version(data) except KeyError: - logger.error(f"Could not get latest version of dependency for path {path}.\n Dependency: {dependency}") + logger.warning(f"Skipping dependency {dependency['name']} - could not get latest version from NPM.") latest_version = None + if not latest_version: + continue + try: - if latest_version: - latest_release_date = get_npm_release_date(data, latest_version) - else: - latest_release_date = None + 
latest_release_date = get_npm_release_date(data, latest_version)
+            current_release_date = get_npm_release_date(data, current_version)
         except KeyError:
-            logger.error(f"Could not get latest version of dependency for path {path}.\n Dependency: {dependency}")
-            latest_release_date = None
-
-        if current_version:
-            try:
-                current_release_date = get_npm_release_date(data, current_version)
-            except KeyError:
-                logger.error(f"Could not get latest version of dependency for path {path}.\n Dependency: {dependency}")
-                current_release_date = dateutil.parser.parse('1970-01-01 00:00:00')
+            logger.warning(f"Skipping dependency {dependency['name']} - missing release date info on NPM for {current_version}/{latest_version}")
+            continue
+        else:
+            # Unsupported package manager
+            continue
 
-        libyear = get_libyear(current_version, current_release_date, latest_version, latest_release_date)
-        if not latest_release_date:
-            latest_release_date = dateutil.parser.parse('1970-01-01 00:00:00')
-            libyear = -1
-
-        if not latest_version:
-            latest_version = 'unspecified'
-
-        if not current_version:
-            current_version = latest_version
-            current_release_date = latest_release_date
-
+        # Compute libyear for dependencies that passed all of the validation
+        # checks above; dependency['libyear'] below relies on this value.
+        libyear = get_libyear(current_version, current_release_date, latest_version, latest_release_date)
+
         if not dependency['requirement']:
             dependency['requirement'] = 'unspecified'
@@ -183,4 +187,4 @@
         dependency['latest_release_date'] = latest_release_date
         dependency['libyear'] = libyear
 
-    return [d for d in dependencies if d]
\ No newline at end of file
+    return [d for d in dependencies if 'libyear' in d]
\ No newline at end of file
From 5a94ef23f4d0aa2bf595826b234331eccfbfb9d7 Mon Sep 17 00:00:00 2001
From: "Sean P. Goggins"
Date: Tue, 20 Jan 2026 18:50:29 -0600
Subject: [PATCH 039/389] Update copyright year in README.md

Signed-off-by: Sean P. Goggins
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0a0ab7dea..8b4ca071e 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ We strongly believe that much of what makes open source so great is the incredib
 
 ## License, Copyright, and Funding
 
-Copyright © 2025 University of Missouri, Sean Goggins, and Derek Howard.
+Copyright © 2026 University of Missouri, Sean Goggins, and Derek Howard.
 
 Augur is free software: you can redistribute it and/or modify it under the terms of the MIT License as published by the Open Source Initiative. See the [LICENSE](LICENSE) file for more details.
From ec10899229b9c3200797d7cee10e91c48b340cbd Mon Sep 17 00:00:00 2001
From: nancywrites
Date: Tue, 3 Feb 2026 17:42:30 +0100
Subject: [PATCH 040/389] docs: rewrite 'What is Augur?' section in index.rst for beginners

Signed-off-by: nancywrites
---
 docs/source/index.rst | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 2307b272a..5b7e8dd92 100755
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -32,16 +32,40 @@ Augur Documentation
 What is Augur?
 ~~~~~~~~~~~~~~~~
 
-Augur is a software suite for collecting and measuring structured data about free and open-source software (FOSS) communities.
+Augur is a software tool that helps you **collect and measure information about open-source software projects**. Open-source projects are software projects where anyone can see and contribute to the code.
 
-Augur's main focus is to measure the overall health and sustainability of open source projects, as these types of projects are system critical for nearly every software organization or company.
We do this by gathering data about project repositories and normalizing that into our data model to provide useful metrics about your project's health. For example, one of our metrics is Burstiness. Burstiness - how are short timeframes of intense activity, followed by a corresponding return to a typical pattern of activity, observed in a project? This can paint a picture of a project's focus and gain insight into the potential stability of a project and how its typical cycle of updates occurs. There are many more useful metrics, and you can find a full list of them `here `__. +The main goal of Augur is to **understand how healthy and sustainable a project is**. Healthy projects are easier to rely on, and they are important because many software organizations or companies depend on open-source software. +How Augur works +--------------- -Augur gathers trace data for a group of repositories, normalize it into our data model, and provide a variety of metrics about that data. +1. Augur **looks at the project’s repositories** (the place where the project’s code and files live). +2. It **collects data** about what is happening in those repositories. +3. It **organizes this data** into a standard format called a data model. +4. Then it **calculates metrics** that tell you about the project’s health. -This software is developed as part of the CHAOSS (Community Health Analytics Open Source Software) project. Many of our metrics are implementations of the metrics defined by our community. You can find more information about how to get involved on the `CHAOSS website `_. +Example of a metric: Burstiness +------------------------------- + +- **Burstiness** is **one of Augur’s metrics**. +- It shows periods when a project has **a lot of activity in a short time**, followed by periods when activity goes back to normal. +- This helps you see a project’s **focus, update patterns, and stability**. +- In other words, you can tell **how often big changes happen** and whether the project works in a steady, predictable way. + +Augur calculates **many other metrics**, which you can see in the full list `here `_. + +Who develops Augur +----------------- + +- Augur is developed as part of **CHAOSS** (Community Health Analytics Open Source Software). +- Many of Augur’s metrics come directly from the CHAOSS community. +- If you want to **get involved**, visit the `CHAOSS website `_. + +See it in action +---------------- + +- You can check out Augur live on the CHAOSS instance `here `_. -If you want to see augur in action, you can view CHAOSS's augur instance `here `_. Current maintainers From 508ce6b76d8922c18c77bf7c696699e8b783861c Mon Sep 17 00:00:00 2001 From: Nancy Nwankwo <63652512+nancywrites@users.noreply.github.com> Date: Wed, 4 Feb 2026 14:18:33 +0100 Subject: [PATCH 041/389] Update docs/source/index.rst Co-authored-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Signed-off-by: Nancy Nwankwo <63652512+nancywrites@users.noreply.github.com> --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 5b7e8dd92..eb04ec285 100755 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -57,7 +57,7 @@ Augur calculates **many other metrics**, which you can see in the full list `her Who develops Augur ----------------- -- Augur is developed as part of **CHAOSS** (Community Health Analytics Open Source Software). +- Augur is developed as part of **CHAOSS** (Community Health Analytics for Open Source Software). 
- Many of Augur’s metrics come directly from the CHAOSS community. - If you want to **get involved**, visit the `CHAOSS website `_. From adf098466548cfd0c9b4ce26bf58ebdb7c5c0a59 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 10:36:20 -0400 Subject: [PATCH 042/389] remove deps that seem unused Signed-off-by: Adrian Edwards --- .gitignore | 7 --- pyproject.toml | 19 +----- uv.lock | 161 +------------------------------------------------ 3 files changed, 5 insertions(+), 182 deletions(-) diff --git a/.gitignore b/.gitignore index 93be721ef..6389abfcd 100644 --- a/.gitignore +++ b/.gitignore @@ -187,10 +187,3 @@ nohup.out # local db volume pgdata/ postgres-data/ - -# Generated files from github -.history/sendgrid.env -sendgrid.env -*sendgrid*.env -./sendgrid.env -sendgrid.env diff --git a/pyproject.toml b/pyproject.toml index 064b5e7bd..f8a7c3680 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,40 +21,30 @@ classifiers = [ dependencies = [ "alembic>=1.17.1", "Beaker==1.11.0", - "blinker==1.4", - "bokeh==2.0.2", "boto3==1.17.57", "bs4==0.0.1", "celery~=5.5", "click~=8.1", - "cloudpickle>=0.2.2", "coloredlogs==15.0", - "dask>=2021.6.2", "distributed>=2021.03.0", - "dnspython==2.6.1", "emoji==1.2.0", - "eventlet==0.35.2", "flask==2.0.2", "flask-cors==4.0.1", "flask-login==0.5.0", - "flask-wtf==1.0.0", "flask_graphql", "flower==2.0.1", - "fsspec>=0.6.0", "gensim>=4.2.0", "graphene", "graphene_sqlalchemy", "gunicorn==22.0.0", "h5py==3.10.0", "httpx==0.23.0", - "itsdangerous==2.0.1", "Jinja2~=3.0.3", "joblib==1.2.0", "keras>=2.15.0", "keras-preprocessing", "matplotlib>=3.5.1", "mdpdf==0.0.18", - "mistune", "nltk==3.6.6", "numpy==1.26.0", "pandas==1.5.3", @@ -62,24 +52,18 @@ dependencies = [ "protobuf<3.22", "psutil==5.8.0", "psycopg2-binary==2.9.9", - "pylint==2.15.5", "python-crfsuite>=0.9.8", "python-dotenv>=1.2.1", "pyYaml", "redis==4.3.3", - "requests==2.32.0", "scikit-image==0.19.1", "scikit-learn==1.5.0", + "requests~=2.32", "scipy>=1.10.0,<1.13.0", - "seaborn==0.11.1", - "selenium==3.141.0", - "sendgrid", - "six==1.15.0", "sklearn==0.0", "sklearn-crfsuite>=0.3.6", "slack==0.0.2", "SQLAlchemy==2.0.22", - "tabulate==0.8.9", "tenacity==8.3.0", "tensorflow==2.15.0", "textblob==0.15.3", @@ -104,6 +88,7 @@ dev = [ lint = [ "pylint", "mypy>=1.18.2", + "pylint==2.15.5", "types-requests>=2.31.0.6", "types-pyyaml>=6.0.12.20250915", "types-python-dateutil>=2.9.0.20251008", diff --git a/uv.lock b/uv.lock index daa844fd4..eef0b3df8 100644 --- a/uv.lock +++ b/uv.lock @@ -144,40 +144,30 @@ source = { editable = "." 
} dependencies = [ { name = "alembic" }, { name = "beaker" }, - { name = "blinker" }, - { name = "bokeh" }, { name = "boto3" }, { name = "bs4" }, { name = "celery" }, { name = "click" }, - { name = "cloudpickle" }, { name = "coloredlogs" }, - { name = "dask" }, { name = "distributed" }, - { name = "dnspython" }, { name = "emoji" }, - { name = "eventlet" }, { name = "flask" }, { name = "flask-cors" }, { name = "flask-graphql" }, { name = "flask-login" }, - { name = "flask-wtf" }, { name = "flower" }, - { name = "fsspec" }, { name = "gensim" }, { name = "graphene" }, { name = "graphene-sqlalchemy" }, { name = "gunicorn" }, { name = "h5py" }, { name = "httpx" }, - { name = "itsdangerous" }, { name = "jinja2" }, { name = "joblib" }, { name = "keras" }, { name = "keras-preprocessing" }, { name = "matplotlib" }, { name = "mdpdf" }, - { name = "mistune" }, { name = "nltk" }, { name = "numpy" }, { name = "pandas" }, @@ -185,7 +175,6 @@ dependencies = [ { name = "protobuf" }, { name = "psutil" }, { name = "psycopg2-binary" }, - { name = "pylint" }, { name = "python-crfsuite" }, { name = "python-dotenv" }, { name = "pyyaml" }, @@ -194,15 +183,10 @@ dependencies = [ { name = "scikit-image" }, { name = "scikit-learn" }, { name = "scipy" }, - { name = "seaborn" }, - { name = "selenium" }, - { name = "sendgrid" }, - { name = "six" }, { name = "sklearn" }, { name = "sklearn-crfsuite" }, { name = "slack" }, { name = "sqlalchemy" }, - { name = "tabulate" }, { name = "tenacity" }, { name = "tensorflow" }, { name = "textblob" }, @@ -269,40 +253,30 @@ test = [ requires-dist = [ { name = "alembic", specifier = ">=1.17.1" }, { name = "beaker", specifier = "==1.11.0" }, - { name = "blinker", specifier = "==1.4" }, - { name = "bokeh", specifier = "==2.0.2" }, { name = "boto3", specifier = "==1.17.57" }, { name = "bs4", specifier = "==0.0.1" }, { name = "celery", specifier = "~=5.5" }, { name = "click", specifier = "~=8.1" }, - { name = "cloudpickle", specifier = ">=0.2.2" }, { name = "coloredlogs", specifier = "==15.0" }, - { name = "dask", specifier = ">=2021.6.2" }, { name = "distributed", specifier = ">=2021.3.0" }, - { name = "dnspython", specifier = "==2.6.1" }, { name = "emoji", specifier = "==1.2.0" }, - { name = "eventlet", specifier = "==0.35.2" }, { name = "flask", specifier = "==2.0.2" }, { name = "flask-cors", specifier = "==4.0.1" }, { name = "flask-graphql" }, { name = "flask-login", specifier = "==0.5.0" }, - { name = "flask-wtf", specifier = "==1.0.0" }, { name = "flower", specifier = "==2.0.1" }, - { name = "fsspec", specifier = ">=0.6.0" }, { name = "gensim", specifier = ">=4.2.0" }, { name = "graphene" }, { name = "graphene-sqlalchemy" }, { name = "gunicorn", specifier = "==22.0.0" }, { name = "h5py", specifier = "==3.10.0" }, { name = "httpx", specifier = "==0.23.0" }, - { name = "itsdangerous", specifier = "==2.0.1" }, { name = "jinja2", specifier = "~=3.0.3" }, { name = "joblib", specifier = "==1.2.0" }, { name = "keras", specifier = ">=2.15.0" }, { name = "keras-preprocessing" }, { name = "matplotlib", specifier = ">=3.5.1" }, { name = "mdpdf", specifier = "==0.0.18" }, - { name = "mistune" }, { name = "nltk", specifier = "==3.6.6" }, { name = "numpy", specifier = "==1.26.0" }, { name = "pandas", specifier = "==1.5.3" }, @@ -310,24 +284,18 @@ requires-dist = [ { name = "protobuf", specifier = "<3.22" }, { name = "psutil", specifier = "==5.8.0" }, { name = "psycopg2-binary", specifier = "==2.9.9" }, - { name = "pylint", specifier = "==2.15.5" }, { name = "python-crfsuite", specifier = 
">=0.9.8" }, { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "pyyaml" }, { name = "redis", specifier = "==4.3.3" }, - { name = "requests", specifier = "==2.32.0" }, + { name = "requests", specifier = "~=2.32" }, { name = "scikit-image", specifier = "==0.19.1" }, { name = "scikit-learn", specifier = "==1.5.0" }, { name = "scipy", specifier = ">=1.10.0,<1.13.0" }, - { name = "seaborn", specifier = "==0.11.1" }, - { name = "selenium", specifier = "==3.141.0" }, - { name = "sendgrid" }, - { name = "six", specifier = "==1.15.0" }, { name = "sklearn", specifier = "==0.0" }, { name = "sklearn-crfsuite", specifier = ">=0.3.6" }, { name = "slack", specifier = "==0.0.2" }, { name = "sqlalchemy", specifier = "==2.0.22" }, - { name = "tabulate", specifier = "==0.8.9" }, { name = "tenacity", specifier = "==8.3.0" }, { name = "tensorflow", specifier = "==2.15.0" }, { name = "textblob", specifier = "==0.15.3" }, @@ -349,6 +317,7 @@ dev = [ { name = "ipdb", specifier = "==0.13.9" }, { name = "mypy", specifier = ">=1.18.2" }, { name = "pylint" }, + { name = "pylint", specifier = "==2.15.5" }, { name = "pytest" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, @@ -373,6 +342,7 @@ docs = [ lint = [ { name = "mypy", specifier = ">=1.18.2" }, { name = "pylint" }, + { name = "pylint", specifier = "==2.15.5" }, { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, { name = "types-requests", specifier = ">=2.31.0.6" }, @@ -421,28 +391,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/80/ef8dff49aae0e4430f81842f7403e14e0ca59db7bbaf7af41245b67c6b25/billiard-4.2.2-py3-none-any.whl", hash = "sha256:4bc05dcf0d1cc6addef470723aac2a6232f3c7ed7475b0b580473a9145829457", size = 86896, upload-time = "2025-09-20T14:44:39.157Z" }, ] -[[package]] -name = "blinker" -version = "1.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1b/51/e2a9f3b757eb802f61dc1f2b09c8c99f6eb01cf06416c0671253536517b6/blinker-1.4.tar.gz", hash = "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6", size = 111476, upload-time = "2015-07-23T12:26:37.745Z" } - -[[package]] -name = "bokeh" -version = "2.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jinja2" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pillow" }, - { name = "python-dateutil" }, - { name = "pyyaml" }, - { name = "tornado" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/30/cf/9bef843880084b646cf5e6988e8f0aa081dccdf09f1617cfc6755f9a3353/bokeh-2.0.2.tar.gz", hash = "sha256:d9248bdb0156797abf6d04b5eac581dcb121f5d1db7acbc13282b0609314893a", size = 8636228, upload-time = "2020-04-22T15:42:52.232Z" } - [[package]] name = "boto3" version = "1.17.57" @@ -860,15 +808,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/6b/4a5dc8bf17a2714f53f648b6f44f1cd2ad7ab41aaaffe1c25489947c24f6/distributed-2023.10.1-py3-none-any.whl", hash = "sha256:0e0fe280d3b7b8be45840df3697dcb07d954c9c21c2a31d0c8e2dbe60bdaef21", size = 1002230, upload-time = "2023-10-27T22:14:59.23Z" }, ] -[[package]] -name = "dnspython" -version = "2.6.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/37/7d/c871f55054e403fdfd6b8f65fd6d1c4e147ed100d3e9f9ba1fe695403939/dnspython-2.6.1.tar.gz", hash = 
"sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc", size = 332727, upload-time = "2024-02-18T18:48:48.952Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/a1/8c5287991ddb8d3e4662f71356d9656d91ab3a36618c3dd11b280df0d255/dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50", size = 307696, upload-time = "2024-02-18T18:48:46.786Z" }, -] - [[package]] name = "docutils" version = "0.20.1" @@ -887,19 +826,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/24/fa/b3368f41b95a286f8d300e323449ab4e86b85334c2e0b477e94422b8ed0f/emoji-1.2.0-py3-none-any.whl", hash = "sha256:6b19b65da8d6f30551eead1705539cc0eadcd9e33a6ecbc421a29b87f96287eb", size = 131318, upload-time = "2021-01-27T15:21:11.762Z" }, ] -[[package]] -name = "eventlet" -version = "0.35.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "dnspython" }, - { name = "greenlet" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5e/a1/079895f493a7c7eef5d1fb1335aba96e05527fd22dc6cead98ff38acdd3a/eventlet-0.35.2.tar.gz", hash = "sha256:8d1263e20b7f816a046ac60e1d272f9e5bc503f7a34d9adc789f8a85b14fa57d", size = 548860, upload-time = "2024-02-21T17:05:17.647Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/5e/ea38bad6b685b0fde055b725e0a50613bfeecd572ec7606fa9449404f89a/eventlet-0.35.2-py3-none-any.whl", hash = "sha256:8fc1ee60d583f1dd58d6f304bb95fd46d34865ab22f57cb99008a81d61d573db", size = 359811, upload-time = "2024-02-21T17:05:15.13Z" }, -] - [[package]] name = "exceptiongroup" version = "1.3.0" @@ -996,20 +922,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2b/83/ac5bf3279f969704fc1e63f050c50e10985e50fd340e6069ec7e09df5442/Flask_Login-0.5.0-py2.py3-none-any.whl", hash = "sha256:7451b5001e17837ba58945aead261ba425fdf7b4f0448777e597ddab39f4fba0", size = 16039, upload-time = "2020-02-09T16:55:42.35Z" }, ] -[[package]] -name = "flask-wtf" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "flask" }, - { name = "itsdangerous" }, - { name = "wtforms" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/06/bb/090df80acb1f51ee3996d822ba34096b3e54b3af76c2c46530d6cf35c9b9/Flask-WTF-1.0.0.tar.gz", hash = "sha256:872fbb17b5888bfc734edbdcf45bc08fb365ca39f69d25dc752465a455517b28", size = 45365, upload-time = "2021-11-07T15:35:41.915Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/0a/59ddcce06b857a874187f2bc50da3fba8956c1855c19a2cc99d1dacab42a/Flask_WTF-1.0.0-py3-none-any.whl", hash = "sha256:01feccfc395405cea48a3f36c23f0d766e2cc6fd2a5a065ad50ad3e5827ec797", size = 12118, upload-time = "2021-11-07T15:35:40.151Z" }, -] - [[package]] name = "flatbuffers" version = "25.2.10" @@ -2795,15 +2707,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, ] -[[package]] -name = "python-http-client" -version = "3.3.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/56/fa/284e52a8c6dcbe25671f02d217bf2f85660db940088faf18ae7a05e97313/python_http_client-3.3.7.tar.gz", hash = "sha256:bf841ee45262747e00dec7ee9971dfb8c7d83083f5713596488d67739170cea0", size = 9377, upload-time = 
"2022-03-09T20:23:56.386Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/29/31/9b360138f4e4035ee9dac4fe1132b6437bd05751aaf1db2a2d83dc45db5f/python_http_client-3.3.7-py3-none-any.whl", hash = "sha256:ad371d2bbedc6ea15c26179c6222a78bc9308d272435ddf1d5c84f068f249a36", size = 8352, upload-time = "2022-03-09T20:23:54.862Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -3270,46 +3173,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/31/91a2a3c5eb85d2bfa86d7c98f2df5d77dcdefb3d80ca9f9037ad04393acf/scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c", size = 45816713, upload-time = "2024-01-20T21:12:26.619Z" }, ] -[[package]] -name = "seaborn" -version = "0.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "matplotlib" }, - { name = "numpy" }, - { name = "pandas" }, - { name = "scipy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ef/f4/1927dc0e07f34d54617ce7d31e83b0e3345f14e893b138e44eddd5fad806/seaborn-0.11.1.tar.gz", hash = "sha256:44e78eaed937c5a87fc7a892c329a7cc091060b67ebd1d0d306b446a74ba01ad", size = 261397, upload-time = "2020-12-20T20:53:26.849Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/68/ad/6c2406ae175f59ec616714e408979b674fe27b9587f79d59a528ddfbcd5b/seaborn-0.11.1-py3-none-any.whl", hash = "sha256:4e1cce9489449a1c6ff3c567f2113cdb41122f727e27a984950d004a88ef3c5c", size = 285007, upload-time = "2020-12-20T20:53:25.248Z" }, -] - -[[package]] -name = "selenium" -version = "3.141.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/9c/9030520bf6ff0b4c98988448a93c04fcbd5b13cd9520074d8ed53569ccfe/selenium-3.141.0.tar.gz", hash = "sha256:deaf32b60ad91a4611b98d8002757f29e6f2c2d5fcaf202e1c9ad06d6772300d", size = 854669, upload-time = "2018-11-01T09:01:07.6Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/80/d6/4294f0b4bce4de0abf13e17190289f9d0613b0a44e5dd6a7f5ca98459853/selenium-3.141.0-py2.py3-none-any.whl", hash = "sha256:2d7131d7bc5a5b99a2d9b04aaf2612c411b03b8ca1b1ee8d3de5845a9be2cb3c", size = 904577, upload-time = "2018-11-01T09:01:35.607Z" }, -] - -[[package]] -name = "sendgrid" -version = "6.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "python-http-client" }, - { name = "starkbank-ecdsa" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/48/d5bb52b65456da8a40d1b083bdd78168e3f26180dc2a18d92b315a79dcc3/sendgrid-6.11.0.tar.gz", hash = "sha256:71424b2a97f5a034121ea3b2666c653ba0ed315982f0d57b7851c0c9503dc5ab", size = 49923, upload-time = "2023-12-01T05:18:37.468Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/08/5f/16c45fdf3205db65ca4409528069cc25f74383610e902716462d9bb944fd/sendgrid-6.11.0-py3-none-any.whl", hash = "sha256:43ecf5bb742ea5850c7cfe68f5e7d9948772352306d4e83e119899959538b884", size = 101921, upload-time = "2023-12-01T05:18:34.883Z" }, -] - [[package]] name = "setuptools" version = "80.9.0" @@ -3629,12 +3492,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, ] -[[package]] -name = "starkbank-ecdsa" -version = "2.2.0" -source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a3/f8/a6091be6a60ed4df9ac806c89fbc5fe1a3416d0284f3ba70aa09a3419428/starkbank-ecdsa-2.2.0.tar.gz", hash = "sha256:9399c3371b899d4a235b68a1ed7919d202fbf024bd2c863ae8ebdad343c2a63a", size = 14690, upload-time = "2022-10-24T18:36:05.27Z" } - [[package]] name = "tabulate" version = "0.8.9" @@ -4237,18 +4094,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/7e/14113996bc6ee68eb987773b4139c87afd3ceff60e27e37648aa5eb2798a/wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224", size = 35616, upload-time = "2023-10-07T08:30:14.868Z" }, ] -[[package]] -name = "wtforms" -version = "3.2.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/01/e4/633d080897e769ed5712dcfad626e55dbd6cf45db0ff4d9884315c6a82da/wtforms-3.2.1.tar.gz", hash = "sha256:df3e6b70f3192e92623128123ec8dca3067df9cfadd43d59681e210cfb8d4682", size = 137801, upload-time = "2024-10-21T11:34:00.108Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/08/c9/2088fb5645cd289c99ebe0d4cdcc723922a1d8e1beaefb0f6f76dff9b21c/wtforms-3.2.1-py3-none-any.whl", hash = "sha256:583bad77ba1dd7286463f21e11aa3043ca4869d03575921d1a1698d0715e0fd4", size = 152454, upload-time = "2024-10-21T11:33:58.44Z" }, -] - [[package]] name = "xgboost" version = "3.0.2" From 4d91474b7008d1f90db483052ccc21c740ad22c5 Mon Sep 17 00:00:00 2001 From: Noaman-Akhtar Date: Thu, 5 Feb 2026 13:18:14 +0530 Subject: [PATCH 043/389] Remove distributed from dependencies fixes #3645 Signed-off-by: Noaman-Akhtar --- pyproject.toml | 1 - uv.lock | 166 ------------------------------------------------- 2 files changed, 167 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f8a7c3680..f8975bdd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ "celery~=5.5", "click~=8.1", "coloredlogs==15.0", - "distributed>=2021.03.0", "emoji==1.2.0", "flask==2.0.2", "flask-cors==4.0.1", diff --git a/uv.lock b/uv.lock index eef0b3df8..78372218b 100644 --- a/uv.lock +++ b/uv.lock @@ -149,7 +149,6 @@ dependencies = [ { name = "celery" }, { name = "click" }, { name = "coloredlogs" }, - { name = "distributed" }, { name = "emoji" }, { name = "flask" }, { name = "flask-cors" }, @@ -258,7 +257,6 @@ requires-dist = [ { name = "celery", specifier = "~=5.5" }, { name = "click", specifier = "~=8.1" }, { name = "coloredlogs", specifier = "==15.0" }, - { name = "distributed", specifier = ">=2021.3.0" }, { name = "emoji", specifier = "==1.2.0" }, { name = "flask", specifier = "==2.0.2" }, { name = "flask-cors", specifier = "==4.0.1" }, @@ -600,15 +598,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/40/9d857001228658f0d59e97ebd4c346fe73e138c6de1bce61dc568a57c7f8/click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812", size = 10289, upload-time = "2023-06-15T12:43:48.626Z" }, ] -[[package]] -name = "cloudpickle" -version = "3.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/52/39/069100b84d7418bc358d81669d5748efb14b9cceacd2f9c75f550424132f/cloudpickle-3.1.1.tar.gz", hash = "sha256:b216fa8ae4019d5482a8ac3c95d8f6346115d8835911fd4aefd1a445e4242c64", size = 22113, upload-time = "2025-01-14T17:02:05.085Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/7e/e8/64c37fadfc2816a7701fa8a6ed8d87327c7d54eacfbfb6edab14a2f2be75/cloudpickle-3.1.1-py3-none-any.whl", hash = "sha256:c8c5a44295039331ee9dad40ba100a9c7297b6f988e50e87ccdf3765a668350e", size = 20992, upload-time = "2025-01-14T17:02:02.417Z" }, -] - [[package]] name = "colorama" version = "0.4.6" @@ -715,25 +704,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] -[[package]] -name = "dask" -version = "2023.10.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "cloudpickle" }, - { name = "fsspec" }, - { name = "importlib-metadata" }, - { name = "packaging" }, - { name = "partd" }, - { name = "pyyaml" }, - { name = "toolz" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a5/b0/8efb1e4a7b6aaa87e51412a42a342159b73da75d30c54fafbc81c6fb4668/dask-2023.10.1.tar.gz", hash = "sha256:da3ef0526992845408df491fcd0b3a49c7207aa908a1675cea12ab2ea10c7940", size = 8548000, upload-time = "2023-10-27T22:15:04.555Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/a1/9a846d38298b81d8999b501467653b2feb69ec983dbd6538306c6b8e1884/dask-2023.10.1-py3-none-any.whl", hash = "sha256:1fb0ee4d79e3c7c8f2e7c9f2680fd0ef0668801a10eaa290b970982b26a714da", size = 1204098, upload-time = "2023-10-27T22:14:53.445Z" }, -] - [[package]] name = "decorator" version = "5.2.1" @@ -782,32 +752,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" }, ] -[[package]] -name = "distributed" -version = "2023.10.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "cloudpickle" }, - { name = "dask" }, - { name = "jinja2" }, - { name = "locket" }, - { name = "msgpack" }, - { name = "packaging" }, - { name = "psutil" }, - { name = "pyyaml" }, - { name = "sortedcontainers" }, - { name = "tblib" }, - { name = "toolz" }, - { name = "tornado" }, - { name = "urllib3" }, - { name = "zict" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fe/1c/bd5a045b63ea70165940b439458d5516c129ece9eb4e7d3dd59bda5d076e/distributed-2023.10.1.tar.gz", hash = "sha256:6c52dde9684bc7147f04d14594a5090a5cc7fa58258a015f4ca4fb289a374eaf", size = 1094004, upload-time = "2023-10-27T22:15:05.178Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/6b/4a5dc8bf17a2714f53f648b6f44f1cd2ad7ab41aaaffe1c25489947c24f6/distributed-2023.10.1-py3-none-any.whl", hash = "sha256:0e0fe280d3b7b8be45840df3697dcb07d954c9c21c2a31d0c8e2dbe60bdaef21", size = 1002230, upload-time = "2023-10-27T22:14:59.23Z" }, -] - [[package]] name = "docutils" version = "0.20.1" @@ -988,15 +932,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/2f/c536b5b9bb3c071e91d536a4d11f969e911dbb6b227939f4c5b0bca090df/fonttools-4.58.4-py3-none-any.whl", hash = "sha256:a10ce13a13f26cbb9f37512a4346bb437ad7e002ff6fa966a7ce7ff5ac3528bd", size = 1114660, upload-time = "2025-06-13T17:25:13.321Z" }, ] -[[package]] -name = "fsspec" -version = "2025.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = 
{ url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/61/78c7b3851add1481b048b5fdc29067397a1784e2910592bc81bb3f608635/fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462", size = 199052, upload-time = "2025-05-24T12:03:21.66Z" }, -] - [[package]] name = "gast" version = "0.6.0" @@ -1172,7 +1107,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/92/db/b4c12cff13ebac2786f4f217f06588bccd8b53d260453404ef22b121fc3a/greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be", size = 268977, upload-time = "2025-06-05T16:10:24.001Z" }, { url = "https://files.pythonhosted.org/packages/52/61/75b4abd8147f13f70986df2801bf93735c1bd87ea780d70e3b3ecda8c165/greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac", size = 627351, upload-time = "2025-06-05T16:38:50.685Z" }, { url = "https://files.pythonhosted.org/packages/35/aa/6894ae299d059d26254779a5088632874b80ee8cf89a88bca00b0709d22f/greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392", size = 638599, upload-time = "2025-06-05T16:41:34.057Z" }, - { url = "https://files.pythonhosted.org/packages/30/64/e01a8261d13c47f3c082519a5e9dbf9e143cc0498ed20c911d04e54d526c/greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c", size = 634482, upload-time = "2025-06-05T16:48:16.26Z" }, { url = "https://files.pythonhosted.org/packages/47/48/ff9ca8ba9772d083a4f5221f7b4f0ebe8978131a9ae0909cf202f94cd879/greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db", size = 633284, upload-time = "2025-06-05T16:13:01.599Z" }, { url = "https://files.pythonhosted.org/packages/e9/45/626e974948713bc15775b696adb3eb0bd708bec267d6d2d5c47bb47a6119/greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b", size = 582206, upload-time = "2025-06-05T16:12:48.51Z" }, { url = "https://files.pythonhosted.org/packages/b1/8e/8b6f42c67d5df7db35b8c55c9a850ea045219741bb14416255616808c690/greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712", size = 1111412, upload-time = "2025-06-05T16:36:45.479Z" }, @@ -1181,7 +1115,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/2e/d4fcb2978f826358b673f779f78fa8a32ee37df11920dc2bb5589cbeecef/greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822", size = 270219, upload-time = "2025-06-05T16:10:10.414Z" }, { url = "https://files.pythonhosted.org/packages/16/24/929f853e0202130e4fe163bc1d05a671ce8dcd604f790e14896adac43a52/greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83", size = 630383, upload-time = "2025-06-05T16:38:51.785Z" }, { url = "https://files.pythonhosted.org/packages/d1/b2/0320715eb61ae70c25ceca2f1d5ae620477d246692d9cc284c13242ec31c/greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf", size = 642422, upload-time = "2025-06-05T16:41:35.259Z" }, - { url = "https://files.pythonhosted.org/packages/bd/49/445fd1a210f4747fedf77615d941444349c6a3a4a1135bba9701337cd966/greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b", size = 638375, upload-time = "2025-06-05T16:48:18.235Z" }, { url = "https://files.pythonhosted.org/packages/7e/c8/ca19760cf6eae75fa8dc32b487e963d863b3ee04a7637da77b616703bc37/greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147", size = 637627, upload-time = "2025-06-05T16:13:02.858Z" }, { url = "https://files.pythonhosted.org/packages/65/89/77acf9e3da38e9bcfca881e43b02ed467c1dedc387021fc4d9bd9928afb8/greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5", size = 585502, upload-time = "2025-06-05T16:12:49.642Z" }, { url = "https://files.pythonhosted.org/packages/97/c6/ae244d7c95b23b7130136e07a9cc5aadd60d59b5951180dc7dc7e8edaba7/greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc", size = 1114498, upload-time = "2025-06-05T16:36:46.598Z" }, @@ -1190,7 +1123,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d", size = 271992, upload-time = "2025-06-05T16:11:23.467Z" }, { url = "https://files.pythonhosted.org/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b", size = 638820, upload-time = "2025-06-05T16:38:52.882Z" }, { url = "https://files.pythonhosted.org/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d", size = 653046, upload-time = "2025-06-05T16:41:36.343Z" }, - { url = "https://files.pythonhosted.org/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264", size = 647701, upload-time = "2025-06-05T16:48:19.604Z" }, { url = "https://files.pythonhosted.org/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688", size = 649747, upload-time = "2025-06-05T16:13:04.628Z" }, { url = 
"https://files.pythonhosted.org/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb", size = 605461, upload-time = "2025-06-05T16:12:50.792Z" }, { url = "https://files.pythonhosted.org/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c", size = 1121190, upload-time = "2025-06-05T16:36:48.59Z" }, @@ -1199,7 +1131,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b1/cf/f5c0b23309070ae93de75c90d29300751a5aacefc0a3ed1b1d8edb28f08b/greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad", size = 270732, upload-time = "2025-06-05T16:10:08.26Z" }, { url = "https://files.pythonhosted.org/packages/48/ae/91a957ba60482d3fecf9be49bc3948f341d706b52ddb9d83a70d42abd498/greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef", size = 639033, upload-time = "2025-06-05T16:38:53.983Z" }, { url = "https://files.pythonhosted.org/packages/6f/df/20ffa66dd5a7a7beffa6451bdb7400d66251374ab40b99981478c69a67a8/greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3", size = 652999, upload-time = "2025-06-05T16:41:37.89Z" }, - { url = "https://files.pythonhosted.org/packages/51/b4/ebb2c8cb41e521f1d72bf0465f2f9a2fd803f674a88db228887e6847077e/greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95", size = 647368, upload-time = "2025-06-05T16:48:21.467Z" }, { url = "https://files.pythonhosted.org/packages/8e/6a/1e1b5aa10dced4ae876a322155705257748108b7fd2e4fae3f2a091fe81a/greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb", size = 650037, upload-time = "2025-06-05T16:13:06.402Z" }, { url = "https://files.pythonhosted.org/packages/26/f2/ad51331a157c7015c675702e2d5230c243695c788f8f75feba1af32b3617/greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b", size = 608402, upload-time = "2025-06-05T16:12:51.91Z" }, { url = "https://files.pythonhosted.org/packages/26/bc/862bd2083e6b3aff23300900a956f4ea9a4059de337f5c8734346b9b34fc/greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0", size = 1119577, upload-time = "2025-06-05T16:36:49.787Z" }, @@ -1208,7 +1139,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d8/ca/accd7aa5280eb92b70ed9e8f7fd79dc50a2c21d8c73b9a0856f5b564e222/greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86", size = 271479, upload-time = "2025-06-05T16:10:47.525Z" }, { url = "https://files.pythonhosted.org/packages/55/71/01ed9895d9eb49223280ecc98a557585edfa56b3d0e965b9fa9f7f06b6d9/greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97", size = 683952, upload-time = "2025-06-05T16:38:55.125Z" }, { url = "https://files.pythonhosted.org/packages/ea/61/638c4bdf460c3c678a0a1ef4c200f347dff80719597e53b5edb2fb27ab54/greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728", size = 696917, upload-time = "2025-06-05T16:41:38.959Z" }, - { url = "https://files.pythonhosted.org/packages/22/cc/0bd1a7eb759d1f3e3cc2d1bc0f0b487ad3cc9f34d74da4b80f226fde4ec3/greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a", size = 692443, upload-time = "2025-06-05T16:48:23.113Z" }, { url = "https://files.pythonhosted.org/packages/67/10/b2a4b63d3f08362662e89c103f7fe28894a51ae0bc890fabf37d1d780e52/greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892", size = 692995, upload-time = "2025-06-05T16:13:07.972Z" }, { url = "https://files.pythonhosted.org/packages/5a/c6/ad82f148a4e3ce9564056453a71529732baf5448ad53fc323e37efe34f66/greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141", size = 655320, upload-time = "2025-06-05T16:12:53.453Z" }, { url = "https://files.pythonhosted.org/packages/5c/4f/aab73ecaa6b3086a4c89863d94cf26fa84cbff63f52ce9bc4342b3087a06/greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a", size = 301236, upload-time = "2025-06-05T16:15:20.111Z" }, @@ -1390,18 +1320,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769, upload-time = "2022-07-01T12:21:02.467Z" }, ] -[[package]] -name = "importlib-metadata" -version = "8.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "zipp" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, -] - [[package]] name = "iniconfig" version = "2.1.0" @@ -1993,54 +1911,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/89/c727fde1a3d12586e0b8c01abf53754707d76beaa9987640e70807d4545f/ml_dtypes-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:832a019a1b6db5c4422032ca9940a990fa104eee420f643713241b3a518977fa", size = 938744, upload-time = "2023-06-06T15:14:25.77Z" }, ] -[[package]] -name = "msgpack" -version = "1.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/45/b1/ea4f68038a18c77c9467400d166d74c4ffa536f34761f7983a104357e614/msgpack-1.1.1.tar.gz", hash = 
"sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd", size = 173555, upload-time = "2025-06-13T06:52:51.324Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/33/52/f30da112c1dc92cf64f57d08a273ac771e7b29dea10b4b30369b2d7e8546/msgpack-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:353b6fc0c36fde68b661a12949d7d49f8f51ff5fa019c1e47c87c4ff34b080ed", size = 81799, upload-time = "2025-06-13T06:51:37.228Z" }, - { url = "https://files.pythonhosted.org/packages/e4/35/7bfc0def2f04ab4145f7f108e3563f9b4abae4ab0ed78a61f350518cc4d2/msgpack-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:79c408fcf76a958491b4e3b103d1c417044544b68e96d06432a189b43d1215c8", size = 78278, upload-time = "2025-06-13T06:51:38.534Z" }, - { url = "https://files.pythonhosted.org/packages/e8/c5/df5d6c1c39856bc55f800bf82778fd4c11370667f9b9e9d51b2f5da88f20/msgpack-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78426096939c2c7482bf31ef15ca219a9e24460289c00dd0b94411040bb73ad2", size = 402805, upload-time = "2025-06-13T06:51:39.538Z" }, - { url = "https://files.pythonhosted.org/packages/20/8e/0bb8c977efecfe6ea7116e2ed73a78a8d32a947f94d272586cf02a9757db/msgpack-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b17ba27727a36cb73aabacaa44b13090feb88a01d012c0f4be70c00f75048b4", size = 408642, upload-time = "2025-06-13T06:51:41.092Z" }, - { url = "https://files.pythonhosted.org/packages/59/a1/731d52c1aeec52006be6d1f8027c49fdc2cfc3ab7cbe7c28335b2910d7b6/msgpack-1.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a17ac1ea6ec3c7687d70201cfda3b1e8061466f28f686c24f627cae4ea8efd0", size = 395143, upload-time = "2025-06-13T06:51:42.575Z" }, - { url = "https://files.pythonhosted.org/packages/2b/92/b42911c52cda2ba67a6418ffa7d08969edf2e760b09015593c8a8a27a97d/msgpack-1.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:88d1e966c9235c1d4e2afac21ca83933ba59537e2e2727a999bf3f515ca2af26", size = 395986, upload-time = "2025-06-13T06:51:43.807Z" }, - { url = "https://files.pythonhosted.org/packages/61/dc/8ae165337e70118d4dab651b8b562dd5066dd1e6dd57b038f32ebc3e2f07/msgpack-1.1.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f6d58656842e1b2ddbe07f43f56b10a60f2ba5826164910968f5933e5178af75", size = 402682, upload-time = "2025-06-13T06:51:45.534Z" }, - { url = "https://files.pythonhosted.org/packages/58/27/555851cb98dcbd6ce041df1eacb25ac30646575e9cd125681aa2f4b1b6f1/msgpack-1.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96decdfc4adcbc087f5ea7ebdcfd3dee9a13358cae6e81d54be962efc38f6338", size = 406368, upload-time = "2025-06-13T06:51:46.97Z" }, - { url = "https://files.pythonhosted.org/packages/d4/64/39a26add4ce16f24e99eabb9005e44c663db00e3fce17d4ae1ae9d61df99/msgpack-1.1.1-cp310-cp310-win32.whl", hash = "sha256:6640fd979ca9a212e4bcdf6eb74051ade2c690b862b679bfcb60ae46e6dc4bfd", size = 65004, upload-time = "2025-06-13T06:51:48.582Z" }, - { url = "https://files.pythonhosted.org/packages/7d/18/73dfa3e9d5d7450d39debde5b0d848139f7de23bd637a4506e36c9800fd6/msgpack-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:8b65b53204fe1bd037c40c4148d00ef918eb2108d24c9aaa20bc31f9810ce0a8", size = 71548, upload-time = "2025-06-13T06:51:49.558Z" }, - { url = "https://files.pythonhosted.org/packages/7f/83/97f24bf9848af23fe2ba04380388216defc49a8af6da0c28cc636d722502/msgpack-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:71ef05c1726884e44f8b1d1773604ab5d4d17729d8491403a705e649116c9558", size = 82728, upload-time = "2025-06-13T06:51:50.68Z" }, - { url = "https://files.pythonhosted.org/packages/aa/7f/2eaa388267a78401f6e182662b08a588ef4f3de6f0eab1ec09736a7aaa2b/msgpack-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36043272c6aede309d29d56851f8841ba907a1a3d04435e43e8a19928e243c1d", size = 79279, upload-time = "2025-06-13T06:51:51.72Z" }, - { url = "https://files.pythonhosted.org/packages/f8/46/31eb60f4452c96161e4dfd26dbca562b4ec68c72e4ad07d9566d7ea35e8a/msgpack-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32747b1b39c3ac27d0670122b57e6e57f28eefb725e0b625618d1b59bf9d1e0", size = 423859, upload-time = "2025-06-13T06:51:52.749Z" }, - { url = "https://files.pythonhosted.org/packages/45/16/a20fa8c32825cc7ae8457fab45670c7a8996d7746ce80ce41cc51e3b2bd7/msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a8b10fdb84a43e50d38057b06901ec9da52baac6983d3f709d8507f3889d43f", size = 429975, upload-time = "2025-06-13T06:51:53.97Z" }, - { url = "https://files.pythonhosted.org/packages/86/ea/6c958e07692367feeb1a1594d35e22b62f7f476f3c568b002a5ea09d443d/msgpack-1.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba0c325c3f485dc54ec298d8b024e134acf07c10d494ffa24373bea729acf704", size = 413528, upload-time = "2025-06-13T06:51:55.507Z" }, - { url = "https://files.pythonhosted.org/packages/75/05/ac84063c5dae79722bda9f68b878dc31fc3059adb8633c79f1e82c2cd946/msgpack-1.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:88daaf7d146e48ec71212ce21109b66e06a98e5e44dca47d853cbfe171d6c8d2", size = 413338, upload-time = "2025-06-13T06:51:57.023Z" }, - { url = "https://files.pythonhosted.org/packages/69/e8/fe86b082c781d3e1c09ca0f4dacd457ede60a13119b6ce939efe2ea77b76/msgpack-1.1.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8b55ea20dc59b181d3f47103f113e6f28a5e1c89fd5b67b9140edb442ab67f2", size = 422658, upload-time = "2025-06-13T06:51:58.419Z" }, - { url = "https://files.pythonhosted.org/packages/3b/2b/bafc9924df52d8f3bb7c00d24e57be477f4d0f967c0a31ef5e2225e035c7/msgpack-1.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a28e8072ae9779f20427af07f53bbb8b4aa81151054e882aee333b158da8752", size = 427124, upload-time = "2025-06-13T06:51:59.969Z" }, - { url = "https://files.pythonhosted.org/packages/a2/3b/1f717e17e53e0ed0b68fa59e9188f3f610c79d7151f0e52ff3cd8eb6b2dc/msgpack-1.1.1-cp311-cp311-win32.whl", hash = "sha256:7da8831f9a0fdb526621ba09a281fadc58ea12701bc709e7b8cbc362feabc295", size = 65016, upload-time = "2025-06-13T06:52:01.294Z" }, - { url = "https://files.pythonhosted.org/packages/48/45/9d1780768d3b249accecc5a38c725eb1e203d44a191f7b7ff1941f7df60c/msgpack-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fd1b58e1431008a57247d6e7cc4faa41c3607e8e7d4aaf81f7c29ea013cb458", size = 72267, upload-time = "2025-06-13T06:52:02.568Z" }, - { url = "https://files.pythonhosted.org/packages/e3/26/389b9c593eda2b8551b2e7126ad3a06af6f9b44274eb3a4f054d48ff7e47/msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238", size = 82359, upload-time = "2025-06-13T06:52:03.909Z" }, - { url = "https://files.pythonhosted.org/packages/ab/65/7d1de38c8a22cf8b1551469159d4b6cf49be2126adc2482de50976084d78/msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157", size = 79172, upload-time = "2025-06-13T06:52:05.246Z" }, - { url = "https://files.pythonhosted.org/packages/0f/bd/cacf208b64d9577a62c74b677e1ada005caa9b69a05a599889d6fc2ab20a/msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce", size = 425013, upload-time = "2025-06-13T06:52:06.341Z" }, - { url = "https://files.pythonhosted.org/packages/4d/ec/fd869e2567cc9c01278a736cfd1697941ba0d4b81a43e0aa2e8d71dab208/msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a", size = 426905, upload-time = "2025-06-13T06:52:07.501Z" }, - { url = "https://files.pythonhosted.org/packages/55/2a/35860f33229075bce803a5593d046d8b489d7ba2fc85701e714fc1aaf898/msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c", size = 407336, upload-time = "2025-06-13T06:52:09.047Z" }, - { url = "https://files.pythonhosted.org/packages/8c/16/69ed8f3ada150bf92745fb4921bd621fd2cdf5a42e25eb50bcc57a5328f0/msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b", size = 409485, upload-time = "2025-06-13T06:52:10.382Z" }, - { url = "https://files.pythonhosted.org/packages/c6/b6/0c398039e4c6d0b2e37c61d7e0e9d13439f91f780686deb8ee64ecf1ae71/msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef", size = 412182, upload-time = "2025-06-13T06:52:11.644Z" }, - { url = "https://files.pythonhosted.org/packages/b8/d0/0cf4a6ecb9bc960d624c93effaeaae75cbf00b3bc4a54f35c8507273cda1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a", size = 419883, upload-time = "2025-06-13T06:52:12.806Z" }, - { url = "https://files.pythonhosted.org/packages/62/83/9697c211720fa71a2dfb632cad6196a8af3abea56eece220fde4674dc44b/msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c", size = 65406, upload-time = "2025-06-13T06:52:14.271Z" }, - { url = "https://files.pythonhosted.org/packages/c0/23/0abb886e80eab08f5e8c485d6f13924028602829f63b8f5fa25a06636628/msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4", size = 72558, upload-time = "2025-06-13T06:52:15.252Z" }, - { url = "https://files.pythonhosted.org/packages/a1/38/561f01cf3577430b59b340b51329803d3a5bf6a45864a55f4ef308ac11e3/msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0", size = 81677, upload-time = "2025-06-13T06:52:16.64Z" }, - { url = "https://files.pythonhosted.org/packages/09/48/54a89579ea36b6ae0ee001cba8c61f776451fad3c9306cd80f5b5c55be87/msgpack-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8ddb2bcfd1a8b9e431c8d6f4f7db0773084e107730ecf3472f1dfe9ad583f3d9", size = 78603, upload-time = "2025-06-13T06:52:17.843Z" }, - { url = "https://files.pythonhosted.org/packages/a0/60/daba2699b308e95ae792cdc2ef092a38eb5ee422f9d2fbd4101526d8a210/msgpack-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:196a736f0526a03653d829d7d4c5500a97eea3648aebfd4b6743875f28aa2af8", size = 420504, upload-time = "2025-06-13T06:52:18.982Z" }, - { url = "https://files.pythonhosted.org/packages/20/22/2ebae7ae43cd8f2debc35c631172ddf14e2a87ffcc04cf43ff9df9fff0d3/msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d592d06e3cc2f537ceeeb23d38799c6ad83255289bb84c2e5792e5a8dea268a", size = 423749, upload-time = "2025-06-13T06:52:20.211Z" }, - { url = "https://files.pythonhosted.org/packages/40/1b/54c08dd5452427e1179a40b4b607e37e2664bca1c790c60c442c8e972e47/msgpack-1.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4df2311b0ce24f06ba253fda361f938dfecd7b961576f9be3f3fbd60e87130ac", size = 404458, upload-time = "2025-06-13T06:52:21.429Z" }, - { url = "https://files.pythonhosted.org/packages/2e/60/6bb17e9ffb080616a51f09928fdd5cac1353c9becc6c4a8abd4e57269a16/msgpack-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4141c5a32b5e37905b5940aacbc59739f036930367d7acce7a64e4dec1f5e0b", size = 405976, upload-time = "2025-06-13T06:52:22.995Z" }, - { url = "https://files.pythonhosted.org/packages/ee/97/88983e266572e8707c1f4b99c8fd04f9eb97b43f2db40e3172d87d8642db/msgpack-1.1.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b1ce7f41670c5a69e1389420436f41385b1aa2504c3b0c30620764b15dded2e7", size = 408607, upload-time = "2025-06-13T06:52:24.152Z" }, - { url = "https://files.pythonhosted.org/packages/bc/66/36c78af2efaffcc15a5a61ae0df53a1d025f2680122e2a9eb8442fed3ae4/msgpack-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4147151acabb9caed4e474c3344181e91ff7a388b888f1e19ea04f7e73dc7ad5", size = 424172, upload-time = "2025-06-13T06:52:25.704Z" }, - { url = "https://files.pythonhosted.org/packages/8c/87/a75eb622b555708fe0427fab96056d39d4c9892b0c784b3a721088c7ee37/msgpack-1.1.1-cp313-cp313-win32.whl", hash = "sha256:500e85823a27d6d9bba1d057c871b4210c1dd6fb01fbb764e37e4e8847376323", size = 65347, upload-time = "2025-06-13T06:52:26.846Z" }, - { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341, upload-time = "2025-06-13T06:52:27.835Z" }, -] - [[package]] name = "mypy" version = "1.18.2" @@ -3257,15 +3127,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274, upload-time = "2025-05-09T16:34:50.371Z" }, ] -[[package]] -name = "sortedcontainers" -version = "2.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, -] - [[package]] name = "soupsieve" version = "2.7" @@ -3501,15 +3362,6 @@ wheels = 
[ { url = "https://files.pythonhosted.org/packages/ca/80/7c0cad11bd99985cfe7c09427ee0b4f9bd6b048bd13d4ffb32c6db237dfb/tabulate-0.8.9-py3-none-any.whl", hash = "sha256:d7c013fe7abbc5e491394e10fa845f8f32fe54f8dc60c6622c6cf482d25d47e4", size = 25123, upload-time = "2021-02-22T07:34:12.229Z" }, ] -[[package]] -name = "tblib" -version = "3.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/95/4b3044ec4bf248186769629bbfb495a458deb6e4c1f9eff7f298ae1e336e/tblib-3.1.0.tar.gz", hash = "sha256:06404c2c9f07f66fee2d7d6ad43accc46f9c3361714d9b8426e7f47e595cd652", size = 30766, upload-time = "2025-03-31T12:58:27.473Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/44/aa5c8b10b2cce7a053018e0d132bd58e27527a0243c4985383d5b6fd93e9/tblib-3.1.0-py3-none-any.whl", hash = "sha256:670bb4582578134b3d81a84afa1b016128b429f3d48e6cbbaecc9d15675e984e", size = 12552, upload-time = "2025-03-31T12:58:26.142Z" }, -] - [[package]] name = "tenacity" version = "8.3.0" @@ -4131,21 +3983,3 @@ sdist = { url = "https://files.pythonhosted.org/packages/d9/91/92d6032b2cc80674b wheels = [ { url = "https://files.pythonhosted.org/packages/6b/41/bf1aae04932d1eaffee1fc5f8b38ca47bbbf07d765129539bc4bcce1ce0c/XlsxWriter-1.3.7-py2.py3-none-any.whl", hash = "sha256:b807c2d3e379bf6a925f472955beef3e07495c1bac708640696876e68675b49b", size = 144610, upload-time = "2020-10-13T08:17:32.282Z" }, ] - -[[package]] -name = "zict" -version = "3.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d1/ac/3c494dd7ec5122cff8252c1a209b282c0867af029f805ae9befd73ae37eb/zict-3.0.0.tar.gz", hash = "sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5", size = 33238, upload-time = "2023-04-17T21:41:16.041Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/80/ab/11a76c1e2126084fde2639514f24e6111b789b0bfa4fc6264a8975c7e1f1/zict-3.0.0-py2.py3-none-any.whl", hash = "sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae", size = 43332, upload-time = "2023-04-17T21:41:13.444Z" }, -] - -[[package]] -name = "zipp" -version = "3.23.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, -] From 46b97043b66d52652ba88dfbe8aaee6e239d1cd8 Mon Sep 17 00:00:00 2001 From: AumOzaa Date: Tue, 14 Oct 2025 16:12:32 +0530 Subject: [PATCH 044/389] Add AUGUR_RESET_LOGS documentation in production guide Signed-off-by: AumOzaa --- docs/how_to_run_augur_in_production.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 docs/how_to_run_augur_in_production.md diff --git a/docs/how_to_run_augur_in_production.md b/docs/how_to_run_augur_in_production.md new file mode 100644 index 000000000..e638ae77b --- /dev/null +++ b/docs/how_to_run_augur_in_production.md @@ -0,0 +1,17 @@ +# How to Run Augur in Production + +## Resetting Logs with AUGUR_RESET_LOGS + +Augur provides an environment variable to control 
whether logs are reset on server startup. This gives system administrators more control over log management.
+
+`AUGUR_RESET_LOGS` : Controls the automatic reset of logs when Augur starts.
+
+- Default Behavior:
+  If the variable is not set, it defaults to True, meaning Augur will reset logs on startup to avoid infinite log growth.
+
+- Custom Behavior:
+  If set to False (or any common variation), Augur will not reset logs. In this case, the sysadmin is responsible for managing log growth.
+
+- Usage Example:
+  ```bash
+  export AUGUR_RESET_LOGS=False
\ No newline at end of file

From 50d1322b70d772d9c5c777602d46265ccccd1a38 Mon Sep 17 00:00:00 2001
From: AumOzaa
Date: Wed, 15 Oct 2025 11:32:17 +0530
Subject: [PATCH 045/389] Add augur_reset_logs documentation

Signed-off-by: AumOzaa

---
 docs/how_to_run_augur_in_production.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/how_to_run_augur_in_production.md b/docs/how_to_run_augur_in_production.md
index e638ae77b..109d9a14a 100644
--- a/docs/how_to_run_augur_in_production.md
+++ b/docs/how_to_run_augur_in_production.md
@@ -2,9 +2,9 @@
 
 ## Resetting Logs with AUGUR_RESET_LOGS
 
-Augur provides an environment variable to control whether logs are reset on server startup. This gives system administrators more control over log management.
+Augur does provide an environment variable to control whether logs are reset on server startup. This gives system administrators more control over log management.
 
-`AUGUR_RESET_LOGS` : Controls the automatic reset of logs when Augur starts.
+`AUGUR_RESET_LOGS` : This controls the automatic reset of logs when Augur starts.
 
 - Default Behavior:
   If the variable is not set, it defaults to True, meaning Augur will reset logs on startup to avoid infinite log growth.
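The patches above describe `AUGUR_RESET_LOGS` as accepting "any common variation" of False. The sketch below shows one conventional way such a flag is parsed; the helper name `env_flag` and the exact set of accepted spellings are illustrative assumptions, not Augur's actual implementation.

```python
import os

def env_flag(name: str, default: bool = True) -> bool:
    # Hypothetical helper: an unset variable falls back to the default,
    # common falsy spellings count as False, anything else counts as True.
    raw = os.environ.get(name)
    if raw is None:
        return default  # matches the documented default of True
    return raw.strip().lower() not in ("0", "false", "no", "off")

# Reset log files unless the operator exported AUGUR_RESET_LOGS=False (or similar)
reset_logfiles = env_flag("AUGUR_RESET_LOGS", default=True)
```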
From 4cfc7ccb9ddfa37a8a520e842d58754bea31fb35 Mon Sep 17 00:00:00 2001
From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com>
Date: Wed, 15 Oct 2025 10:53:46 -0400
Subject: [PATCH 046/389] close quotes on example in docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Mosè Giordano <765740+giordano@users.noreply.github.com>
Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com>
Signed-off-by: AumOzaa
---
 docs/how_to_run_augur_in_production.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/how_to_run_augur_in_production.md b/docs/how_to_run_augur_in_production.md
index 109d9a14a..7274f7596 100644
--- a/docs/how_to_run_augur_in_production.md
+++ b/docs/how_to_run_augur_in_production.md
@@ -14,4 +14,5 @@ Augur does provide an environment variable to control whether logs are reset on
 
 - Usage Example:
   ```bash
-  export AUGUR_RESET_LOGS=False
\ No newline at end of file
+  export AUGUR_RESET_LOGS=False
+  ```
\ No newline at end of file
From 3a35730ace4063d12a3e1591a290d7261cc4bda0 Mon Sep 17 00:00:00 2001
From: AumOzaa
Date: Tue, 14 Oct 2025 16:12:32 +0530
Subject: [PATCH 047/389] Move AUGUR_RESET_LOGS documentation to ReadTheDocs
 (.rst format)

Signed-off-by: AumOzaa
---
 .../source/how_to_run_augur_in_production.rst | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 docs/source/how_to_run_augur_in_production.rst

diff --git a/docs/source/how_to_run_augur_in_production.rst b/docs/source/how_to_run_augur_in_production.rst
new file mode 100644
index 000000000..f09b5cd24
--- /dev/null
+++ b/docs/source/how_to_run_augur_in_production.rst
@@ -0,0 +1,29 @@
+How to Run Augur in Production
+==============================
+
+Resetting Logs with AUGUR_RESET_LOGS
+------------------------------------
+
+Augur provides an environment variable to control whether logs are reset on server startup.
+This gives system administrators more control over log management.
+
+**Environment Variable:** ``AUGUR_RESET_LOGS`` — controls the automatic reset of logs when Augur starts.
+
+Default Behavior
+~~~~~~~~~~~~~~~~
+
+If the variable is **not set**, it defaults to **True**, meaning Augur will reset logs on startup
+to avoid infinite log growth.
+
+Custom Behavior
+~~~~~~~~~~~~~~~~
+
+If set to **False** (or any common variation), Augur will *not* reset logs automatically.
+In this case, the system administrator is responsible for managing log growth manually.
+
+Usage Example
+~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+   export AUGUR_RESET_LOGS=False
\ No newline at end of file
From 5abde884a3d3f4b24a0762b50a602aad8c4d2237 Mon Sep 17 00:00:00 2001
From: AumOzaa
Date: Wed, 5 Nov 2025 11:35:21 +0530
Subject: [PATCH 048/389] Added to toc.rst

Signed-off-by: AumOzaa
---
 docs/how_to_run_augur_in_production.md        | 18 ------------------
 .../how_to_run_augur_in_production.rst        |  0
 docs/source/deployment/toc.rst                |  2 +-
 3 files changed, 1 insertion(+), 19 deletions(-)
 delete mode 100644 docs/how_to_run_augur_in_production.md
 rename docs/source/{ => deployment}/how_to_run_augur_in_production.rst (100%)

diff --git a/docs/how_to_run_augur_in_production.md b/docs/how_to_run_augur_in_production.md
deleted file mode 100644
index 7274f7596..000000000
--- a/docs/how_to_run_augur_in_production.md
+++ /dev/null
@@ -1,18 +0,0 @@
-# How to Run Augur in Production
-
-## Resetting Logs with AUGUR_RESET_LOGS
-
-Augur does provide an environment variable to control whether logs are reset on server startup. 
This gives system administrators more control over log management. - -`AUGUR_RESET_LOGS` : This controls the automatic reset of logs when Augur starts. - -- Default Behavior: - If the variable is not set, it defaults to True, meaning Augur will reset logs on startup to avoid infinite log growth. - -- Custom Behavior: - If set to False (or any common variation), Augur will not reset logs. In this case, the sysadmin is responsible for managing log growth. - -- Usage Example: - ```bash - export AUGUR_RESET_LOGS=False - ``` \ No newline at end of file diff --git a/docs/source/how_to_run_augur_in_production.rst b/docs/source/deployment/how_to_run_augur_in_production.rst similarity index 100% rename from docs/source/how_to_run_augur_in_production.rst rename to docs/source/deployment/how_to_run_augur_in_production.rst diff --git a/docs/source/deployment/toc.rst b/docs/source/deployment/toc.rst index 2474aa066..5255a6ec9 100644 --- a/docs/source/deployment/toc.rst +++ b/docs/source/deployment/toc.rst @@ -9,7 +9,7 @@ This section details describes production deployment of Augur. server-deployment nginx-configuration - + how_to_run_augur_in_production **THIS SECTION IS UNDER CONSTRUCTION.** From 93d6a6502ffd49fbae190d190f09e87409445907 Mon Sep 17 00:00:00 2001 From: AumOzaa Date: Wed, 19 Nov 2025 08:10:37 +0530 Subject: [PATCH 049/389] Updated how_to_run_augur_in_production.rst Signed-off-by: AumOzaa --- .../how_to_run_augur_in_production.rst | 64 +++++++++++++++---- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/docs/source/deployment/how_to_run_augur_in_production.rst b/docs/source/deployment/how_to_run_augur_in_production.rst index f09b5cd24..ac372fec7 100644 --- a/docs/source/deployment/how_to_run_augur_in_production.rst +++ b/docs/source/deployment/how_to_run_augur_in_production.rst @@ -1,29 +1,69 @@ -How to Run Augur in Production -============================== +Running Augur in Production +=========================== -Resetting Logs with AUGUR_RESET_LOGS ------------------------------------- +This guide explains how to run Augur in a production environment and how to configure +important environment variables such as ``AUGUR_RESET_LOGS``. + +Prerequisites +------------- + +Before deploying Augur in production, ensure the following are installed and configured: + +- Docker and Docker Compose +- PostgreSQL (configured and accessible) +- Redis (installed and running) -Augur provides an environment variable to control whether logs are reset on server startup. -This gives system administrators more control over log management. +Environment Variables +--------------------- + +Augur uses several environment variables in production. Make sure to configure the ones relevant +to your deployment: + +- ``AUGUR_DB`` — PostgreSQL database connection string +- ``AUGUR_REDIS_URL`` — Redis connection string +- ``AUGUR_RESET_LOGS`` — controls automatic log reset on server startup + +Resetting Logs with AUGUR_RESET_LOGS +------------------------------------- -**Environment Variable:** ``AUGUR_RESET_LOGS`` — controls the automatic reset of logs when Augur starts. +Augur provides the ``AUGUR_RESET_LOGS`` environment variable to control whether logs are reset when +the server starts. This gives system administrators flexibility over log management. Default Behavior ~~~~~~~~~~~~~~~~ -If the variable is **not set**, it defaults to **True**, meaning Augur will reset logs on startup -to avoid infinite log growth. 
+If ``AUGUR_RESET_LOGS`` is **not set**, it defaults to **True**, meaning Augur will reset logs +on startup to prevent unbounded log growth. Custom Behavior ~~~~~~~~~~~~~~~~ -If set to **False** (or any common variation), Augur will *not* reset logs automatically. -In this case, the system administrator is responsible for managing log growth manually. +If set to ``False`` (or common variations), Augur will **not** reset logs automatically. +In this case, log rotation or manual log cleanup is the responsibility of the administrator. Usage Example ~~~~~~~~~~~~~ .. code-block:: bash - export AUGUR_RESET_LOGS=False \ No newline at end of file + export AUGUR_RESET_LOGS=False + +Related Resources +----------------- + +- https://github.com/oss-aspen/infra-ansible/ +- https://github.com/chaoss/augur-utilities/ + +Steps to Run in Production +-------------------------- + +1. Clone the repository: + + .. code-block:: bash + + git clone https://github.com/chaoss/augur.git + cd augur + +2. Configure all required environment variables +3. Set up Docker + Docker Compose or your deployment infrastructure +4. Start Augur following your deployment method. \ No newline at end of file From 7cae1fb8502e65fcdec26a5018878194de9b448d Mon Sep 17 00:00:00 2001 From: AumOzaa Date: Thu, 20 Nov 2025 13:53:57 +0530 Subject: [PATCH 050/389] Updated production docs Signed-off-by: AumOzaa --- .../how_to_run_augur_in_production.rst | 73 ++++++++----------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/docs/source/deployment/how_to_run_augur_in_production.rst b/docs/source/deployment/how_to_run_augur_in_production.rst index ac372fec7..6f6176b82 100644 --- a/docs/source/deployment/how_to_run_augur_in_production.rst +++ b/docs/source/deployment/how_to_run_augur_in_production.rst @@ -1,17 +1,9 @@ Running Augur in Production =========================== -This guide explains how to run Augur in a production environment and how to configure -important environment variables such as ``AUGUR_RESET_LOGS``. - -Prerequisites -------------- - -Before deploying Augur in production, ensure the following are installed and configured: - -- Docker and Docker Compose -- PostgreSQL (configured and accessible) -- Redis (installed and running) +This page collects practical tips, configuration notes, and important considerations +for deploying Augur in a production environment. This is a reference to help +configure Augur effectively. Environment Variables --------------------- @@ -19,51 +11,50 @@ Environment Variables Augur uses several environment variables in production. Make sure to configure the ones relevant to your deployment: -- ``AUGUR_DB`` — PostgreSQL database connection string -- ``AUGUR_REDIS_URL`` — Redis connection string -- ``AUGUR_RESET_LOGS`` — controls automatic log reset on server startup +- ``AUGUR_RESET_LOGS`` : Controls automatic log reset on server startup +- ``AUGUR_DB`` : PostgreSQL database connection string (used if variable not set) -Resetting Logs with AUGUR_RESET_LOGS -------------------------------------- +AUGUR_RESET_LOGS +---------------- -Augur provides the ``AUGUR_RESET_LOGS`` environment variable to control whether logs are reset when -the server starts. This gives system administrators flexibility over log management. +**Description:** +Controls whether Augur resets its log files every time the server starts. Useful for managing log size or integrating with external log rotation systems. 
-Default Behavior -~~~~~~~~~~~~~~~~ +**Type:** +boolean -If ``AUGUR_RESET_LOGS`` is **not set**, it defaults to **True**, meaning Augur will reset logs -on startup to prevent unbounded log growth. +**Default:** +`True` : Augur clears old logs at startup. -Custom Behavior -~~~~~~~~~~~~~~~~ +**Environment Variable:** +AUGUR_RESET_LOGS -If set to ``False`` (or common variations), Augur will **not** reset logs automatically. -In this case, log rotation or manual log cleanup is the responsibility of the administrator. +**Notes:** +If set to `False`, Augur will not reset logs automatically. Administrators must ensure log rotation or cleanup is handled manually. -Usage Example -~~~~~~~~~~~~~ +**Usage Example:** .. code-block:: bash export AUGUR_RESET_LOGS=False -Related Resources ------------------ +AUGUR_DB +-------- -- https://github.com/oss-aspen/infra-ansible/ -- https://github.com/chaoss/augur-utilities/ +**Description:** +Specifies the connection string for the PostgreSQL database used by Augur. If omitted, the default Docker database is used. -Steps to Run in Production --------------------------- +**Type:** +string -1. Clone the repository: +**Default:** +Docker container database (if `AUGUR_DB` is not specified) - .. code-block:: bash +**Environment Variable:** +AUGUR_DB - git clone https://github.com/chaoss/augur.git - cd augur +Related Resources +----------------- -2. Configure all required environment variables -3. Set up Docker + Docker Compose or your deployment infrastructure -4. Start Augur following your deployment method. \ No newline at end of file +- https://github.com/oss-aspen/infra-ansible/ +- https://github.com/chaoss/augur-utilities/ \ No newline at end of file From 43fc891df198ab18d7ef853bcaba82bddd298403 Mon Sep 17 00:00:00 2001 From: Dhanesh Kolu Date: Thu, 5 Feb 2026 07:13:53 +0530 Subject: [PATCH 051/389] docs: refine GitHub and GitLab token authentication documentation Signed-off-by: Dhanesh Kolu --- .../getting-started/collecting-data.rst | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index 8fc4aff89..c26d2ce8f 100644 --- a/docs/source/getting-started/collecting-data.rst +++ b/docs/source/getting-started/collecting-data.rst @@ -29,29 +29,25 @@ Since the default setup will work for most use cases, we'll first cover how to c Authentication and API Tokens ============================= -Augur collects data from hosted source control platforms such as GitHub and GitLab using -their respective REST APIs. To avoid strict API rate limits and to enable access to -private repositories, Augur requires Personal Access Tokens (PATs) with appropriate -read-only permissions. +Augur collects data from hosted source control platforms such as GitHub and GitLab using their respective APIs. To avoid strict API rate limits and to enable access to private repositories, Augur requires Personal Access Tokens (PATs) with appropriate read-only permissions. GitHub Authentication --------------------- -Augur uses the GitHub REST API to collect repository metadata, issues, pull requests, -releases, and contributor information. +Augur uses GitHub APIs to collect repository metadata, issues, pull requests, releases, and contributor information. -A GitHub Personal Access Token (PAT) is required. The minimum recommended permissions are: +Augur requires a GitHub Personal Access Token (PAT). 
Two token types are supported: -- **Classic Personal Access Token (recommended)** +- **Classic Personal Access Token** - ``repo`` — required for private repositories - ``read:org`` — required when collecting data from repositories owned by an organization - ``read:user`` — required for contributor and user metadata returned by the GitHub API -For public repositories only, a token without ``repo`` scope may be sufficient, though a -GitHub Personal Access Token is still required for Augur to authenticate API requests. +- **Fine-grained Personal Access Token** + - Fine-grained tokens provide repository-specific access with more precise permission controls. + - For public repository data collection, fine-grained tokens include read-only public repository access by default and typically require no additional permission changes. -GitHub tokens should be treated as secrets and supplied to Augur using environment -variables or configuration options described during installation. +GitHub tokens should be treated as secrets and supplied to Augur using environment variables or configuration options described during installation. GitLab Authentication --------------------- @@ -63,11 +59,9 @@ The token must include the following scopes: - ``read_api`` — required for accessing repository metadata, issues, and merge requests - ``read_repository`` — required for repository and commit data -These scopes apply to both GitLab.com and self-hosted GitLab instances. When using a -self-hosted GitLab deployment, ensure the API base URL is configured correctly. +These scopes apply to GitLab.com and most standard GitLab deployments. -As with GitHub tokens, GitLab tokens should be stored securely and provided to Augur -through environment variables or configuration files. +As with GitHub tokens, GitLab tokens should be stored securely and provided to Augur through environment variables or configuration files. Configuring Collection ---------------------- From e3d63e217215ed0603bd3acab64338a69740c83d Mon Sep 17 00:00:00 2001 From: nancywrites Date: Thu, 5 Feb 2026 16:57:08 +0100 Subject: [PATCH 052/389] Updated links and First paragraph Signed-off-by: nancywrites --- docs/source/index.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 5b7e8dd92..1716ad463 100755 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -31,8 +31,7 @@ Augur Documentation What is Augur? ~~~~~~~~~~~~~~~~ - -Augur is a software tool that helps you **collect and measure information about open-source software projects**. Open-source projects are software projects where anyone can see and contribute to the code. +Augur is a software tool that helps you **collect and measure information about open-source software projects**. Open-source projects are software projects where anyone can see and contribute to the code. Augur focuses on collecting data from public git-based code hosting platforms ("Forges") such as GitHub and GitLab to produce data about the health and sustainability of software projects based on the relevant CHAOSS metrics. The main goal of Augur is to **understand how healthy and sustainable a project is**. Healthy projects are easier to rely on, and they are important because many software organizations or companies depend on open-source software. @@ -64,7 +63,7 @@ Who develops Augur See it in action ---------------- -- You can check out Augur live on the CHAOSS instance `here `_. +- You can check out Augur live on the CHAOSS instance `here `_. 
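The token scopes documented in PATCH 051 above can be sanity-checked before handing the tokens to Augur. A hedged sketch using `requests` (assumed installed); the endpoints are standard GitHub and GitLab API calls, while the helper names are assumptions:

```python
import requests

def check_github_token(token: str) -> None:
    # /rate_limit authenticates without consuming API quota; classic PATs
    # report their granted scopes in the X-OAuth-Scopes response header.
    r = requests.get("https://api.github.com/rate_limit",
                     headers={"Authorization": f"token {token}"}, timeout=10)
    print("GitHub:", r.status_code, r.headers.get("X-OAuth-Scopes"))

def check_gitlab_token(token: str) -> None:
    # /api/v4/user succeeds for any token that can read the API (e.g., read_api).
    r = requests.get("https://gitlab.com/api/v4/user",
                     headers={"PRIVATE-TOKEN": token}, timeout=10)
    print("GitLab:", r.status_code)
```

A 401 from either call usually means the token is expired or mistyped; a 200 with missing scopes tends to surface later as 403s on specific resources.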
From a894579f1a75203be827e990420f217d0213466 Mon Sep 17 00:00:00 2001
From: nancywrites
Date: Thu, 5 Feb 2026 17:59:01 +0100
Subject: [PATCH 053/389] bolded text removed, link added, bullet point revised

Signed-off-by: nancywrites
---
 docs/source/index.rst | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 849b85a43..6705c30a4 100755
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -31,34 +31,33 @@ Augur Documentation
 What is Augur?
 ~~~~~~~~~~~~~~~~
 
-Augur is a software tool that helps you **collect and measure information about open-source software projects**. Open-source projects are software projects where anyone can see and contribute to the code. Augur focuses on collecting data from public git-based code hosting platforms ("Forges") such as GitHub and GitLab to produce data about the health and sustainability of software projects based on the relevant CHAOSS metrics.
+Augur is a software tool that helps you collect and measure information about `open-source `_ software projects. Augur focuses on collecting data from public git-based code hosting platforms ("Forges") such as GitHub and GitLab to produce data about the health and sustainability of software projects based on the relevant CHAOSS metrics.
 
-The main goal of Augur is to **understand how healthy and sustainable a project is**. Healthy projects are easier to rely on, and they are important because many software organizations or companies depend on open-source software.
+The main goal of Augur is to understand how healthy and sustainable a project is. Healthy projects are easier to rely on, and they are important because many software organizations or companies depend on open-source software.
 
 How Augur works
 ---------------
 
-1. Augur **looks at the project’s repositories** (the place where the project’s code and files live).
-2. It **collects data** about what is happening in those repositories.
-3. It **organizes this data** into a standard format called a data model.
-4. Then it **calculates metrics** that tell you about the project’s health.
+1. Augur looks at the project’s repositories (the place where the project’s code and files live).
+2. It collects data about activity that is happening in the project, including issues, comments, code changes, etc.
+3. It organizes this data into a standard format called a data model.
+4. Then it calculates metrics that tell you about the project’s health.
 
 Example of a metric: Burstiness
 -------------------------------
+- Burstiness is one of Augur’s metrics.
+- It shows periods when a project has a lot of activity in a short time, followed by periods when activity goes back to normal.
+- This helps you see a project’s focus, update patterns, and stability.
+- In other words, you can tell how often big changes happen and whether the project works in a steady, predictable way.
 
-- **Burstiness** is **one of Augur’s metrics**.
-- It shows periods when a project has **a lot of activity in a short time**, followed by periods when activity goes back to normal.
-- This helps you see a project’s **focus, update patterns, and stability**.
-- In other words, you can tell **how often big changes happen** and whether the project works in a steady, predictable way.
 
-Augur calculates **many other metrics**, which you can see in the full list `here `_.
+Augur calculates many other metrics, which you can see in the full list `here `_. 
Who develops Augur ----------------- -- Augur is developed as part of **CHAOSS** (Community Health Analytics for Open Source Software). +- Augur is developed as part of CHAOSS (Community Health Analytics for Open Source Software). - Many of Augur’s metrics come directly from the CHAOSS community. -- If you want to **get involved**, visit the `CHAOSS website `_. +- If you want to get involved, visit the `CHAOSS website `_. See it in action ---------------- @@ -66,7 +65,6 @@ See it in action - You can check out Augur live on the CHAOSS instance `here `_. - Current maintainers -------------------- - `Derek Howard `_ From f4f8a9ba906e2ecc463cfaa0694c88637b462913 Mon Sep 17 00:00:00 2001 From: nancywrites Date: Thu, 5 Feb 2026 18:08:36 +0100 Subject: [PATCH 054/389] changed open-source to open source Signed-off-by: nancywrites --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 6705c30a4..2a85d616c 100755 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -31,7 +31,7 @@ Augur Documentation What is Augur? ~~~~~~~~~~~~~~~~ -Augur is a software tool that helps you collect and measure information about `open-source `_ software projects. Augur focuses on collecting data from public git-based code hosting platforms ("Forges") such as GitHub and GitLab to produce data about the health and sustainability of software projects based on the relevant CHAOSS metrics. +Augur is a software tool that helps you collect and measure information about `opensource `_ software projects. Augur focuses on collecting data from public git-based code hosting platforms ("Forges") such as GitHub and GitLab to produce data about the health and sustainability of software projects based on the relevant CHAOSS metrics. The main goal of Augur is to understand how healthy and sustainable a project is. Healthy projects are easier to rely on, and they are important because many software organizations or companies depend on open-source software. From 4e211f40920b54207ecbeed763495196c8cf4e75 Mon Sep 17 00:00:00 2001 From: nancywrites Date: Thu, 5 Feb 2026 18:13:24 +0100 Subject: [PATCH 055/389] changed opensource to open source Signed-off-by: nancywrites --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 2a85d616c..f422b4b5b 100755 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -31,7 +31,7 @@ Augur Documentation What is Augur? ~~~~~~~~~~~~~~~~ -Augur is a software tool that helps you collect and measure information about `opensource `_ software projects. Augur focuses on collecting data from public git-based code hosting platforms ("Forges") such as GitHub and GitLab to produce data about the health and sustainability of software projects based on the relevant CHAOSS metrics. +Augur is a software tool that helps you collect and measure information about `open source `_ software projects. Augur focuses on collecting data from public git-based code hosting platforms ("Forges") such as GitHub and GitLab to produce data about the health and sustainability of software projects based on the relevant CHAOSS metrics. The main goal of Augur is to understand how healthy and sustainable a project is. Healthy projects are easier to rely on, and they are important because many software organizations or companies depend on open-source software. 
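The burstiness bullets in the index.rst patches above describe the metric qualitatively. One common formalization (Goh-Barabási) scores the spread of inter-event gaps; whether Augur computes it exactly this way is not shown in these patches, so treat this as an illustrative sketch:

```python
from statistics import mean, stdev

def burstiness(event_times: list[float]) -> float:
    # B = (sigma - mu) / (sigma + mu) over inter-event gaps: -1 for a
    # perfectly steady cadence, near 0 for Poisson-like activity, and
    # approaching +1 for heavy bursts. Needs at least three timestamps.
    gaps = [b - a for a, b in zip(event_times, event_times[1:])]
    mu, sigma = mean(gaps), stdev(gaps)
    return (sigma - mu) / (sigma + mu)

print(burstiness([0, 1, 2, 30, 31, 32, 60]))    # clustered activity: B > 0
print(burstiness([0, 10, 20, 30, 40, 50, 60]))  # steady cadence: B == -1.0
```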
From 82cc37f14a65bf6d42a40a8fb772b9c27b301956 Mon Sep 17 00:00:00 2001 From: HimasreeKolathur24 Date: Mon, 2 Feb 2026 21:29:08 +0530 Subject: [PATCH 056/389] Align fork metric documentation with CHAOSS Technical Fork definition Signed-off-by: HimasreeKolathur24 --- augur/api/metrics/repo_meta.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/augur/api/metrics/repo_meta.py b/augur/api/metrics/repo_meta.py index c39922e17..18803a8a8 100644 --- a/augur/api/metrics/repo_meta.py +++ b/augur/api/metrics/repo_meta.py @@ -347,12 +347,15 @@ def cii_best_practices_badge(repo_group_id, repo_id=None): @register_metric() def forks(repo_group_id, repo_id=None): - """ - Returns a time series of the fork count + """CHAOSS Technical Fork Metric (Time Series) + + Measures the number of technical forks of a repository on the same code development platform over time. + A technical fork is a platform-native fork (e.g., a GitHub fork),excluding local clones. + Canonical definition: https://chaoss.community/?p=3431 :param repo_group_id: The repository's repo_group_id :param repo_id: The repository's repo_id, defaults to None - :return: Time series of fork count + :return: Time series of technical fork counts """ if not repo_id: forks_SQL = s.sql.text(""" @@ -389,13 +392,17 @@ def forks(repo_group_id, repo_id=None): @register_metric() def fork_count(repo_group_id, repo_id=None): - """ - Returns the latest fork count + """CHAOSS Technical Fork Metric (Latest Value) + + Returns the most recent count of technical forks for a repository. + A technical fork is a platform-native fork on the same code development platform. + Canonical definition: https://chaoss.community/?p=3431 :param repo_group_id: The repository's repo_group_id :param repo_id: The repository's repo_id, defaults to None - :return: Fork count + :return: Latest technical fork count """ + if not repo_id: fork_count_SQL = s.sql.text(""" SELECT a.repo_id, repo_name, a.fork_count AS forks From 01e4b7ab6fa99f159aac435a352e3e14e9a350f7 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Sat, 7 Feb 2026 08:15:31 -0500 Subject: [PATCH 057/389] Update augur/tasks/github/pull_requests/tasks.py Co-authored-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Signed-off-by: Shlok Gilda --- augur/tasks/github/pull_requests/tasks.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 950c31fed..89dafca39 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -21,9 +21,7 @@ # Batch sizes for PR-related data collection # All use default_batch_size from config (default: 1000) -PR_BATCH_SIZE = get_batch_size() -PR_REVIEW_COMMENT_BATCH_SIZE = get_batch_size() -PR_REVIEW_BATCH_SIZE = get_batch_size() +PR_BATCH_SIZE = PR_REVIEW_COMMENT_BATCH_SIZE = PR_REVIEW_BATCH_SIZE = get_batch_size() platform_id = 1 From 52fc2764361530a33b9599db635c227de4a252e6 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Sat, 7 Feb 2026 08:46:33 -0500 Subject: [PATCH 058/389] Refactor batch size handling across event, issue, message, and PR processing to use dynamic configuration values Signed-off-by: Shlok Gilda --- augur/tasks/github/events.py | 19 +++++++++++-------- augur/tasks/github/facade_github/tasks.py | 9 ++++----- augur/tasks/github/issues.py | 7 +++---- augur/tasks/github/messages.py | 7 +++---- .../pull_requests/commits_model/core.py | 9 ++++----- 
.../github/pull_requests/files_model/core.py | 9 ++++----- augur/tasks/github/pull_requests/tasks.py | 15 +++++++++------ 7 files changed, 38 insertions(+), 37 deletions(-) diff --git a/augur/tasks/github/events.py b/augur/tasks/github/events.py index b7d301e88..a2a8736c8 100644 --- a/augur/tasks/github/events.py +++ b/augur/tasks/github/events.py @@ -17,9 +17,6 @@ from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_issues_by_repo_id, get_pull_requests_by_repo_id, update_issue_closed_cntrbs_by_repo_id, get_session, get_engine, get_core_data_last_collected, batch_insert_contributors, get_batch_size -# Batch size for processing events - smaller than issues/PRs due to higher processing overhead per event -# Uses github_event_batch_size from config (default: 500) -EVENT_BATCH_SIZE = get_batch_size("event") platform_id = 1 @@ -119,12 +116,14 @@ def collect(self, repo_git, key_auth, since): owner, repo = get_owner_repo(repo_git) self.repo_identifier = f"{owner}/{repo}" + event_batch_size = get_batch_size("event") + events = [] for event in self._collect_events(repo_git, key_auth, since): events.append(event) # making this a decent size since process_events retrieves all the issues and prs each time - if len(events) >= EVENT_BATCH_SIZE: + if len(events) >= event_batch_size: self._process_events(events, repo_id) events.clear() @@ -281,6 +280,8 @@ def collect(self, repo_git, key_auth, since): def _collect_and_process_issue_events(self, owner, repo, repo_id, key_auth, since): + event_batch_size = get_batch_size("event") + engine = get_engine() with engine.connect() as connection: @@ -331,19 +332,21 @@ def _collect_and_process_issue_events(self, owner, repo, repo_id, key_auth, sinc except UrlNotFoundException as e: self._logger.info(f"{self.repo_identifier}: Issue with number of {issue_number} returned 404 on event data. Skipping.") - if len(events) >= EVENT_BATCH_SIZE: + if len(events) >= event_batch_size: self._insert_contributors(contributors) self._insert_issue_events(events) events.clear() - + if events: self._insert_contributors(contributors) self._insert_issue_events(events) events.clear() - + def _collect_and_process_pr_events(self, owner, repo, repo_id, key_auth, since): + event_batch_size = get_batch_size("event") + engine = get_engine() with engine.connect() as connection: @@ -393,7 +396,7 @@ def _collect_and_process_pr_events(self, owner, repo, repo_id, key_auth, since): self._logger.info(f"{self.repo_identifier}: PR with number of {pr_number} returned 404 on event data. 
Skipping.") continue - if len(events) >= EVENT_BATCH_SIZE: + if len(events) >= event_batch_size: self._insert_contributors(contributors) self._insert_pr_events(events) events.clear() diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 6103b3e8e..22d07dc6c 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -11,9 +11,6 @@ from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import * -# Batch size for facade contributor processing -# Uses default_batch_size from config (default: 1000) -FACADE_CONTRIBUTOR_BATCH_SIZE = get_batch_size() def process_commit_metadata(logger, auth, contributorQueue, repo_id, platform_id): @@ -269,13 +266,15 @@ def insert_facade_contributors(self, repo_git): key_auth = GithubRandomKeyAuth(logger) + facade_batch_size = get_batch_size() + # Process results in batches to reduce memory usage batch = [] for row in rows: batch.append(dict(row)) - if len(batch) >= FACADE_CONTRIBUTOR_BATCH_SIZE: + if len(batch) >= facade_batch_size: process_commit_metadata(logger, key_auth, batch, repo_id, platform_id) batch.clear() @@ -330,7 +329,7 @@ def insert_facade_contributors(self, repo_git): for row in rows: batch.append(dict(row)) - if len(batch) >= FACADE_CONTRIBUTOR_BATCH_SIZE: + if len(batch) >= facade_batch_size: link_commits_to_contributor(logger, facade_helper, batch) batch.clear() diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 09e1626d0..6b7b3dd8b 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -17,9 +17,6 @@ from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors, get_batch_size -# Batch size for processing issues - controls memory usage during collection -# Uses default_batch_size from config (default: 1000) -ISSUE_BATCH_SIZE = get_batch_size() development = get_development_flag() @@ -57,6 +54,8 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: try: issue_data_generator = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) + issue_batch_size = get_batch_size() + # Process issues in batches to avoid memory spikes batch = [] total_issues = 0 @@ -64,7 +63,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: for issue in issue_data_generator: batch.append(issue) - if len(batch) >= ISSUE_BATCH_SIZE: + if len(batch) >= issue_batch_size: logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues + len(batch)})") process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) total_issues += len(batch) diff --git a/augur/tasks/github/messages.py b/augur/tasks/github/messages.py index 906c64e0e..e8453a18d 100644 --- a/augur/tasks/github/messages.py +++ b/augur/tasks/github/messages.py @@ -14,9 +14,6 @@ from sqlalchemy.sql import text -# Batch size for processing messages - smaller due to large text content per message -# Uses github_message_batch_size from config (default: 20) -MESSAGE_BATCH_SIZE = get_batch_size("message") platform_id = 1 @@ -87,6 +84,8 @@ def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, tas def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger, key_auth, task_name, augur_db, since) -> None: + message_batch_size = get_batch_size("message") + owner, repo = get_owner_repo(repo_git) # define logger for task @@ -129,7 +128,7 @@ def 
process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger logger.info(f"{task_name}: PR or issue comment url of {comment_url} returned 404. Skipping.") skipped_urls += 1 - if len(all_data) >= MESSAGE_BATCH_SIZE: + if len(all_data) >= message_batch_size: process_messages(all_data, task_name, repo_id, logger, augur_db) all_data.clear() diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 58ddc1854..757d11daa 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -6,13 +6,12 @@ from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_batch_size -# Batch size for PR commit collection -# Uses default_batch_size from config (default: 1000) -PR_COMMIT_BATCH_SIZE = get_batch_size() def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collection=False): - + + pr_commit_batch_size = get_batch_size() + if full_collection: # query existing PRs and the respective url we will append the commits url to pr_url_sql = s.sql.text(""" @@ -77,7 +76,7 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti } all_data.append(pr_commit_row) - if len(all_data) >= PR_COMMIT_BATCH_SIZE: + if len(all_data) >= pr_commit_batch_size: logger.info(f"{task_name}: Inserting {len(all_data)} rows") augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) all_data.clear() diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index 653880af8..059ace19b 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -6,13 +6,12 @@ from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_batch_size -# Batch size for PR file collection -# Uses default_batch_size from config (default: 1000) -PR_FILE_BATCH_SIZE = get_batch_size() def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection=False): - + + pr_file_batch_size = get_batch_size() + if full_collection: # query existing PRs and the respective url we will append the commits url to pr_number_sql = s.sql.text(""" @@ -99,7 +98,7 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection pr_file_rows.append(data) - if len(pr_file_rows) >= PR_FILE_BATCH_SIZE: + if len(pr_file_rows) >= pr_file_batch_size: logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) pr_file_rows.clear() diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 89dafca39..1681b80ff 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -19,9 +19,6 @@ from typing import List -# Batch sizes for PR-related data collection -# All use default_batch_size from config (default: 1000) -PR_BATCH_SIZE = PR_REVIEW_COMMENT_BATCH_SIZE = PR_REVIEW_BATCH_SIZE = get_batch_size() platform_id = 1 @@ -45,13 +42,15 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: # subtract 2 days to ensure all data is collected core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) + pr_batch_size = get_batch_size() + total_count = 0 all_data = [] for pr in retrieve_all_pr_data(repo_git, logger, 
manifest.key_auth, core_data_last_collected): all_data.append(pr) - if len(all_data) >= PR_BATCH_SIZE: + if len(all_data) >= pr_batch_size: process_pull_requests(all_data, f"{owner}/{repo}: Github Pr task", repo_id, logger, augur_db) total_count += len(all_data) all_data.clear() @@ -252,6 +251,8 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - key_auth = GithubRandomKeyAuth(logger) github_data_access = GithubDataAccess(key_auth, logger) + pr_review_comment_batch_size = get_batch_size() + # Batch processing: accumulate comments until batch size reached, then flush contributors = [] pr_review_comment_dicts = [] @@ -274,7 +275,7 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - pr_review_msg_mapping_data[comment["id"]] = comment # Flush batch when threshold reached (check both to prevent unbounded growth) - if len(pr_review_comment_dicts) >= PR_REVIEW_COMMENT_BATCH_SIZE or len(contributors) >= PR_REVIEW_COMMENT_BATCH_SIZE: + if len(pr_review_comment_dicts) >= pr_review_comment_batch_size or len(contributors) >= pr_review_comment_batch_size: refs_inserted = _flush_pr_review_comment_batch( logger, contributors, pr_review_comment_dicts, pr_review_msg_mapping_data, pr_review_id_mapping, repo_id, tool_version, data_source, owner, repo @@ -487,6 +488,8 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: github_data_access = GithubDataAccess(manifest.key_auth, logger) + pr_review_batch_size = get_batch_size() + # Batch processing: accumulate reviews until batch size reached, then flush contributors = [] pr_review_dicts = [] @@ -522,7 +525,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: ) # Flush batch when threshold reached - if len(pr_review_dicts) >= PR_REVIEW_BATCH_SIZE: + if len(pr_review_dicts) >= pr_review_batch_size: _flush_pr_review_batch(augur_db, contributors, pr_review_dicts, logger, owner, repo) total_reviews_collected += len(pr_review_dicts) contributors.clear() From 2f345e292b6da382e341e3feaac8050fba0ec586 Mon Sep 17 00:00:00 2001 From: iGufrankhan Date: Sat, 7 Feb 2026 19:31:06 +0000 Subject: [PATCH 059/389] Fix API view and repo info template Signed-off-by: iGufrankhan --- augur/api/view/api.py | 12 ---------- augur/templates/repo-info.j2 | 45 +----------------------------------- 2 files changed, 1 insertion(+), 56 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index be7996b29..4f2543301 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -1,7 +1,6 @@ import logging import re -from augur.api.view.init import report_requests from flask import flash, current_app, jsonify, redirect, request, url_for from flask_login import current_user, login_required @@ -225,14 +224,3 @@ def user_app_create(): flash("Could not create app") return redirect(url_for("user_settings") + "?section=application") - - -""" ---------------------------------------------------------------- -Locking request loop: - This route will lock the current request until the - report request completes. A json response is guaranteed. - Assumes that the requested repo exists. 
-"""
-@app.route('/requests/report/wait/<id>')
-def wait_for_report_request(id):
-    return jsonify(report_requests.get(id, {}))
diff --git a/augur/templates/repo-info.j2 b/augur/templates/repo-info.j2
index 2738d70e2..5aee38a28 100644
--- a/augur/templates/repo-info.j2
+++ b/augur/templates/repo-info.j2
@@ -11,50 +11,7 @@
 
 {% endif %}
 
-{% if repo.repo_id %}
-{# Wait for cache response:
-    This method queries the server from the client, asking for confirmation
-    of which images are available on the server. The server will asynchronously
-    download the requested images as the page is loading, then once the page
-    loads, the client will query a locking endpoint on the server and wait
-    for a response.
-#}
-
-{% endif %}
+
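PATCH 058's refactor, which calls get_batch_size() inside each task instead of binding it to a module-level constant, matters because module-level expressions run once at import time. A self-contained sketch of the difference; the config dict is a stand-in for Augur's real configuration layer, not its actual API:

```python
_config = {"default_batch_size": 1000}

def get_batch_size(kind: str = "default") -> int:
    # Stand-in for the configured lookup.
    return _config[f"{kind}_batch_size"]

FROZEN_BATCH = get_batch_size()   # evaluated once, at import time

def frozen() -> int:
    return FROZEN_BATCH           # never observes later config changes

def current() -> int:
    return get_batch_size()       # re-reads the configuration on every call

_config["default_batch_size"] = 250   # operator retunes at runtime
assert frozen() == 1000               # stale value from import time
assert current() == 250               # picks up the new setting
```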