diff --git a/PROJECTS/intermediate/dlp-scanner/.dlp-scanner.yml b/PROJECTS/intermediate/dlp-scanner/.dlp-scanner.yml new file mode 100644 index 0000000..422cd5d --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/.dlp-scanner.yml @@ -0,0 +1,86 @@ +# ©AngelaMos | 2026 +# .dlp-scanner.yml + +scan: + file: + max_file_size_mb: 100 + recursive: true + exclude_patterns: + - "*.pyc" + - "__pycache__" + - ".git" + - "node_modules" + - ".venv" + include_extensions: + - ".pdf" + - ".docx" + - ".xlsx" + - ".xls" + - ".csv" + - ".json" + - ".xml" + - ".yaml" + - ".yml" + - ".txt" + - ".log" + - ".eml" + - ".msg" + - ".parquet" + - ".avro" + - ".tar.gz" + - ".tar.bz2" + - ".zip" + + database: + sample_percentage: 5 + max_rows_per_table: 10000 + timeout_seconds: 30 + exclude_tables: [] + include_tables: [] + + network: + bpf_filter: "" + entropy_threshold: 7.2 + dns_label_entropy_threshold: 4.0 + max_packets: 0 + +detection: + min_confidence: 0.20 + severity_threshold: "low" + context_window_tokens: 10 + enable_rules: + - "*" + disable_rules: [] + allowlists: + values: + - "123-45-6789" + - "000-00-0000" + - "4111111111111111" + domains: + - "example.com" + - "test.com" + file_patterns: + - "test_*" + - "*_fixture*" + - "mock_*" + +compliance: + frameworks: + - "HIPAA" + - "PCI_DSS" + - "GDPR" + - "CCPA" + - "SOX" + - "GLBA" + +output: + format: "console" + output_file: "" + redaction_style: "partial" + verbose: false + color: true + +logging: + level: "INFO" + json_output: false + log_file: "" diff --git a/PROJECTS/intermediate/dlp-scanner/.env.example b/PROJECTS/intermediate/dlp-scanner/.env.example new file mode 100644 index 0000000..01f62ab --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/.env.example @@ -0,0 +1,24 @@ +# ©AngelaMos | 2026 +# .env.example + +# PostgreSQL +PGHOST=localhost +PGPORT=5432 +PGUSER=dlp_scanner +PGPASSWORD=changeme +PGDATABASE=target_db + +# MySQL +MYSQL_HOST=localhost +MYSQL_PORT=3306 +MYSQL_USER=dlp_scanner +MYSQL_PASSWORD=changeme 
+MYSQL_DATABASE=target_db + +# MongoDB +MONGO_URI=mongodb://localhost:27017 +MONGO_DATABASE=target_db + +# Logging +DLP_LOG_LEVEL=INFO +DLP_LOG_JSON=false diff --git a/PROJECTS/intermediate/dlp-scanner/.gitignore b/PROJECTS/intermediate/dlp-scanner/.gitignore new file mode 100644 index 0000000..d7e13b7 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/.gitignore @@ -0,0 +1,11 @@ +docs/ +__pycache__/ +*.pyc +.env +.venv/ +*.egg-info/ +dist/ +build/ +.mypy_cache/ +.ruff_cache/ +.pytest_cache/ diff --git a/PROJECTS/intermediate/dlp-scanner/.style.yapf b/PROJECTS/intermediate/dlp-scanner/.style.yapf new file mode 100644 index 0000000..74d8341 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/.style.yapf @@ -0,0 +1,46 @@ +[style] +based_on_style = pep8 +column_limit = 75 +indent_width = 4 +continuation_indent_width = 4 +indent_closing_brackets = false +dedent_closing_brackets = true +indent_blank_lines = false +spaces_before_comment = 2 +spaces_around_power_operator = false +spaces_around_default_or_named_assign = true +space_between_ending_comma_and_closing_bracket = false +space_inside_brackets = false +spaces_around_subscript_colon = true +blank_line_before_nested_class_or_def = false +blank_line_before_class_docstring = false +blank_lines_around_top_level_definition = 2 +blank_lines_between_top_level_imports_and_variables = 2 +blank_line_before_module_docstring = false +split_before_logical_operator = true +split_before_first_argument = true +split_before_named_assigns = true +split_complex_comprehension = true +split_before_expression_after_opening_paren = false +split_before_closing_bracket = true +split_all_comma_separated_values = true +split_all_top_level_comma_separated_values = false +coalesce_brackets = false +each_dict_entry_on_separate_line = true +allow_multiline_lambdas = false +allow_multiline_dictionary_keys = false +split_penalty_import_names = 0 +join_multiple_lines = false +align_closing_bracket_with_visual_indent = true 
+arithmetic_precedence_indication = false +split_penalty_for_added_line_split = 275 +use_tabs = false +split_before_dot = false +split_arguments_when_comma_terminated = true +i18n_function_call = ['_', 'N_', 'gettext', 'ngettext'] +i18n_comment = ['# Translators:', '# i18n:'] +split_penalty_comprehension = 80 +split_penalty_after_opening_bracket = 280 +split_penalty_before_if_expr = 0 +split_penalty_bitwise_operator = 290 +split_penalty_logical_operator = 0 diff --git a/PROJECTS/intermediate/dlp-scanner/README.md b/PROJECTS/intermediate/dlp-scanner/README.md new file mode 100644 index 0000000..4bcedf0 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/README.md @@ -0,0 +1,112 @@ +```ruby +██████╗ ██╗ ██████╗ ███████╗ ██████╗ █████╗ ███╗ ██╗ +██╔══██╗██║ ██╔══██╗ ██╔════╝██╔════╝██╔══██╗████╗ ██║ +██║ ██║██║ ██████╔╝█████╗███████╗██║ ███████║██╔██╗ ██║ +██║ ██║██║ ██╔═══╝ ╚════╝╚════██║██║ ██╔══██║██║╚██╗██║ +██████╔╝███████╗██║ ███████║╚██████╗██║ ██║██║ ╚████║ +╚═════╝ ╚══════╝╚═╝ ╚══════╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ +``` + +[![Cybersecurity Projects](https://img.shields.io/badge/Cybersecurity--Projects-intermediate-red?style=flat&logo=github)](https://github.com/CarterPerez-dev/Cybersecurity-Projects/tree/main/PROJECTS/intermediate/dlp-scanner) +[![Python](https://img.shields.io/badge/Python-3.12+-3776AB?style=flat&logo=python&logoColor=white)](https://python.org) +[![License: AGPLv3](https://img.shields.io/badge/License-AGPL_v3-purple.svg)](https://www.gnu.org/licenses/agpl-3.0) + +> Data Loss Prevention scanner for files, databases, and network traffic. + +*This is a quick overview. 
Security theory, architecture, and full walkthroughs are in the [learn modules](#learn).* + +## What It Does + +- Scans files (PDF, DOCX, XLSX, CSV, JSON, XML, YAML, Parquet, Avro, archives, emails) for PII, credentials, financial data, and PHI +- Scans databases (PostgreSQL, MySQL, MongoDB, SQLite) with schema introspection and sampling +- Scans network captures (PCAP/PCAPNG) with protocol parsing, TCP reassembly, and DNS exfiltration detection +- Confidence scoring pipeline: regex match, checksum validation (Luhn, Mod-97, Mod-11), context keyword proximity, entity co-occurrence +- Maps findings to compliance frameworks (HIPAA, PCI-DSS, GDPR, CCPA, SOX, GLBA, FERPA) +- Reports in console (Rich tables), JSON, SARIF 2.1.0, or CSV + +## Quick Start + +```bash +bash install.sh +dlp-scan file ./data +``` + +## Usage + +```bash +dlp-scan file ./data/employees/ # scan a directory +dlp-scan file ./report.pdf -f json # scan a file, JSON output +dlp-scan db postgres://user:pass@host/db # scan PostgreSQL +dlp-scan db sqlite:///path/to/local.db # scan SQLite +dlp-scan network capture.pcap # scan network traffic +dlp-scan file ./data -f sarif -o results.sarif # SARIF for CI/CD +dlp-scan report convert results.json -f csv # convert report format +dlp-scan report summary results.json # print summary stats +``` + +### Global Options + +``` +--config, -c Path to YAML config file +--verbose, -v Enable debug logging +--version Show version +``` + +### Output Formats + +| Format | Flag | Use Case | +|--------|------|----------| +| Console | `-f console` | Interactive review with Rich tables | +| JSON | `-f json` | Structured analysis and archival | +| SARIF | `-f sarif` | GitHub code scanning, CI/CD integration | +| CSV | `-f csv` | Compliance team export, spreadsheet import | + +## Stack + +**Language:** Python 3.12+ + +**CLI:** Typer 0.15+ with Rich integration + +**Detection:** Regex + checksum validators + Shannon entropy + context keyword scoring + +**File Formats:** PyMuPDF, 
python-docx, openpyxl, xlrd, defusedxml, lxml, pyarrow, fastavro, extract-msg + +**Databases:** asyncpg (PostgreSQL), aiomysql (MySQL), pymongo async (MongoDB), aiosqlite (SQLite) + +**Network:** dpkt (PCAP parsing), TCP reassembly, DPI protocol identification, DNS exfiltration heuristics + +**Config:** Pydantic 2.10+ models with YAML config loading (ruamel.yaml) + +**Quality:** ruff, mypy (strict), yapf, pytest + hypothesis, structlog + +## Configuration + +Copy `.dlp-scanner.yml` to your project root and customize. Key settings: + +```yaml +detection: + min_confidence: 0.20 # minimum score to report + enable_rules: ["*"] # glob patterns for rule IDs + allowlists: + values: ["123-45-6789"] # suppress known test values + +output: + format: "console" # console, json, sarif, csv + redaction_style: "partial" # partial, full, none +``` + +## Learn + +This project includes step-by-step learning materials covering security theory, architecture, and implementation. + +| Module | Topic | +|--------|-------| +| [00 - Overview](learn/00-OVERVIEW.md) | Prerequisites and quick start | +| [01 - Concepts](learn/01-CONCEPTS.md) | DLP theory and real-world breaches | +| [02 - Architecture](learn/02-ARCHITECTURE.md) | System design and data flow | +| [03 - Implementation](learn/03-IMPLEMENTATION.md) | Code walkthrough | +| [04 - Challenges](learn/04-CHALLENGES.md) | Extension ideas and exercises | + +## License + +[AGPLv3](https://www.gnu.org/licenses/agpl-3.0) diff --git a/PROJECTS/intermediate/dlp-scanner/install.sh b/PROJECTS/intermediate/dlp-scanner/install.sh new file mode 100755 index 0000000..19db9e7 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/install.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# ©AngelaMos | 2026 +# install.sh + +set -euo pipefail + +command -v uv >/dev/null 2>&1 || { + echo "Installing uv..." + curl -LsSf https://astral.sh/uv/install.sh | sh + export PATH="$HOME/.local/bin:$PATH" +} + +echo "Syncing dependencies..." 
+uv sync + +echo "Downloading spaCy model (optional, for NLP-based detection)..." +uv run python -m spacy download en_core_web_sm 2>/dev/null || true + +echo "" +echo "Setup complete. Run the scanner with:" +echo " uv run dlp-scan --help" +echo "" +echo "Quick start:" +echo " uv run dlp-scan file ./path/to/scan" +echo " uv run dlp-scan db sqlite:///path/to/db.sqlite3" +echo " uv run dlp-scan network ./capture.pcap" diff --git a/PROJECTS/intermediate/dlp-scanner/learn/00-OVERVIEW.md b/PROJECTS/intermediate/dlp-scanner/learn/00-OVERVIEW.md new file mode 100644 index 0000000..bd006f8 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/learn/00-OVERVIEW.md @@ -0,0 +1,76 @@ +# 00-OVERVIEW.md + +# DLP Scanner + +## What This Is + +A command-line Data Loss Prevention scanner that detects sensitive data across three surfaces: files (PDF, DOCX, XLSX, CSV, JSON, XML, YAML, Parquet, Avro, archives, emails), databases (PostgreSQL, MySQL, MongoDB, SQLite), and network captures (PCAP/PCAPNG with protocol parsing and TCP reassembly). It uses a confidence scoring pipeline combining regex matching, checksum validation (Luhn for credit cards, Mod-97 for IBANs, Mod-11 for NHS numbers), keyword proximity analysis, and Shannon entropy detection. Findings are classified by severity and mapped to compliance frameworks (HIPAA, PCI-DSS, GDPR, CCPA, SOX, GLBA, FERPA). Output supports console Rich tables, JSON, SARIF 2.1.0 for CI/CD, and CSV for compliance teams. + +## Why This Matters + +Data breaches involving PII exposure keep appearing because organizations cannot find sensitive data they do not know exists. The 2017 Equifax breach exposed 147 million SSNs from an unpatched Apache Struts application, but the underlying problem was that SSNs were stored in plaintext across multiple database tables without anyone tracking where that data lived.
In 2019, Capital One lost 100 million credit applications from an S3 bucket because a misconfigured WAF allowed server-side request forgery, and nobody had scanned those files to realize unencrypted SSNs and credit card numbers sat in flat CSV exports. The Marriott breach (2018) exposed 500 million records including 5.25 million unencrypted passport numbers, partially because the Starwood reservation system merged without a data inventory that would have flagged those fields as sensitive. + +These are not failure-of-firewall problems. They are failure-of-visibility problems. DLP tools exist to answer "where is our sensitive data?" before attackers answer it for you. Commercial solutions (Symantec DLP, Microsoft Purview, Netskope) cost six figures and require enterprise deployment, but the core detection logic is straightforward: pattern matching with validation, context analysis to reduce false positives, and compliance framework mapping to prioritize remediation. + +This project builds a DLP engine from scratch, teaching you the same detection techniques that power production systems. 
+ +**Real world scenarios where this applies:** +- Security engineers scanning file shares before a cloud migration to find PII that needs encryption +- Compliance teams auditing database tables for HIPAA-regulated PHI that should not be in plaintext +- SOC analysts inspecting PCAP captures for credentials or PII transmitted in the clear +- DevOps teams running DLP checks in CI/CD pipelines to catch secrets before they reach production +- Incident responders determining what sensitive data was accessible from a compromised network segment + +## What You'll Learn + +**Security Concepts:** +- Data classification tiers and how PII, PHI, PCI, and credential data map to regulatory requirements +- Confidence scoring: why regex alone produces false positives and how checksum validation, context keywords, and entity co-occurrence reduce them +- Compliance framework mapping: HIPAA's 18 identifiers, PCI-DSS cardholder data, GDPR personal data categories, CCPA consumer information +- Network DLP: detecting sensitive data in transit, DNS exfiltration via high-entropy subdomain labels, base64-encoded payloads in HTTP bodies +- Redaction strategies: why you never store the raw matched content in findings + +**Technical Skills:** +- Building a multi-format text extraction pipeline that handles 14+ file formats through a unified Protocol interface +- Database schema introspection across 4 database engines with statistical sampling (TABLESAMPLE BERNOULLI, $sample aggregation) +- TCP stream reassembly from raw packets using sequence-number ordering and bidirectional flow key normalization +- Confidence scoring pipeline: base scores, checksum boosts, context keyword proximity windows, entity co-occurrence +- SARIF 2.1.0 output for GitHub code scanning integration + +**Tools and Techniques:** +- Typer CLI with Annotated-style parameters and global option propagation through Click context +- Pydantic 2.x for configuration validation with YAML loading +- structlog with stdlib 
integration for structured JSON logging +- orjson for high-performance JSON serialization +- asyncpg, aiomysql, pymongo async, aiosqlite for async database access +- dpkt for fast PCAP parsing (100x faster than Scapy) +- pytest with hypothesis for property-based testing of detection rules + +## Prerequisites + +**Required knowledge:** +- Python fundamentals: dataclasses, type hints, list comprehensions, context managers +- Basic networking: TCP/IP, ports, packets, what PCAP files contain +- Basic SQL: SELECT, WHERE, table schemas, column types +- Security basics: what PII is, why SSNs and credit card numbers need protection, what compliance frameworks exist + +**Tools you'll need:** +- Python 3.12+ (uses modern generic syntax and `from __future__ import annotations`) +- uv package manager (install: `curl -LsSf https://astral.sh/uv/install.sh | sh`) +- A terminal with UTF-8 support (for Rich console output) + +**Helpful but not required:** +- Experience with regex and pattern matching +- Familiarity with dpkt or Scapy for packet analysis +- Knowledge of database URIs and connection strings +- Understanding of SARIF format for CI/CD security tooling + +## Quick Start + +```bash +bash install.sh +dlp-scan file ./data +dlp-scan file ./data -f json -o results.json +dlp-scan db sqlite:///path/to/database.db +dlp-scan report summary results.json +``` diff --git a/PROJECTS/intermediate/dlp-scanner/learn/01-CONCEPTS.md b/PROJECTS/intermediate/dlp-scanner/learn/01-CONCEPTS.md new file mode 100644 index 0000000..e1ec8e5 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/learn/01-CONCEPTS.md @@ -0,0 +1,133 @@ +# 01-CONCEPTS.md + +# DLP Concepts + +## What is Data Loss Prevention? + +DLP is the practice of detecting and preventing sensitive data from being stored, transmitted, or accessed in unauthorized ways. The three modes of DLP correspond to the three scan surfaces in this project: + +- **Data at rest**: files on disk, records in databases, documents in cloud storage. 
Our file scanner and database scanner cover this surface. +- **Data in motion**: network traffic, API calls, email transmissions. Our network scanner covers this surface. +- **Data in use**: clipboard contents, screen captures, application memory. Not covered here (requires endpoint agents). + +The fundamental question DLP answers: "Where is our sensitive data, and is it protected?" + +## Detection Techniques + +### Pattern Matching with Validation + +The simplest approach: regex patterns that match structural formats like SSNs (XXX-XX-XXXX), credit card numbers (16 digits with known prefixes), and API keys (known prefix patterns like `AKIA` for AWS). + +The problem with regex alone is false positive rates. The string `123-45-6789` matches an SSN pattern but appears in test data, serial numbers, and phone extensions. The string `4532015112830366` matches a Visa card pattern but could be a random 16-digit identifier. + +This is why production DLP systems never rely on regex alone. They add validation layers: + +**Checksum validation** eliminates structurally invalid matches. Credit card numbers use the Luhn algorithm: double every second digit from right, subtract 9 if the result exceeds 9, and verify the total is divisible by 10. A random 16-digit number has a ~10% chance of passing Luhn, which is still useful signal. IBANs use Mod-97 (ISO 7064): rearrange the country code and check digits, convert letters to numbers, and verify the result mod 97 equals 1. NHS numbers use Mod-11 with weighted digit multiplication. + +**SSN area validation** checks that the first three digits are not 000, 666, or 900-999 (never assigned by the SSA). Group and serial numbers must also be non-zero. This eliminates ranges that the Social Security Administration has never used. + +### Context Keyword Scoring + +A 9-digit number matching SSN format near the word "social security" is more likely to be an actual SSN than the same number in a column labeled "serial_number". 
Context scoring scans a bidirectional window around each match for relevant keywords: + +``` +For SSN patterns: "ssn", "social security", "social_security_number", "tax id" +For credit cards: "credit card", "card number", "payment", "billing" +For API keys: "api_key", "secret", "token", "authorization" +``` + +Keywords found within the window (default: 10 tokens in each direction) add a boost of +0.05 to +0.35 depending on proximity. Closer keywords contribute more confidence. + +### Shannon Entropy + +Random-looking strings often indicate secrets: API keys, encrypted values, base64-encoded credentials. Shannon entropy measures the randomness of a string: + +``` +H = -sum(p(x) * log2(p(x))) for each unique character x +``` + +English text has entropy around 3.5-4.5 bits per character. Base64-encoded data is around 5.5-6.0. Hex-encoded data is around 3.5-4.0. Truly random data approaches log2(alphabet_size). A 40-character string with entropy above 4.5 is flagged as a potential secret. + +### Confidence Scoring Pipeline + +Each detection produces a confidence score between 0.0 and 1.0: + +``` +1. Regex match -> base_score (0.10 to 0.85, configured per rule) +2. Checksum validation -> +0.30 if the checksum passes +3. Context keyword search -> +0.05 to +0.35 based on keyword proximity +4. Entity co-occurrence -> +0.10 to +0.20 if multiple PII types appear nearby +5. Final score capped at 1.0 +``` + +The score maps to severity: +- 0.85+ = critical +- 0.65+ = high +- 0.40+ = medium +- 0.20+ = low +- below 0.20 = discarded + +An SSN match (base 0.45) with valid area/group/serial and the word "ssn" nearby scores 0.45 + 0.30 (area validation acts as implicit checksum) + 0.15 (context) = 0.90, classified as critical. The same pattern without context scores 0.45, classified as medium, which is appropriate because it might be a phone number fragment. 
+ +## Compliance Frameworks + +Regulatory frameworks define what data types require protection and what happens when they are exposed: + +**HIPAA (Health Insurance Portability and Accountability Act)**: Defines 18 types of Protected Health Information (PHI) including SSNs, medical record numbers, health plan beneficiary numbers, and biometric identifiers. A covered entity that fails to protect PHI faces fines from $100 to $50,000 per violation (up to $1.5 million per year per category). The 2015 Anthem breach exposed 78.8 million records and resulted in a $16 million settlement with HHS. + +**PCI-DSS (Payment Card Industry Data Security Standard)**: Requires protection of cardholder data: primary account numbers (PAN), cardholder names, expiration dates, and service codes. PAN must be rendered unreadable (encrypted, hashed, truncated, or tokenized). The Heartland Payment Systems breach (2008) compromised 130 million credit card numbers and cost the company $140 million in compensation. + +**GDPR (General Data Protection Regulation)**: Applies to personal data of EU residents including names, email addresses, phone numbers, IP addresses, and location data. Fines reach 4% of annual global revenue or 20 million euros, whichever is higher. Meta was fined 1.2 billion euros in 2023 for transferring EU user data to the US without adequate safeguards. + +**CCPA (California Consumer Privacy Act)**: Covers personal information of California residents. Similar categories to GDPR but with different enforcement mechanisms. Consumers can sue directly for data breaches involving unencrypted personal information ($100-$750 per consumer per incident). + +## Network DLP Concepts + +### DNS Exfiltration + +Attackers encode stolen data in DNS queries to bypass firewalls that do not inspect DNS traffic. 
The data is encoded in subdomain labels: + +``` +aGVsbG8gd29ybGQ.evil.com (base64 "hello world" in subdomain) +``` + +Detection signals: +- **Label entropy**: legitimate subdomains (www, mail, api) have low entropy. Base64-encoded data has entropy above 4.0 +- **QNAME length**: normal queries are under 50 characters. Exfiltration queries exceed 100+ +- **TXT query volume**: TXT records are used to receive exfiltrated data. A spike in TXT queries to a single domain is suspicious +- **Subdomain label length**: DNS labels above 50 characters are almost never legitimate + +The OilRig APT group (attributed to Iran) used DNS tunneling extensively in campaigns against Middle Eastern governments, encoding stolen documents in subdomain queries to command-and-control infrastructure. DNSCat2 and Iodine are open-source tools that implement this technique. + +### Protocol Identification + +Deep Packet Inspection (DPI) identifies application protocols from payload byte prefixes without relying on port numbers: + +- HTTP requests start with methods: `GET `, `POST `, `PUT `, `DELETE ` +- HTTP responses start with `HTTP/` +- TLS records start with `\x16\x03` (handshake + TLS version) +- SSH connections start with `SSH-` +- SMTP starts with `220 ` (server greeting) + +This matters because sensitive data in HTTP traffic (API keys in headers, SSNs in POST bodies) requires different handling than the same data in an encrypted TLS stream (where you can only flag that sensitive data was transmitted, not read the content). + +### TCP Stream Reassembly + +Application-layer data spans multiple TCP packets. Reassembly reconstructs the original byte stream: + +1. Track flows by 4-tuple: (src_ip, dst_ip, src_port, dst_port) +2. Use bidirectional flow keys so both directions of a conversation map to the same flow +3. Store segments indexed by TCP sequence number +4. 
Sort by sequence number and concatenate payloads, deduplicating retransmissions + +Without reassembly, a credit card number split across two packets would be missed by pattern matching on individual payloads. + +## Redaction + +DLP reports must never contain the raw sensitive data they detect. Redaction strategies: + +- **Partial**: preserve structure but mask content: `***-**-6789`, `4532****0366` +- **Full**: replace entirely: `[REDACTED]` +- **None**: no redaction (for debugging only, never in production reports) + +Partial redaction is preferred for triage because analysts can identify the data type and approximate value without exposing the full sensitive content. The last 4 digits of an SSN or credit card are commonly used as verification tokens and are considered non-sensitive by PCI-DSS. diff --git a/PROJECTS/intermediate/dlp-scanner/learn/02-ARCHITECTURE.md b/PROJECTS/intermediate/dlp-scanner/learn/02-ARCHITECTURE.md new file mode 100644 index 0000000..d8ef752 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/learn/02-ARCHITECTURE.md @@ -0,0 +1,543 @@ +# 02-ARCHITECTURE.md + +# System Architecture + +## High-Level Pipeline + +The scanner follows a linear pipeline: CLI parses arguments, the engine orchestrates, scanners extract and detect, and reporters format output. 
+ +``` +┌──────────────────────────────────────────────────────────┐ +│ CLI Layer (Typer) │ +│ │ +│ dlp-scan file ./data -f json -o results.json │ +│ dlp-scan db postgres://user:pass@host/db │ +│ dlp-scan network capture.pcap │ +│ dlp-scan report summary results.json │ +└──────────────────────┬───────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────┐ +│ ScanEngine │ +│ │ +│ Loads config ─► Builds DetectorRegistry ─► Selects │ +│ scanner type ─► Runs scan ─► Routes to reporter │ +└──────────────────────┬───────────────────────────────────┘ + │ + ┌────────────┼────────────┐ + ▼ ▼ ▼ +┌──────────────┐ ┌──────────┐ ┌──────────────┐ +│ FileScanner │ │DBScanner │ │NetworkScanner│ +│ │ │ │ │ │ +│ Walk dirs │ │ Schema │ │ PCAP parse │ +│ Extract text │ │ introspect│ │ TCP reassembly│ +│ Run detectors│ │ Sample │ │ DNS exfil │ +│ │ │ rows │ │ DPI protocol │ +│ │ │ Detect │ │ Detect │ +└──────┬───────┘ └────┬─────┘ └──────┬───────┘ + │ │ │ + └──────────────┼──────────────┘ + ▼ +┌──────────────────────────────────────────────────────────┐ +│ DetectorRegistry │ +│ │ +│ PatternDetector ─► ContextBoost ─► CooccurrenceBoost │ +│ │ │ +│ └─► EntropyDetector (parallel) │ +│ │ +│ Rules: PII | Financial | Credentials | Health │ +└──────────────────────┬───────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────┐ +│ Reporter Layer │ +│ │ +│ ConsoleReporter ─► Rich tables with severity colors │ +│ JsonReporter ─► Structured JSON with metadata │ +│ SarifReporter ─► SARIF 2.1.0 for CI/CD pipelines │ +│ CsvReporter ─► Flat CSV for compliance teams │ +└──────────────────────────────────────────────────────────┘ +``` + +## Component Breakdown + +### CLI Layer + +**Purpose:** Parse command-line arguments, propagate global options, route to the correct scan command or report utility. 
+ +**Files:** `cli.py`, `commands/scan.py`, `commands/report.py` + +The root Typer app in `cli.py` defines a callback that captures `--config`, `--verbose`, and `--version` into Click's context object. The scan commands (`file`, `db`, `network`) are defined in `commands/scan.py` and registered as top-level commands through a `register(app)` function that calls `app.command("file")(scan_file)` for each. This avoids nesting under a `scan` subgroup while keeping the command definitions in their own module. + +The `report` subgroup is a separate Typer instance added via `app.add_typer(report_app, name="report")`. It provides `convert` (JSON to other formats) and `summary` (print Rich table from JSON results). + +### ScanEngine + +**Purpose:** Single orchestration point that connects config to scanners to reporters. + +**File:** `engine.py` + +The engine takes a `ScanConfig` and constructs a `DetectorRegistry` by unpacking detection configuration into individual parameters: + +```python +class ScanEngine: + def __init__(self, config: ScanConfig) -> None: + self._config = config + detection = config.detection + allowlist_vals = detection.allowlists.values + self._registry = DetectorRegistry( + enable_patterns=detection.enable_rules, + disable_patterns=detection.disable_rules, + allowlist_values=( + frozenset(allowlist_vals) + if allowlist_vals else None + ), + context_window_tokens=( + detection.context_window_tokens + ), + ) +``` + +The engine exposes `scan_files`, `scan_database`, and `scan_network`, each of which constructs the appropriate scanner, runs it, and returns a `ScanResult`. Report generation uses a `REPORTER_MAP` dict that maps format strings to reporter classes. + +### DetectorRegistry + +**Purpose:** Central hub that loads detection rules, filters them by enable/disable globs, and runs the full scoring pipeline against text. 
+ +**File:** `detectors/registry.py` + +The registry loads all rules from four rule modules (PII, Financial, Credentials, Health), filters them using `fnmatch.fnmatch` against enable/disable patterns, and wraps the survivors in a `PatternDetector`. When `detect()` is called: + +1. `PatternDetector` runs all regex patterns, validates matches with checksums (Luhn, Mod-97, Mod-11), and filters against the allowlist +2. `apply_context_boost` scans a token window around each match for relevant keywords and adjusts scores based on proximity +3. `_apply_cooccurrence_boost` adds a bonus when multiple different PII types appear within 500 characters of each other +4. `EntropyDetector` independently finds high-entropy regions using a sliding window + +``` +Text Input + │ + ▼ +┌─────────────────────────┐ +│ PatternDetector │ +│ │ +│ For each rule: │ +│ regex.finditer(text) │ +│ ─► allowlist filter │ +│ ─► validator (Luhn, │ +│ Mod-97, SSN area) │ +│ ─► base_score + boost │ +└─────────┬───────────────┘ + │ + ▼ +┌─────────────────────────┐ +│ Context Boost │ +│ │ +│ Token window ±10 tokens │ +│ Keyword proximity search │ +│ Distance-weighted boost │ +│ (0.05 to 0.35) │ +└─────────┬───────────────┘ + │ + ▼ +┌─────────────────────────┐ +│ Co-occurrence Boost │ +│ │ +│ Different rule_ids │ +│ within 500 chars ─► +0.15│ +└─────────┬───────────────┘ + │ + ▼ +┌─────────────────────────┐ +│ Entropy Detector │ +│ │ +│ Sliding 256-byte window │ +│ Shannon H >= 7.2 bits │ +│ Independent matches │ +└─────────┬───────────────┘ + │ + ▼ + DetectorMatch[] +``` + +### Scanners + +**Purpose:** Each scanner handles a different scan surface (files, databases, network) and converts raw data into text that the DetectorRegistry can process. + +**Files:** `scanners/file_scanner.py`, `scanners/db_scanner.py`, `scanners/network_scanner.py` + +All scanners follow the same `Scanner` protocol: a `scan(target: str) -> ScanResult` method. 
They share a common flow: iterate over targets, extract text, run detection, convert matches to findings via `match_to_finding` in `scoring.py` (which handles severity classification, compliance lookup, remediation, and redaction in one call), and aggregate into a `ScanResult`. + +**FileScanner** walks a directory tree, applies extension and exclusion filters, dispatches each file to the appropriate extractor based on extension, and runs the detector on each `TextChunk`. The extension-to-extractor mapping is built once by `_build_extension_map`, which iterates over all extractor instances and indexes by their `supported_extensions`. + +**DatabaseScanner** connects via URI scheme detection (postgres, mysql, mongodb, sqlite), introspects the schema to find text-type columns, samples rows using database-native sampling (TABLESAMPLE BERNOULLI for PostgreSQL, RAND() for MySQL, $sample for MongoDB), and scans column values. + +**NetworkScanner** reads PCAP files via `read_pcap`, feeds packets into a `FlowTracker` for TCP reassembly, and processes DNS traffic inline through `parse_dns` and `DnsExfilDetector`. Each packet payload is also checked by `detect_base64_payload` for encoded data. After packet iteration, the scanner reassembles TCP flows, identifies the application protocol via `identify_protocol`, extracts text with protocol awareness (`parse_http` for HTTP bodies and sensitive headers, skip encrypted TLS/SSH, UTF-8 decode for everything else), and runs detection on the extracted text. + +### Extractors + +**Purpose:** Convert binary and structured file formats into uniform `TextChunk` objects that carry both the extracted text and a `Location` describing where it came from. 
+ +**Files:** `extractors/plaintext.py`, `extractors/pdf.py`, `extractors/office.py`, `extractors/structured.py`, `extractors/archive.py`, `extractors/email.py` + +All extractors implement the `Extractor` protocol: `extract(path) -> list[TextChunk]` and `supported_extensions -> frozenset[str]`. + +``` +┌───────────────────────────────────────────────┐ +│ Extractor Protocol │ +│ extract(path) -> list[TextChunk] │ +│ supported_extensions -> frozenset[str] │ +└───────────────────────────────────────────────┘ + │ + ┌────┴────┬──────────┬──────────┬──────┐ + ▼ ▼ ▼ ▼ ▼ +Plaintext PDF Office Structured Archive +.txt .log .pdf .docx .csv .json .zip +.cfg .py .xlsx .xml .yaml .tar.gz +.html .md .xls .parquet .tar.bz2 +.ts .go .avro +... .tsv +``` + +The `PlaintextExtractor` chunks files into 500-line blocks to keep memory bounded. Binary format extractors (PDF via PyMuPDF, DOCX via python-docx, XLSX via openpyxl) each return one `TextChunk` per page/sheet/section. The archive extractor recurses into compressed files up to a configurable depth with zip bomb protection (compression ratio threshold check). + +### Reporters + +**Purpose:** Take a `ScanResult` and serialize it into the requested output format. + +**Files:** `reporters/console.py`, `reporters/json_report.py`, `reporters/sarif.py`, `reporters/csv_report.py` + +Each reporter has a `generate(result) -> str` method. The `ConsoleReporter` also has a `display(result)` method for Rich-formatted terminal output with severity-colored tables. + +The JSON reporter outputs a structured document with `scan_metadata`, `findings`, and `summary` sections. The SARIF reporter produces a SARIF 2.1.0 document with `tool.driver.rules`, mapping severity levels through `SARIF_SEVERITY_MAP` (critical/high to "error", medium to "warning", low to "note"). The CSV reporter flattens findings into rows. 
+ +## Data Models + +### Core Models + +```python +@dataclass(frozen=True, slots=True) +class Location: + source_type: str + uri: str + line: int | None = None + column: int | None = None + byte_offset: int | None = None + table_name: str | None = None + column_name: str | None = None + sheet_name: str | None = None + + +@dataclass(slots=True) +class Finding: + finding_id: str + rule_id: str + rule_name: str + severity: Severity + confidence: float + location: Location + redacted_snippet: str + compliance_frameworks: list[str] + remediation: str + detected_at: datetime + + +@dataclass(slots=True) +class ScanResult: + scan_id: str + tool_version: str + scan_started_at: datetime + scan_completed_at: datetime | None + targets_scanned: int + findings: list[Finding] + errors: list[str] +``` + +`Location` is frozen because it represents a fact about where something was found. `Finding` is mutable because fields like `finding_id` and `detected_at` get defaults from factory functions. `ScanResult` aggregates findings and provides computed properties (`findings_by_severity`, `findings_by_rule`, `findings_by_framework`) that group counts for summary reporting. + +The `TextChunk` dataclass carries extracted text paired with its `Location`, forming the bridge between extractors and detectors. Every text fragment knows exactly where it came from, which lets findings carry precise location information through the pipeline. 
+ +### Detection Models + +```python +@dataclass(frozen=True, slots=True) +class DetectionRule: + rule_id: str + rule_name: str + pattern: re.Pattern[str] + base_score: float + context_keywords: list[str] + validator: Callable[[str], bool] | None + compliance_frameworks: list[str] + + +@dataclass(frozen=True, slots=True) +class DetectorMatch: + rule_id: str + rule_name: str + start: int + end: int + matched_text: str + score: float + context_keywords: list[str] + compliance_frameworks: list[str] +``` + +`DetectionRule` is a specification: the regex pattern to match, the base confidence score, optional checksum validator, and context keywords. `DetectorMatch` is a result: what was found, where in the text, and the current score after validation. The `score` field gets modified through the boost pipeline (context, co-occurrence) before being mapped to a `Severity` level and placed into a `Finding`. + +## Configuration Architecture + +``` +┌────────────────────────────────────────────┐ +│ .dlp-scanner.yml │ +│ │ +│ scan: │ +│ file: { max_file_size_mb, recursive } │ +│ database: { sample_percentage } │ +│ network: { bpf_filter, max_packets } │ +│ detection: │ +│ min_confidence, enable_rules, │ +│ disable_rules, allowlists │ +│ compliance: { frameworks } │ +│ output: { format, redaction_style } │ +│ logging: { level, json_output } │ +└────────────────┬───────────────────────────┘ + │ + ▼ +┌────────────────────────────────────────────┐ +│ load_config(path) -> ScanConfig │ +│ │ +│ 1. Check CLI --config flag │ +│ 2. Search candidates: │ +│ .dlp-scanner.yml │ +│ .dlp-scanner.yaml │ +│ ~/.dlp-scanner.yml │ +│ 3. Parse YAML via ruamel.yaml │ +│ 4. Validate with Pydantic 2.x models │ +│ 5. Return ScanConfig with defaults │ +└────────────────────────────────────────────┘ +``` + +Every configuration value has a constant default defined in `constants.py`. 
The Pydantic models in `config.py` use these constants as field defaults, so a completely empty config file produces a working scanner. Constrained-choice fields (`severity_threshold`, `format`, `redaction_style`) use `Literal` types defined in `constants.py` (e.g., `Literal["critical", "high", "medium", "low"]`), so Pydantic rejects invalid values at parse time rather than silently accepting a typo. The config loader uses `ruamel.yaml` (not PyYAML) because it preserves comments and handles YAML 1.2. + +The YAML structure uses a `scan:` top-level key to group scanner-specific config, while `detection:`, `compliance:`, `output:`, and `logging:` sit at root level. This mirrors how users think about configuration: "how to scan" vs. "what to detect" vs. "how to report". + +## Data Flow: File Scan + +Step-by-step walkthrough of `dlp-scan file ./data -f json`: + +``` +1. Typer parses args + └─► main() callback stores config_path="" and verbose=False in ctx.obj + +2. scan_file() receives ctx, target="./data", format="json" + └─► _run_scan() validates format, loads config, sets logging to WARNING + (WARNING for machine-readable formats keeps stdout clean) + +3. ScanEngine(config) constructs DetectorRegistry + └─► Registry loads 29 rules from PII/Financial/Credential/Health modules + └─► Filters through enable_rules=["*"], disable_rules=[] + +4. engine.scan_files("./data") + └─► FileScanner.scan() creates ScanResult, walks directory + +5. For each file in ./data/**/*: + └─► Check extension against include_extensions + └─► Check path against exclude_patterns + └─► Check file size against max_file_size_mb + └─► Select extractor by extension (e.g. .csv -> CsvExtractor) + └─► extractor.extract(path) -> list[TextChunk] + +6. 
For each TextChunk: + └─► registry.detect(chunk.text) -> list[DetectorMatch] + ├─► PatternDetector: regex match + allowlist + validator + ├─► apply_context_boost: keyword proximity scoring + ├─► _apply_cooccurrence_boost: multi-PII bonus + └─► EntropyDetector: high-entropy region detection + +7. For each DetectorMatch above min_confidence: + └─► match_to_finding(match, text, location, redaction_style) + ├─► score_to_severity(match.score) -> Severity + ├─► get_frameworks_for_rule(match.rule_id) -> compliance list + ├─► get_remediation_for_rule(match.rule_id) -> guidance string + └─► redact(chunk.text, start, end, style) -> snippet + └─► Append Finding to ScanResult + +8. Back in _run_scan(): + └─► engine.generate_report(result, "json") + └─► JsonReporter().generate(result) -> JSON string + └─► typer.echo(output) -> stdout +``` + +## Design Patterns + +### Protocol-Based Polymorphism + +The codebase uses Python's `typing.Protocol` instead of abstract base classes for extension points. The `Extractor`, `Scanner`, and `Detector` protocols define structural interfaces without requiring inheritance. + +```python +class Extractor(Protocol): + def extract(self, path: str) -> list[TextChunk]: ... + + @property + def supported_extensions(self) -> frozenset[str]: ... +``` + +Any class with matching method signatures satisfies the protocol. This means you can add a new extractor (say, for .pptx files) without importing the base module. The type checker verifies compliance; the runtime never checks inheritance. + +**Why not ABCs:** Abstract base classes force an import dependency and mandate `super().__init__()` chains. Protocols are lighter and match Python's duck typing philosophy. Since extractors are stateless (no shared state or lifecycle), there is nothing an ABC would provide beyond the type contract. + +### Registry Pattern + +The `DetectorRegistry` centralizes rule management: loading, filtering, and execution. 
Individual rule modules (pii.py, financial.py, credentials.py, health.py) each export a list of `DetectionRule` objects. The registry merges them into `ALL_RULES`, applies glob filtering, and wraps the result in a `PatternDetector`. + +This keeps rule definitions declarative. Adding a new rule is a matter of appending a `DetectionRule` to the appropriate list. The registry handles filtering and execution without rule authors needing to understand the scoring pipeline. + +### Command Registration Pattern + +CLI commands are defined in `commands/scan.py` as plain functions and registered on the root app through a `register(app)` function: + +```python +def register(app: typer.Typer) -> None: + app.command("file")(scan_file) + app.command("db")(scan_db) + app.command("network")(scan_network) +``` + +This achieves top-level commands (`dlp-scan file`, not `dlp-scan scan file`) while keeping the command logic out of `cli.py`. The `_run_scan` helper deduplicates the shared logic (config loading, format validation, output routing) across all three scan types. + +## Compliance Mapping + +The compliance module maps rule IDs to regulatory frameworks and remediation guidance using two static dictionaries: + +``` +RULE_FRAMEWORK_MAP: rule_id -> [frameworks] +RULE_REMEDIATION_MAP: rule_id -> guidance string +``` + +Rule IDs match actual detection rules (e.g., `FIN_CREDIT_CARD_VISA`, `FIN_CREDIT_CARD_MC`, not a generic `FIN_CREDIT_CARD`). Network exfiltration indicators (`NET_DNS_EXFIL_*`, `NET_ENCODED_*`) are also mapped. Every rule has a remediation entry with specific guidance text; unknown rules fall back to a generic default. + +When a `DetectorMatch` is converted to a `Finding` via `match_to_finding` in `scoring.py`, the function calls `get_frameworks_for_rule` and `get_remediation_for_rule` to decorate the finding with compliance metadata. If the detection rule itself also carries `compliance_frameworks`, both sets are merged. 
+ +This design keeps detection rules independent of compliance logic. The PII module does not need to know that HIPAA cares about SSNs. The compliance module owns that mapping, and it can be updated independently when regulations change. + +## Redaction Pipeline + +``` +matched text + │ + ▼ + style == "none"? ─yes─► raw snippet with context + │ no + ▼ + style == "full"? ─yes─► [REDACTED] with context + │ no + ▼ + _partial_redact() + │ + ├─ 9+ digit number ─► *****6789 (mask all but last 4) + ├─ email address ─► j****@example.com + └─ generic string ─► keep last 25% + │ + ▼ + _build_snippet() + │ + └─ ±20 chars context ─► "...SSN: *****6789 for..." +``` + +Partial redaction is the default because it gives analysts enough to identify the data type and triage priority without exposing the full sensitive value. The last 4 digits of SSNs and credit cards are considered non-sensitive by PCI-DSS (you can print them on receipts), so partial redaction for those types is compliant. + +## Network Analysis Architecture + +``` +┌────────────────────────────────────────────┐ +│ PCAP File │ +│ (.pcap or .pcapng) │ +└────────────────┬───────────────────────────┘ + │ + ▼ +┌────────────────────────────────────────────┐ +│ pcap.read_pcap() │ +│ │ +│ dpkt.pcap.Reader / dpkt.pcapng.Reader │ +│ Parse Ethernet -> IP -> TCP/UDP │ +│ Yield PacketInfo(src_ip, dst_ip, │ +│ src_port, dst_port, payload, │ +│ tcp_seq, tcp_flags) │ +└────────────────┬───────────────────────────┘ + │ + ┌───────┴───────┐ + ▼ ▼ +┌─────────────┐ ┌───────────────┐ +│FlowTracker │ │DnsExfilDetector│ +│ │ │ │ +│Track by │ │Label length │ +│4-tuple key │ │check (>50) │ +│ │ │ │ +│Reassemble │ │Subdomain │ +│TCP streams │ │entropy (>4.0) │ +│by seq num │ │ │ +│ │ │QNAME length │ +│Dedup retx │ │check (>100) │ +└──────┬──────┘ │ │ + │ │TXT volume │ + ▼ │ratio check │ +┌─────────────┐ └───────┬───────┘ +│Protocol ID │ │ +│(DPI) │ ▼ +│ │ ExfilIndicator[] +│HTTP: method │ +│ prefix │ +│TLS: \x16\x03│ +│SSH: SSH- │ +│SMTP: 
220 │ +└──────┬──────┘ + │ + ▼ + Reassembled text + sent to DetectorRegistry +``` + +The flow tracker creates bidirectional flow keys by sorting the forward and reverse 4-tuples, so `(A, B, 80, 12345)` and `(B, A, 12345, 80)` map to the same flow. TCP reassembly sorts segments by sequence number and deduplicates retransmissions. Without reassembly, a credit card number split across two TCP segments would be missed. + +The DNS exfiltration detector runs independently of the regex-based detectors. It analyzes DNS queries for encoding signals: base64-like entropy in subdomain labels, abnormally long labels, long QNAMEs, and suspicious TXT query volume ratios. The OilRig APT campaign used exactly these patterns to exfiltrate stolen documents through DNS tunneling to C2 infrastructure. + +## Error Handling Strategy + +Errors are collected, not thrown. Each scanner appends error messages to `ScanResult.errors` and continues scanning the remaining targets. The CLI checks `result.errors` after the scan completes and exits with code 1 if any errors occurred, but the partial results are still reported. + +This "collect and continue" approach means a single corrupt PDF in a directory of 10,000 files does not abort the scan. The Equifax breach investigation found that scanning tools that failed on individual files often left entire directories unscanned, which is why modern DLP tools treat extraction failures as warnings rather than fatal errors. + +## Performance Considerations + +**File scanning** is I/O-bound. The scanner processes files sequentially to avoid overwhelming disk I/O. Text extraction for binary formats (PDF, Office) can be CPU-intensive, but these files are typically a small fraction of the total. + +**Detection** scales linearly with text length times rule count. With 29 rules and an average text chunk of 500 lines, a single detection pass takes microseconds. 
The entropy detector is more expensive due to its sliding window, so it only runs when enabled and only against whole extracted text chunks (not individual regex matches).
Try modifying a detection rule in `detectors/rules/pii.py` to see how the scoring pipeline responds diff --git a/PROJECTS/intermediate/dlp-scanner/learn/03-IMPLEMENTATION.md b/PROJECTS/intermediate/dlp-scanner/learn/03-IMPLEMENTATION.md new file mode 100644 index 0000000..a760e4d --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/learn/03-IMPLEMENTATION.md @@ -0,0 +1,957 @@ +# 03-IMPLEMENTATION.md + +# Implementation Guide + +This document walks through how the code works. We cover the detection engine, file extraction, network analysis, and CLI integration, with code snippets from the actual project. + +## File Structure + +``` +src/dlp_scanner/ +├── __init__.py +├── cli.py # Typer entry point +├── engine.py # Scan orchestration +├── config.py # Pydantic config models +├── constants.py # Thresholds, types, defaults +├── models.py # Finding, Location, ScanResult +├── compliance.py # Rule-to-framework mapping +├── redaction.py # Snippet masking +├── log.py # structlog configuration +├── scoring.py # Shared match-to-finding conversion +├── commands/ +│ ├── scan.py # file, db, network commands +│ └── report.py # convert, summary commands +├── detectors/ +│ ├── base.py # DetectionRule, DetectorMatch +│ ├── pattern.py # Regex + checksum detection +│ ├── context.py # Keyword proximity scoring +│ ├── entropy.py # Shannon entropy detection +│ ├── registry.py # Central detector registry +│ └── rules/ +│ ├── pii.py # SSN, email, phone, passport +│ ├── financial.py # Credit cards, IBAN, NHS +│ ├── credentials.py # AWS, GitHub, JWT, Stripe +│ └── health.py # Medical records, DEA, NPI +├── extractors/ +│ ├── base.py # Extractor protocol +│ ├── plaintext.py # .txt, .log, .cfg, source code +│ ├── pdf.py # .pdf via PyMuPDF +│ ├── office.py # .docx, .xlsx, .xls +│ ├── structured.py # .csv, .json, .xml, .yaml, .parquet, .avro +│ ├── archive.py # .zip, .tar.gz, .tar.bz2 +│ └── email.py # .eml, .msg +├── network/ +│ ├── pcap.py # PCAP/PCAPNG packet reader +│ ├── flow_tracker.py # 
TCP flow reassembly +│ ├── protocols.py # DPI protocol identification +│ └── exfiltration.py # DNS exfil detection +├── reporters/ +│ ├── base.py # Reporter protocol +│ ├── console.py # Rich terminal output +│ ├── json_report.py # Structured JSON +│ ├── sarif.py # SARIF 2.1.0 +│ └── csv_report.py # Flat CSV +└── scanners/ + ├── base.py # Scanner protocol + ├── file_scanner.py # Directory walking + extraction + ├── db_scanner.py # DB schema introspection + └── network_scanner.py # PCAP payload scanning +``` + +## Building the Detection Engine + +### Detection Rules + +Every detection rule is a data structure, not a class hierarchy. The `DetectionRule` dataclass holds the regex pattern, base confidence score, optional validator function, context keywords, and compliance framework tags: + +```python +@dataclass(frozen=True, slots=True) +class DetectionRule: + rule_id: str + rule_name: str + pattern: re.Pattern[str] + base_score: float + context_keywords: list[str] = field(default_factory=list) + validator: Callable[[str], bool] | None = None + compliance_frameworks: list[str] = field( + default_factory=list + ) +``` + +Rule modules export plain lists of these structs. Here is the SSN rule from `detectors/rules/pii.py`: + +```python +SSN_PATTERN = re.compile( + r"\b(?!000|666|9\d{2})\d{3}" + r"[-\s]?" + r"(?!00)\d{2}" + r"[-\s]?" + r"(?!0000)\d{4}\b" +) + +PII_RULES: list[DetectionRule] = [ + DetectionRule( + rule_id="PII_SSN", + rule_name="US Social Security Number", + pattern=SSN_PATTERN, + base_score=0.45, + context_keywords=SSN_CONTEXT, + validator=_validate_ssn, + compliance_frameworks=[ + "HIPAA", "CCPA", "GLBA", "GDPR", + ], + ), + ... +] +``` + +The regex uses negative lookaheads (`(?!000|666|9\d{2})`) to reject SSN area numbers the Social Security Administration has never assigned. This is a first-pass structural filter. The real validation happens in `_validate_ssn`, which the `PatternDetector` calls for every regex match. 
+ +**Why base_score is 0.45, not higher:** A 9-digit number matching the SSN format appears in serial numbers, zip+4 codes, phone fragments, and test data constantly. The string `456-78-9012` matches the SSN pattern and passes area/group/serial validation, but without context it could be anything. A base of 0.45 keeps it in the "medium" severity tier until context boosts push it higher. + +### Checksum Validation + +The three checksum validators demonstrate different mathematical approaches to the same problem: distinguishing real identifiers from random digit sequences. + +**Luhn algorithm** for credit cards (in `detectors/rules/financial.py`): + +```python +def luhn_check(number: str) -> bool: + digits = [int(d) for d in number if d.isdigit()] + if len(digits) < 13: + return False + + odd_digits = digits[-1::-2] + even_digits = digits[-2::-2] + total = sum(odd_digits) + for d in even_digits: + total += sum(divmod(d * 2, 10)) + return total % 10 == 0 +``` + +The algorithm works right-to-left: take every other digit starting from the rightmost, sum them. For the remaining digits, double each, and if the result exceeds 9, subtract 9 (which is what `sum(divmod(d * 2, 10))` does). If the grand total is divisible by 10, the number is valid. A random 16-digit number has about a 10% chance of passing Luhn, so it reduces false positives by roughly 90%. + +**Mod-97** for IBANs (ISO 7064): + +```python +def iban_check(value: str) -> bool: + cleaned = value.replace(" ", "").upper() + if len(cleaned) < 15 or len(cleaned) > 34: + return False + + rearranged = cleaned[4:] + cleaned[:4] + numeric = "" + for char in rearranged: + if char.isalpha(): + numeric += str(ord(char) - ord("A") + 10) + else: + numeric += char + + return int(numeric) % 97 == 1 +``` + +Move the country code and check digits (first 4 chars) to the end, convert letters to two-digit numbers (A=10, B=11, etc.), then check that the entire number mod 97 equals 1. The false positive rate is approximately 1 in 97. 
+ +**Mod-11** for NHS numbers: + +```python +def nhs_check(value: str) -> bool: + digits = value.replace("-", "").replace(" ", "") + if len(digits) != 10 or not digits.isdigit(): + return False + + weights = range(10, 1, -1) + total = sum( + int(d) * w + for d, w in zip(digits[:9], weights, strict=False) + ) + remainder = 11 - (total % 11) + if remainder == 11: + remainder = 0 + if remainder == 10: + return False + return remainder == int(digits[9]) +``` + +Multiply the first 9 digits by descending weights (10, 9, 8, ..., 2), sum them, compute `11 - (sum mod 11)`, and compare to the check digit. If the result is 10, the number is invalid (NHS never issues these). If the result is 11, the check digit is 0. + +**Luhn-80840** for NPIs (in `detectors/rules/health.py`): + +```python +def _validate_npi(value: str) -> bool: + digits = value.replace("-", "").replace(" ", "") + if len(digits) != 10 or not digits.isdigit(): + return False + + prefixed = "80840" + digits + total = 0 + for i, d in enumerate(reversed(prefixed)): + n = int(d) + if i % 2 == 1: + n *= 2 + if n > 9: + n -= 9 + total += n + return total % 10 == 0 +``` + +NPI (National Provider Identifier) validation is a Luhn variant. The trick is prepending `80840` (the healthcare industry prefix assigned by ANSI) before running the standard Luhn algorithm. This prefix is not part of the NPI itself, but the ISO standard requires it for check digit computation. A random 10-digit number has about a 10% chance of passing, making this check useful but not definitive. The base score of 0.10 reflects that NPI patterns match many unrelated 10-digit numbers, and context keywords like "provider" or "npi" are needed to push the score into actionable territory. 
+ +### Pattern Detection + +The `PatternDetector` in `detectors/pattern.py` iterates over all active rules, runs each regex against the input text, filters through the allowlist, and applies checksum validation: + +```python +class PatternDetector: + def detect(self, text: str) -> list[DetectorMatch]: + matches: list[DetectorMatch] = [] + + for rule in self._rules: + for m in rule.pattern.finditer(text): + matched_text = m.group() + + if self._is_allowlisted(matched_text): + continue + + score = rule.base_score + + if rule.validator is not None: + if rule.validator(matched_text): + score = min(1.0, score + CHECKSUM_BOOST) + else: + continue + + matches.append( + DetectorMatch( + rule_id=rule.rule_id, + ... + score=score, + ) + ) + + return matches +``` + +When a rule has a validator and the match fails validation, the match is discarded entirely (`continue`). A Visa pattern that matches `4532015112830366` but fails Luhn is not a credit card. When validation passes, the score gets a +0.30 boost (`CHECKSUM_BOOST`). This is aggressive because checksum-passing matches are overwhelmingly real: the Luhn+Visa prefix combination has a false positive rate under 1%. + +The allowlist uses a frozen set lookup, defaulting to `KNOWN_TEST_VALUES` (common test card numbers, example SSNs like `123-45-6789`). This prevents DLP tools from flagging their own test data, which is a common complaint in production deployments. 
+ +### Context Keyword Scoring + +After pattern detection, `apply_context_boost` in `detectors/context.py` scans the surrounding text for keywords that indicate the matched value is actually sensitive data: + +```python +def apply_context_boost( + text: str, + matches: list[DetectorMatch], + window_tokens: int = DEFAULT_CONTEXT_WINDOW_TOKENS, +) -> list[DetectorMatch]: + tokens = text.lower().split() + boosted: list[DetectorMatch] = [] + + for match in matches: + if not match.context_keywords: + boosted.append(match) + continue + + char_to_token = _char_offset_to_token_index( + text, match.start + ) + window_start = max( + 0, char_to_token - window_tokens + ) + window_end = min( + len(tokens), char_to_token + window_tokens + ) + window_text = " ".join( + tokens[window_start:window_end] + ) + + boost = _compute_keyword_boost( + window_text, + match.context_keywords, + window_tokens, + ) + + new_score = min(1.0, match.score + boost) + ... +``` + +The window is bidirectional: 10 tokens in each direction from the match. The boost is distance-weighted: a keyword right next to the match contributes up to `CONTEXT_BOOST_MAX` (0.35), while one at the edge of the window contributes almost nothing. This reflects a real observation: "SSN: 456-78-9012" is almost certainly an SSN, while "SSN" appearing 50 words away from "456-78-9012" is weaker signal. + +The `_compute_keyword_boost` function finds the best keyword match in the window and computes `CONTEXT_BOOST_MAX * proximity_factor`, where proximity is `1.0 - (distance / max_distance)`. Only the highest-scoring keyword matters, not the sum of all keywords. This prevents keyword stuffing from inflating scores. 
+ +### Co-occurrence Boost + +After context boosting, `_apply_cooccurrence_boost` checks whether multiple different PII types appear near each other: + +```python +def _apply_cooccurrence_boost( + matches: list[DetectorMatch], +) -> list[DetectorMatch]: + if len(matches) < 2: + return matches + + proximity_threshold = 500 + + for i, match in enumerate(matches): + has_neighbor = False + for j, other in enumerate(matches): + if i == j: + continue + if other.rule_id == match.rule_id: + continue + distance = abs(match.start - other.start) + if distance < proximity_threshold: + has_neighbor = True + break + + if has_neighbor: + new_score = min( + 1.0, match.score + COOCCURRENCE_BOOST + ) + ... +``` + +An SSN near a credit card number is stronger evidence than either alone. The boost is +0.15 (`COOCCURRENCE_BOOST`), and it requires different `rule_id` values (two SSNs next to each other do not trigger it). The 500-character threshold roughly corresponds to a short paragraph or a few database columns. + +This heuristic matters in practice. The Capital One breach data contained CSV exports where SSNs, credit card numbers, and addresses appeared in adjacent columns. Co-occurrence detection would have flagged these files as critical priority. + +### Shannon Entropy Detection + +The `EntropyDetector` in `detectors/entropy.py` finds high-entropy regions that may contain secrets, encrypted data, or base64-encoded credentials: + +```python +def shannon_entropy(data: bytes) -> float: + if not data: + return 0.0 + + counts = Counter(data) + total = len(data) + return -sum( + (c / total) * math.log2(c / total) + for c in counts.values() + ) +``` + +Shannon entropy measures the average information content per byte. English text sits around 3.5-4.5 bits. Base64-encoded data is 5.5-6.0. Truly random bytes approach 8.0 (log2(256)). 
The detector uses a sliding window of 256 bytes with a 128-byte step: + +```python +def detect_high_entropy_regions( + data: bytes, + threshold: float = DEFAULT_ENTROPY_THRESHOLD, + window_size: int = WINDOW_SIZE, + step: int = WINDOW_STEP, +) -> list[tuple[int, int, float]]: + ... + while i + window_size <= len(data): + window = data[i:i + window_size] + h = shannon_entropy(window) + + if h >= threshold: + end = i + window_size + while end + step <= len(data): + next_window = data[ + end - window_size + step:end + step + ] + next_h = shannon_entropy(next_window) + if next_h < threshold: + break + h = max(h, next_h) + end += step + + regions.append((i, end, h)) + i = end + else: + i += step +``` + +When the entropy exceeds the threshold (default 7.2), the detector extends the region forward until entropy drops below the threshold. This merges adjacent high-entropy windows into a single region rather than reporting dozens of overlapping detections. + +The default threshold of 7.2 is intentionally high. Network payloads containing binary protocol data or compressed content often hit 6.0-7.0, which would generate massive false positive volume. At 7.2, the detector primarily catches encrypted blobs, base64-encoded secrets, and random key material. + +## File Extraction Pipeline + +### The Extractor Protocol + +All extractors implement a two-method protocol: + +```python +class Extractor(Protocol): + def extract(self, path: str) -> list[TextChunk]: ... + + @property + def supported_extensions(self) -> frozenset[str]: ... +``` + +The `FileScanner` builds an extension-to-extractor map at initialization by iterating over all extractor instances and indexing by their supported extensions. When scanning a file, it looks up the extractor by the file's extension and calls `extract`. 
+ +### Plaintext Extraction + +The `PlaintextExtractor` reads files in 500-line chunks to keep memory bounded: + +```python +class PlaintextExtractor: + def extract(self, path: str) -> list[TextChunk]: + chunks: list[TextChunk] = [] + + with open( + path, encoding="utf-8", errors="replace", + ) as f: + lines: list[str] = [] + line_number = 1 + chunk_start = 1 + + for line in f: + lines.append(line) + if len(lines) >= CHUNK_MAX_LINES: + chunks.append( + TextChunk( + text="".join(lines), + location=Location( + source_type="file", + uri=path, + line=chunk_start, + ), + ) + ) + chunk_start = line_number + 1 + lines = [] + line_number += 1 + + if lines: + chunks.append(...) + + return chunks +``` + +Each `TextChunk` carries the starting line number in its `Location`, so findings can report where in the file the match occurred. The `errors="replace"` parameter means binary-contaminated text files (common in log files with embedded binary data) will not crash the extractor. + +### Extension Map Construction + +The `_build_extension_map` function in `file_scanner.py` constructs the mapping from extensions to extractors: + +```python +def _build_extension_map() -> dict[str, Extractor]: + extractors: list[Extractor] = [ + PlaintextExtractor(), + PDFExtractor(), + DocxExtractor(), + XlsxExtractor(), + XlsExtractor(), + CsvExtractor(), + JsonExtractor(), + XmlExtractor(), + YamlExtractor(), + ParquetExtractor(), + AvroExtractor(), + ArchiveExtractor(), + EmlExtractor(), + MsgExtractor(), + ] + + ext_map: dict[str, Extractor] = {} + for extractor in extractors: + for ext in extractor.supported_extensions: + ext_map[ext] = extractor + + return ext_map +``` + +Adding a new format means creating an extractor class with `extract` and `supported_extensions`, then adding it to this list. The scanner does not need to know anything about the format. 
+ +### File Scanner Walk Logic + +The `FileScanner._scan_directory` method applies a chain of filters before dispatching to an extractor: + +```python +def _scan_directory(self, directory, result): + iterator = ( + directory.rglob("*") + if self._file_config.recursive + else directory.glob("*") + ) + + for path in iterator: + if not path.is_file(): + continue + if self._is_excluded(path, directory): + continue + + suffix = _get_full_suffix(path) + if suffix not in self._allowed_extensions: + continue + + file_size = path.stat().st_size + if file_size > max_bytes: + continue + if file_size == 0: + continue + + self._scan_file(path, result) + result.targets_scanned += 1 +``` + +The `_get_full_suffix` function handles compound extensions like `.tar.gz` and `.tar.bz2` by checking the filename suffix before falling back to `path.suffix.lower()`. The exclusion check matches against the relative path, the filename, and individual path components, so a pattern like `__pycache__` matches regardless of depth. + +## Network Analysis + +### Scanner Orchestration + +The `NetworkScanner` ties together the network modules into a multi-pass pipeline. The old implementation decoded raw packets as UTF-8 and ran detection directly. 
The rewrite is protocol-aware: + +```python +def _scan_pcap(self, path, result): + tracker = FlowTracker() + dns_detector = DnsExfilDetector( + entropy_threshold=( + self._net_config.dns_label_entropy_threshold + ), + ) + packet_count = 0 + + for packet in read_pcap( + path, + max_packets=self._net_config.max_packets, + ): + packet_count += 1 + tracker.add_packet(packet) + + if ( + packet.protocol == "udp" + and ( + packet.src_port == DNS_PORT + or packet.dst_port == DNS_PORT + ) + ): + self._process_dns_packet( + packet.payload, packet.src_ip, + packet.dst_ip, path, packet_count, + dns_detector, result, + ) + + if packet.payload: + exfil_indicators = detect_base64_payload( + packet.payload, + src_ip=packet.src_ip, + dst_ip=packet.dst_ip, + ) + for indicator in exfil_indicators: + finding = _indicator_to_finding( + indicator, str(path), packet_count, + ) + result.findings.append(finding) + + txt_indicators = dns_detector.check_txt_volume() + for indicator in txt_indicators: + ... + + self._scan_reassembled_flows(tracker, path, result) +``` + +Three things happen during the packet loop: every packet goes into the `FlowTracker` for later TCP reassembly, UDP packets on port 53 are parsed as DNS and fed to the `DnsExfilDetector`, and every payload is checked for base64/hex-encoded data by `detect_base64_payload`. After the loop, TXT query volume ratios are checked and TCP flows are reassembled for content scanning. + +The reassembled flow scanning uses protocol-aware text extraction: + +```python +def _extract_scannable_text(self, stream, protocol): + if protocol == "http": + return self._extract_http_text(stream) + if protocol in ("tls", "ssh"): + return "" + try: + return stream.decode("utf-8", errors="replace") + except Exception: + return "" +``` + +HTTP flows get parsed by `parse_http`, which extracts URIs, sensitive headers (`cookie`, `authorization`, `set-cookie`), and bodies. 
TLS and SSH flows are skipped entirely since the content is encrypted and cannot be scanned. Everything else falls through to a UTF-8 decode attempt. + +DNS exfiltration indicators and encoded payload detections are converted to `Finding` objects through `_indicator_to_finding`, which maps indicator types to rule IDs via the `EXFIL_RULE_MAP` lookup table. Regex-based detections from reassembled flows go through `match_to_finding` like the other scanners. + +### PCAP Parsing + +The `read_pcap` function in `network/pcap.py` reads packets using dpkt and yields `PacketInfo` structs: + +```python +def read_pcap(path, max_packets=0): + with open(path, "rb") as f: + try: + pcap = dpkt.pcap.Reader(f) + except ValueError: + f.seek(0) + pcap = dpkt.pcapng.Reader(f) + + count = 0 + for timestamp, buf in pcap: + if max_packets > 0 and count >= max_packets: + break + + packet = _parse_ethernet(timestamp, buf) + if packet is not None: + yield packet + count += 1 +``` + +The try/except fallback handles both PCAP (libpcap) and PCAPNG (Wireshark's newer format). dpkt is used instead of Scapy because it is roughly 100x faster for bulk packet parsing. Scapy constructs rich protocol objects with dissection layers; dpkt does minimal parsing and gives you raw bytes. + +### TCP Flow Reassembly + +The `FlowTracker` in `network/flow_tracker.py` groups packets into flows and reassembles TCP streams: + +```python +def make_flow_key(packet): + forward = ( + packet.src_ip, packet.dst_ip, + packet.src_port, packet.dst_port, + ) + reverse = ( + packet.dst_ip, packet.src_ip, + packet.dst_port, packet.src_port, + ) + return min(forward, reverse) +``` + +The bidirectional key is the lexicographically smaller of the forward and reverse 4-tuples. This means `(A->B)` and `(B->A)` packets land in the same flow. 
The `reassemble_stream` method sorts segments by TCP sequence number and deduplicates retransmissions: + +```python +def reassemble_stream(self, key): + flow = self._flows.get(key) + if flow is None: + return b"" + + sorted_segments = sorted( + flow.segments, key=lambda s: s[0] + ) + + seen_offsets: set[int] = set() + parts: list[bytes] = [] + for seq, data in sorted_segments: + if seq not in seen_offsets: + seen_offsets.add(seq) + parts.append(data) + + return b"".join(parts) +``` + +TCP retransmissions reuse the same sequence number, so deduplication by sequence number prevents duplicate data in the reassembled stream. This is a simplified reassembly that does not handle overlapping segments (where retransmissions contain different data), but it covers the common case. + +### Protocol Identification + +The `identify_protocol` function in `network/protocols.py` performs Deep Packet Inspection using byte prefix matching: + +```python +def identify_protocol(payload: bytes) -> str: + if not payload: + return "unknown" + + if _is_http_request(payload): + return "http" + if payload.startswith(HTTP_RESPONSE_PREFIX): + return "http" + if (len(payload) > 2 + and payload[:2] == TLS_RECORD_PREFIX): + return "tls" + if payload.startswith(SSH_PREFIX): + return "ssh" + if payload.startswith(SMTP_BANNER_PREFIX): + return "smtp" + + return "unknown" +``` + +HTTP requests are identified by checking if the first word before a space is a known HTTP method (`GET`, `POST`, `PUT`, etc.). TLS records start with `\x16\x03` (ContentType=Handshake + major version 3). SSH banners start with `SSH-`. SMTP server greetings start with `220`. + +This matters for DLP because the same sensitive data requires different handling depending on the transport protocol. An SSN in an HTTP body can be read and flagged with high confidence. The same SSN in a TLS-encrypted stream cannot be read, but you can flag the flow as "encrypted traffic containing unknown data" and correlate with other signals. 
+ +### DNS Exfiltration Detection + +The `DnsExfilDetector` in `network/exfiltration.py` analyzes DNS queries for patterns that suggest data tunneling: + +```python +def _check_subdomain_entropy(self, name, src_ip, dst_ip): + parts = name.split(".") + if len(parts) < 3: + return None + + subdomain = ".".join(parts[:-2]) + if not subdomain: + return None + + entropy = shannon_entropy_str(subdomain) + if entropy > self._entropy_threshold: + return ExfilIndicator( + indicator_type="dns_high_entropy", + description=( + f"High subdomain entropy ({entropy:.2f}) " + f"suggesting DNS tunneling" + ), + confidence=min( + 0.95, + 0.50 + (entropy - 3.0) * 0.15, + ), + source_ip=src_ip, + dst_ip=dst_ip, + evidence=name, + ) +``` + +Legitimate subdomains (`www`, `mail`, `api`, `cdn`) have very low entropy. A query like `aGVsbG8gd29ybGQ.evil.com` has subdomain entropy above 4.0 because the base64-encoded data uses most of the alphanumeric character space. The detector extracts everything before the last two domain labels (the registerable domain), computes Shannon entropy, and flags queries above the threshold. + +The confidence score scales linearly from 0.50 (at entropy 3.0) to 0.95 (at entropy 6.0). This captures the observation that higher entropy means more confident detection: entropy 4.1 might be a CDN hash, but entropy 5.5 is almost certainly encoded data. 
+ +## Compliance and Severity Classification + +### Severity Mapping + +The `score_to_severity` function in `compliance.py` maps confidence scores to severity levels using a threshold table: + +```python +SEVERITY_SCORE_THRESHOLDS = [ + (0.85, "critical"), + (0.65, "high"), + (0.40, "medium"), + (0.20, "low"), +] + +def score_to_severity(score: float) -> Severity: + for threshold, severity in SEVERITY_SCORE_THRESHOLDS: + if score >= threshold: + return severity + return "low" +``` + +The thresholds are tuned so that: +- **Critical** (0.85+): checksum-validated matches with context keywords (e.g., SSN near "social security") +- **High** (0.65+): checksum-validated matches or strong context without validation +- **Medium** (0.40+): pattern matches without strong validation or context +- **Low** (0.20+): weak matches that might be false positives + +### Framework Mapping + +The `RULE_FRAMEWORK_MAP` in `compliance.py` is a static lookup table: + +```python +RULE_FRAMEWORK_MAP = { + "PII_SSN": ["HIPAA", "CCPA", "GLBA", "GDPR"], + "PII_DRIVERS_LICENSE_FL": ["CCPA", "HIPAA"], + "FIN_CREDIT_CARD_VISA": ["PCI_DSS", "GLBA"], + "FIN_CREDIT_CARD_MC": ["PCI_DSS", "GLBA"], + "FIN_IBAN": ["GDPR", "GLBA"], + "HEALTH_NPI": ["HIPAA"], + "NET_DNS_EXFIL_HIGH_ENTROPY": [], + ... +} +``` + +Rule IDs match actual detection rules rather than using generic categories. Credit card rules are split by brand (`FIN_CREDIT_CARD_VISA`, `FIN_CREDIT_CARD_MC`, `FIN_CREDIT_CARD_AMEX`, `FIN_CREDIT_CARD_DISC`), each triggering PCI-DSS and GLBA. State-specific driver's license rules (`PII_DRIVERS_LICENSE_FL`, `PII_DRIVERS_LICENSE_IL`) map to CCPA and HIPAA alongside the generic CA pattern. Network exfiltration indicators (`NET_DNS_EXFIL_*`, `NET_ENCODED_*`) carry empty framework lists since DNS tunneling is a detection concern, not a regulatory data type. 
+ +SSNs trigger four frameworks because they are considered protected health information (HIPAA), personal information (CCPA), financial identifiers (GLBA), and personal data (GDPR). Every rule also has a corresponding entry in `RULE_REMEDIATION_MAP` with specific guidance text. Unknown rules fall back to a generic default. + +The mapping is intentionally conservative. An SSN could trigger SOX if it appears in financial reporting data, but without business context the scanner cannot determine that. The listed frameworks are the ones where the mere presence of the data type creates a compliance obligation. + +## Shared Scoring Module + +The `match_to_finding` function in `scoring.py` centralizes the conversion from `DetectorMatch` to `Finding`. All three scanners import from this single location instead of duplicating the severity/compliance/redaction logic: + +```python +def match_to_finding( + match: DetectorMatch, + text: str, + location: Location, + redaction_style: RedactionStyle, +) -> Finding: + severity = score_to_severity(match.score) + frameworks = get_frameworks_for_rule(match.rule_id) + if match.compliance_frameworks: + combined = ( + set(frameworks) | set(match.compliance_frameworks) + ) + frameworks = sorted(combined) + remediation = get_remediation_for_rule(match.rule_id) + + snippet = redact( + text, match.start, match.end, + style=redaction_style, + ) + + return Finding( + rule_id=match.rule_id, + rule_name=match.rule_name, + severity=severity, + confidence=match.score, + location=location, + redacted_snippet=snippet, + compliance_frameworks=frameworks, + remediation=remediation, + ) +``` + +The function chains severity classification, compliance framework lookup, remediation guidance, and redaction in one call. The framework merging logic handles the case where a detection rule carries its own `compliance_frameworks` list: those are merged with the frameworks from the compliance module, deduplicated, and sorted for deterministic output. 
+
+Each scanner calls this in its match loop:
+
+```python
+for match in matches:
+    if match.score < min_confidence:
+        continue
+
+    finding = match_to_finding(
+        match, chunk.text, chunk.location,
+        self._redaction_style,
+    )
+    result.findings.append(finding)
+```
+
+Adding a new compliance framework or changing severity thresholds affects all three scanners uniformly without touching scanner code.
+
+## Redaction
+
+The `redact` function in `redaction.py` builds a snippet with masked content:
+
+```python
+def redact(text, start, end, style="partial"):
+    matched = text[start:end]
+
+    if style == "none":
+        return _build_snippet(text, start, end, matched)
+    if style == "full":
+        return _build_snippet(
+            text, start, end, REDACTED_LABEL
+        )
+
+    redacted = _partial_redact(matched)
+    return _build_snippet(text, start, end, redacted)
+```
+
+The `_partial_redact` function applies format-aware masking:
+
+```python
+def _partial_redact(value):
+    stripped = value.replace("-", "").replace(" ", "")
+
+    if len(stripped) >= 9 and stripped.isdigit():
+        return MASK_CHAR * (len(value) - 4) + value[-4:]
+
+    if "@" in value:
+        local, domain = value.rsplit("@", maxsplit=1)
+        masked_local = (
+            local[0] + MASK_CHAR * (len(local) - 1)
+        )
+        return f"{masked_local}@{domain}"
+
+    if len(value) > 8:
+        visible = max(4, len(value) // 4)
+        return (
+            MASK_CHAR * (len(value) - visible)
+            + value[-visible:]
+        )
+
+    return MASK_CHAR * len(value)
+```
+
+For digit sequences (SSNs, credit cards), it preserves the last 4 digits: `123-45-6789` becomes `*******6789`. For emails, it keeps the first character and domain: `j****@example.com`. For other strings (API keys, tokens), it shows the last 25% (at least 4 characters). Values of 8 characters or fewer are fully masked.
+
+The `_build_snippet` function adds 20 characters of context on each side and prepends/appends `...` when the context is truncated. This gives analysts enough surrounding text to understand what the data was near without exposing full document contents.
+
+## CLI Integration
+
+### Global Option Propagation
+
+The Typer callback stores global options in Click's context dict:
+
+```python
+@app.callback()
+def main(ctx: typer.Context, config: ..., verbose: ..., version: ...):
+    ctx.ensure_object(dict)
+    ctx.obj["config_path"] = config
+    ctx.obj["verbose"] = verbose
+```
+
+Subcommands retrieve these via `ctx.ensure_object(dict)`:
+
+```python
+def _run_scan(ctx, scan_type, target, output_format, output_file):
+    obj: dict[str, Any] = ctx.ensure_object(dict)
+    config_path = obj.get("config_path", "")
+    verbose = obj.get("verbose", False)
+```
+
+This pattern lets `dlp-scan -v -c custom.yml file ./data` propagate the verbose flag and config path to the file scan command without duplicating those options on every subcommand.
+
+### Logging Strategy
+
+The logging level adapts to the output format:
+
+```python
+if verbose:
+    configure_logging(level="DEBUG")
+elif output_format == "console":
+    configure_logging(level="INFO")
+else:
+    configure_logging(level="WARNING")
+```
+
+When output is machine-readable (JSON, SARIF, CSV), logging is set to WARNING so that structlog messages written to stderr do not contaminate stdout. This prevents `dlp-scan file ./data -f json | jq` from breaking because of log lines mixed into the JSON output. For console output, INFO-level logging provides progress feedback. Verbose mode enables DEBUG for troubleshooting.
+
+### Report Conversion
+
+The `report convert` command reads a JSON scan result and regenerates it in another format:
+
+```python
+@report_app.command("convert")
+def convert(input_file, output_format="sarif", output_file=""):
+    raw = path.read_bytes()
+    data = orjson.loads(raw)
+    result = _rebuild_result(data)
+
+    config = ScanConfig()
+    engine = ScanEngine(config)
+
+    output = engine.generate_report(result, fmt)
+    ...
+```
+
+The `_rebuild_result` function deserializes the JSON structure back into `ScanResult`, `Finding`, and `Location` objects.
It reads from the `scan_metadata` section for scan-level fields and iterates `findings` to reconstruct each `Finding` with its `Location`. This is necessary because `orjson.loads` produces plain dicts, but the reporters expect typed dataclass instances. + +## Testing Strategy + +### Property-Based Testing + +The project uses Hypothesis for property-based testing of detection rules. Instead of testing a few known inputs, Hypothesis generates random strings constrained by rule formats and verifies that the detection pipeline handles them correctly. + +For validators: Hypothesis generates random digit sequences and verifies that `luhn_check`, `iban_check`, and `nhs_check` only return True for inputs that satisfy the mathematical properties (divisibility by 10, mod 97 = 1, mod 11 check digit match). + +For the context boost: Hypothesis generates random text with embedded keywords at varying distances and verifies that the boost is always between 0 and `CONTEXT_BOOST_MAX`, and that closer keywords produce higher boosts. + +### Running Tests + +```bash +uv run pytest -m unit # fast unit tests +uv run pytest -m integration # tests with file I/O +uv run pytest --cov=src # coverage report +``` + +The test suite uses markers (`unit`, `integration`, `slow`) to separate fast tests from those requiring real filesystem access. The `conftest.py` provides shared fixtures for temporary directories, sample configs, and test data files. + +## Dependencies + +- **typer**: CLI framework with type-hint argument declaration. The `Annotated` style avoids decorators stacking up. +- **rich**: Terminal tables with colors. Used by `ConsoleReporter` for severity-colored output. +- **structlog**: Structured logging with stdlib integration. JSON or console rendering based on config. +- **pydantic**: Config validation. Catches invalid YAML values before the scan starts. +- **orjson**: Fast JSON serialization. 3-10x faster than stdlib json for large finding lists. 
+- **ruamel.yaml**: YAML parser that handles 1.2 spec and preserves comments. +- **dpkt**: PCAP parsing. ~100x faster than Scapy for bulk packet iteration. +- **pymupdf**: PDF text extraction with layout preservation. +- **python-docx/openpyxl/xlrd**: Office format extraction. +- **defusedxml/lxml**: Safe XML parsing (defusedxml blocks XXE attacks). +- **pyarrow/fastavro**: Columnar format extraction (Parquet, Avro). +- **asyncpg/aiomysql/pymongo/aiosqlite**: Async database drivers. + +## Next Steps + +You have seen how the code works. Now: +1. Try the challenges in [04-CHALLENGES.md](./04-CHALLENGES.md) for extension ideas +2. Modify a detection rule and run the tests to see how the scoring changes +3. Scan your own files with `dlp-scan file ./your-directory` and inspect the output diff --git a/PROJECTS/intermediate/dlp-scanner/learn/04-CHALLENGES.md b/PROJECTS/intermediate/dlp-scanner/learn/04-CHALLENGES.md new file mode 100644 index 0000000..7341b5b --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/learn/04-CHALLENGES.md @@ -0,0 +1,422 @@ +# 04-CHALLENGES.md + +# Extension Challenges + +You have built a DLP scanner with file, database, and network scanning, a confidence scoring pipeline, compliance mapping, and multi-format reporting. These challenges extend it into new territory. + +Ordered by difficulty. The easy ones take an hour or two. The advanced ones are multi-day efforts that teach you skills used in production DLP systems. + +## Easy Challenges + +### Challenge 1: Add a New PII Rule (Date of Birth) + +**What to build:** A detection rule for dates of birth in common formats: `MM/DD/YYYY`, `YYYY-MM-DD`, `DD-Mon-YYYY`. + +**Why it matters:** Date of birth is classified as PHI under HIPAA's 18 identifiers and as personal data under GDPR. The 2015 Anthem breach exposed 78.8 million records including DOBs, and the combination of DOB + name + zip code is enough to uniquely identify 87% of the US population (Latanya Sweeney's research at Carnegie Mellon). 
+ +**What you will learn:** +- Writing regex patterns that match multiple date formats +- Adding a validation function that rejects impossible dates (month 13, day 32, Feb 30) +- Tuning base_score relative to false positive rate (dates appear everywhere) + +**Hints:** +- Create the rule in a new file `detectors/rules/pii_extended.py` and add the rules list to `ALL_RULES` in `registry.py` +- Use a low base_score (0.10-0.15) because date strings are extremely common +- Context keywords like "date of birth", "dob", "birthday", "born on" should provide the majority of the signal +- The validator should parse the matched string into a real date and reject invalid ones +- Add the rule to `RULE_FRAMEWORK_MAP` in `compliance.py` with HIPAA and GDPR + +**Test it works:** Create a text file with "Patient DOB: 03/15/1987" and "Order date: 03/15/1987". The first should score higher than the second due to context keywords. + +### Challenge 2: HTML Report Output + +**What to build:** A new reporter that generates a standalone HTML file with a sortable findings table, severity color coding, and a summary chart. + +**Why it matters:** Compliance teams often need to share scan results with non-technical stakeholders who do not have command-line tools. An HTML report that opens in a browser is more accessible than JSON or CSV. 
+ +**What you will learn:** +- Implementing the reporter pattern (match the existing protocol) +- HTML template generation in Python (string templates or Jinja2) +- Adding a new output format to the CLI without modifying existing code + +**Hints:** +- Create `reporters/html_report.py` with a `HtmlReporter` class +- Add `"html"` to `REPORTER_MAP` in `engine.py` and `VALID_FORMATS` in `commands/scan.py` +- Use inline CSS so the report is a single self-contained HTML file with no external dependencies +- Color severity levels using the same scheme as the console reporter (red for critical, yellow for medium, green for low) +- Include a summary section at the top with counts by severity and framework + +**Test it works:** Run `dlp-scan file ./data -f html -o report.html` and open the file in a browser. The table should be sortable by clicking column headers (add minimal JavaScript for this). + +### Challenge 3: Allowlist by File Path Pattern + +**What to build:** Extend the allowlist system to suppress findings from files matching glob patterns. Currently, `allowlists.file_patterns` exists in the config but is not enforced during scanning. + +**Why it matters:** Test fixtures, mock data, and seed files intentionally contain fake PII. Teams waste hours triaging findings from `tests/fixtures/sample_data.csv` that contain test credit card numbers. Path-based allowlisting eliminates this noise. 
+ +**What you will learn:** +- Connecting config to scan-time behavior +- Glob pattern matching with `fnmatch` +- The difference between value-level and file-level suppression + +**Hints:** +- The `AllowlistConfig.file_patterns` field already exists in `config.py` +- Add the check in `FileScanner._scan_file` before running detection, or in `_scan_directory` before scanning the file +- Match against the relative path from the scan target, not the absolute path +- Patterns like `test_*`, `*_fixture*`, and `mock_*` should match filenames + +**Test it works:** Create a file `test_data.txt` with a valid SSN. Scan with `file_patterns: ["test_*"]` in config. The SSN should not appear in results. + +## Intermediate Challenges + +### Challenge 4: Incremental Scanning with Hash Cache + +**What to build:** A scan cache that stores SHA-256 hashes of scanned files and skips unchanged files on subsequent scans. + +**Why it matters:** Large codebases and file shares contain millions of files. Re-scanning unchanged files wastes time. Symantec DLP and Microsoft Purview both use content hashing to skip unchanged files, reducing scan time by 60-90% on repeated scans. + +**What you will learn:** +- Content-addressable caching strategies +- SQLite as an embedded metadata store +- Cache invalidation (the hardest problem in computer science, and one you actually have to solve) + +**Implementation approach:** + +1. **Create `cache.py`** with a `ScanCache` class backed by SQLite + - Table: `(file_path TEXT, content_hash TEXT, scan_time TEXT, finding_count INTEGER)` + - Hash computation: SHA-256 of file contents + - Lookup: if the file exists in cache with the same hash, skip scanning and load cached finding count + +2. **Integrate with `FileScanner`** + - Before extracting text, check the cache + - After scanning, store the hash and finding count + - Add `--no-cache` flag to force full rescan + +3. **Handle invalidation edge cases:** + - What if detection rules change between scans? 
(The same file might produce different findings with new rules) + - What if the config changes min_confidence? (Previously-suppressed findings might now be reportable) + - What if a file is deleted? (Stale cache entries should not appear in results) + +**Hints:** +- Store a hash of the active rule set and config in the cache. If either changes, invalidate the entire cache +- Use `aiosqlite` to match the async pattern of the database scanner, or use synchronous sqlite3 since file scanning is already synchronous +- The cache file should live next to the config: `.dlp-scanner-cache.db` + +**Extra credit:** Add `dlp-scan cache stats` and `dlp-scan cache clear` subcommands. + +### Challenge 5: Severity Override by Compliance Framework + +**What to build:** A config option that overrides severity based on compliance framework requirements. For example, any PCI-DSS finding should be at least "high" regardless of confidence score, because PCI-DSS does not have a concept of "low severity" unencrypted card data. + +**Why it matters:** Different compliance frameworks have different severity thresholds. GDPR treats unencrypted email addresses as medium priority for remediation, but PCI-DSS treats any unencrypted PAN as a blocking finding. Production DLP tools let compliance teams configure per-framework severity floors. + +**What you will learn:** +- Adding config-driven behavior to the scoring pipeline +- The tension between confidence-based and policy-based severity +- How production DLP tools balance detection accuracy with compliance requirements + +**Implementation approach:** + +1. **Add to config:** + ```yaml + compliance: + severity_overrides: + PCI_DSS: "high" + HIPAA: "medium" + ``` + +2. **Apply after scoring:** In the `_match_to_finding` function (or equivalent), after computing severity from confidence, check if any of the finding's compliance frameworks have a severity floor, and upgrade if necessary + +3. 
**Preserve original confidence:** The confidence score should not change. Only the severity classification changes. This lets analysts see that a finding scored 0.35 (normally "low") but was elevated to "high" because of PCI-DSS policy + +**Hints:** +- Add `severity_overrides: dict[str, str]` to `ComplianceConfig` in `config.py` +- Use `SEVERITY_ORDER` from `constants.py` to compare severity levels numerically +- Log when a severity is overridden so analysts understand why a low-confidence finding shows up as high severity + +### Challenge 6: Database Column Name Heuristic Scoring + +**What to build:** A pre-scan heuristic that boosts detection confidence for columns whose names suggest sensitive data (e.g., `ssn`, `credit_card_number`, `patient_dob`). + +**Why it matters:** Database schema names are strong metadata signals. A column named `ssn` in a table named `employees` is almost certainly storing Social Security Numbers, even before you look at the data. The Capital One breach investigation found that the compromised S3 bucket contained CSV exports with column headers like `SSN` and `AccountNumber`, which would have been trivially detectable with column-name analysis. + +**What you will learn:** +- Schema introspection as a detection signal +- Combining metadata and content signals +- How production DLP tools use schema analysis to prioritize scanning + +**Implementation approach:** + +1. **Create a column name classifier** with patterns mapping column names to rule IDs: + ``` + *ssn*, *social_sec* -> PII_SSN + *credit_card*, *card_num*, *pan* -> FIN_CREDIT_CARD + *email*, *e_mail* -> PII_EMAIL + *dob*, *date_of_birth*, *birthday* -> PII_DOB + ``` + +2. **Apply as a context boost** in the database scanner: when a column name matches a pattern, add a pre-boost to the base score before running the normal detection pipeline + +3. 
**Carry through to findings:** Add the column name match as additional evidence in the finding's metadata + +**Hints:** +- Implement this in `scanners/db_scanner.py` before the detection loop +- Use `fnmatch` for column name pattern matching (same as rule filtering) +- A modest boost (+0.15 to +0.25) is appropriate. Column names are strong signals but not definitive (a column named `ssn_backup_old` might be empty or encrypted) + +## Advanced Challenges + +### Challenge 7: Custom Rule Language + +**What to build:** A YAML-based rule definition format that lets users create detection rules without writing Python. Rules should support regex patterns, base scores, context keywords, and compliance framework tags. + +**Why it matters:** Production DLP tools (Symantec DLP, Netskope) let compliance teams define custom rules through policy editors because not every regulated data type is covered by built-in rules. European IBANs, Brazilian CPFs, Indian Aadhaar numbers, and industry-specific identifiers all need custom patterns. + +**What you will learn:** +- DSL design (keeping it simple enough to be useful, complex enough to be powerful) +- Safe regex compilation (preventing ReDoS) +- Hot reloading user-defined rules + +**Implementation approach:** + +1. **Define the rule schema:** + ```yaml + rules: + - id: CUSTOM_BR_CPF + name: "Brazilian CPF Number" + pattern: '\b\d{3}\.\d{3}\.\d{3}-\d{2}\b' + base_score: 0.40 + context_keywords: ["cpf", "cadastro"] + compliance: ["LGPD"] + validator: "mod11" + ``` + +2. **Build a rule loader** that reads YAML files from a `rules/` directory, compiles regex patterns safely (with timeout protection against catastrophic backtracking), and creates `DetectionRule` objects + +3. **Register custom rules** alongside built-in rules in the `DetectorRegistry` + +4. 
**Add built-in validator references** (mod11, luhn, mod97) that users can reference by name instead of writing Python + +**Gotchas:** +- Regex compilation must be safe: a user-provided pattern like `(a+)+b` causes catastrophic backtracking. Consider using the `regex` library with timeout, or validate patterns against known ReDoS patterns +- Custom rules should not be able to override or shadow built-in rules. Use ID prefixes (`CUSTOM_`) to namespace them +- Validator functions referenced by name need a registry of their own + +### Challenge 8: Real-Time File Monitoring + +**What to build:** A watch mode that monitors directories for file changes using filesystem events and scans new or modified files automatically. + +**Why it matters:** Batch scanning finds problems after the fact. Real-time monitoring catches sensitive data as soon as it hits disk. This is how endpoint DLP agents (CrowdStrike Falcon DLP, Digital Guardian) work: they hook filesystem events and scan in real time. + +**What you will learn:** +- Filesystem event monitoring with `watchdog` or `inotify` +- Event debouncing (a single file save can trigger multiple events) +- Background scanning without blocking the event loop + +**Architecture changes:** + +``` +┌──────────────────────────────┐ +│ FileSystemEventHandler │ +│ (watchdog or inotify) │ +│ │ +│ on_modified -> debounce │ +│ on_created -> scan_file │ +│ on_moved -> scan_dest │ +└──────────────┬───────────────┘ + │ + ▼ +┌──────────────────────────────┐ +│ ScanQueue (asyncio.Queue) │ +│ │ +│ Dedup by path │ +│ Rate limit scanning │ +└──────────────┬───────────────┘ + │ + ▼ +┌──────────────────────────────┐ +│ FileScanner.scan(file) │ +│ → Finding → Alert │ +└──────────────────────────────┘ +``` + +**Implementation steps:** + +1. Add `watchdog` as a dependency +2. Create `commands/watch.py` with a `dlp-scan watch ./directory` command +3. Implement a debouncer that batches filesystem events within a 500ms window +4. 
Use the existing `FileScanner._scan_file` for individual file scanning +5. Output findings to console in real time (stream mode, not batch) + +**Gotchas:** +- Editor save operations often create temporary files, write to them, then rename. This generates create, modify, and rename events. You need to scan the final file, not the intermediate temp files +- Large file copies trigger `on_modified` repeatedly as data is written. Debounce by waiting until the file size stabilizes +- The watch mode should respect the same exclude patterns and extension filters as batch scanning + +### Challenge 9: SIEM Integration via Syslog + +**What to build:** A reporter that sends findings to a SIEM (Splunk, Elastic, QRadar) via syslog (RFC 5424) or HTTP Event Collector (Splunk HEC). + +**Why it matters:** DLP findings are useless if they sit in a JSON file that nobody reads. Production DLP deployments send alerts to SIEMs where SOC analysts triage them alongside firewall logs, EDR alerts, and authentication events. Correlating a DLP finding with a VPN login from an unusual location turns a medium-severity alert into an incident. + +**What you will learn:** +- Syslog protocol formatting (RFC 5424 structured data) +- HTTP-based log shipping (Splunk HEC, Elastic Ingest) +- Alert fatigue management (batching, deduplication, severity filtering) + +**Implementation approach:** + +1. **Create `reporters/syslog_reporter.py`** that formats findings as RFC 5424 syslog messages: + ``` + <134>1 2026-04-08T10:30:00Z scanner dlp-scan - - + [finding@dlp rule_id="PII_SSN" severity="critical" + confidence="0.92" uri="employees.csv"] SSN detected + ``` + +2. **Add Splunk HEC support** as an alternative transport: POST JSON payloads to `https://splunk:8088/services/collector/event` with an HEC token + +3. **Add config:** + ```yaml + output: + siem: + type: "syslog" # or "splunk_hec" + host: "siem.corp.com" + port: 514 + protocol: "tcp" # or "udp" + hec_token: "" # for Splunk HEC + ``` + +4. 
**Implement batching:** Send findings in batches of 50 with a 5-second flush interval to avoid overwhelming the SIEM + +## Expert Challenges + +### Challenge 10: Machine Learning False Positive Reduction + +**What to build:** A feedback loop where analysts can mark findings as true positive or false positive, and a classifier learns to suppress likely false positives on future scans. + +**Why it matters:** The single biggest complaint about DLP tools is false positive volume. Symantec DLP deployments commonly see 40-60% false positive rates on initial rollout. Analysts spend hours dismissing findings that match SSN patterns but are actually serial numbers, batch IDs, or zip+4 codes. A classifier trained on analyst feedback can reduce false positives by 70-80% while maintaining detection recall. + +**What you will learn:** +- Feature engineering from detection signals (confidence, context keywords found, rule type, file type, surrounding text patterns) +- Online learning: updating a model as new feedback arrives without retraining from scratch +- The precision-recall tradeoff in security tooling (a false negative is a missed breach; a false positive is analyst fatigue) + +**Implementation phases:** + +**Phase 1: Feedback Collection** +- Add `dlp-scan feedback --true-positive` and `--false-positive` commands +- Store feedback in a SQLite database: `(finding_id, rule_id, features_json, label, timestamp)` +- Extract features: confidence, rule_id, file extension, context keywords matched, co-occurrence count, surrounding text entropy + +**Phase 2: Classifier** +- Train a logistic regression or gradient boosted tree on accumulated feedback +- Features: one-hot encode rule_id, numeric confidence, boolean context_found, file_extension category +- Use scikit-learn with ONNX export for deployment without the full sklearn dependency + +**Phase 3: Integration** +- After the detection pipeline produces matches, run the classifier as a post-filter +- Matches classified as likely 
false positives get demoted (severity lowered, or moved to a "suppressed" section) +- Never fully suppress a detection. Always show suppressed findings in a separate section so analysts can audit the classifier + +**Success criteria:** +- [ ] Feedback collection works and stores features +- [ ] Classifier trains on 50+ labeled examples +- [ ] False positive rate drops by at least 30% on held-out test set +- [ ] No true positives are fully suppressed (only demoted) +- [ ] Model retrains automatically when feedback count crosses thresholds (100, 500, 1000) + +## Real-World Integration Challenges + +### Integrate with GitHub Code Scanning + +**The goal:** Upload SARIF output to GitHub Code Scanning so DLP findings appear as annotations on pull requests. + +**What you will learn:** +- GitHub Code Scanning API +- SARIF upload via GitHub Actions +- CI/CD pipeline integration for security tooling + +**Steps:** + +1. Create a GitHub Actions workflow that runs `dlp-scan file . -f sarif -o results.sarif` on pull requests +2. Upload the SARIF file using the `github/codeql-action/upload-sarif` action +3. Configure `on: pull_request` to scan only changed files (use `git diff --name-only` to get the list) +4. Set severity filtering so only high/critical findings block the PR + +### Scan AWS S3 Buckets + +**The goal:** Add an S3 scanner that lists objects in a bucket, downloads them to a temp directory, and scans with the existing file scanner. + +**What you will learn:** +- boto3 integration for S3 object listing and download +- Temporary file management for large object scanning +- Credential handling (IAM roles vs. access keys) + +**Steps:** + +1. Add `dlp-scan s3 s3://bucket-name/prefix` command +2. Use boto3 to list objects, filter by extension +3. Download each object to a temp directory (use `tempfile.mkdtemp`) +4. Scan with `FileScanner` and map findings back to S3 URIs +5. 
Clean up temp files after scanning
+
+This directly addresses the Capital One breach scenario: unencrypted PII in S3 buckets that nobody knew existed.
+
+## Performance Challenge
+
+### Handle 1 Million Files
+
+**The goal:** Make the file scanner handle a directory with 1 million files without running out of memory or taking more than an hour.
+
+**Current bottleneck:** the scanner materializes the output of `Path.rglob("*")` into a list of all files before scanning starts (`rglob` itself is a lazy generator, but collecting it defeats that). With 1 million files, this consumes significant memory and delays the first scan result.
+
+**Optimization approaches:**
+
+**Approach 1: Streaming directory walk**
+- Replace `rglob` with `os.scandir` recursive walk that yields files one at a time
+- Process and discard each file before reading the next
+- Memory stays constant regardless of directory size
+
+**Approach 2: Parallel extraction**
+- Use `concurrent.futures.ThreadPoolExecutor` for I/O-bound extraction (file reads, PDF parsing)
+- Use `concurrent.futures.ProcessPoolExecutor` for CPU-bound detection (regex matching on large texts)
+- Tune pool sizes based on profiling
+
+**Approach 3: Prioritized scanning**
+- Scan high-risk extensions first (`.csv`, `.xlsx`, `.sql`) before low-risk ones (`.log`, `.txt`)
+- Report findings as they are discovered (streaming output) instead of waiting for the full scan to complete
+
+**Benchmark it:**
+
+```bash
+time dlp-scan file /large-directory -f json -o results.json
+```
+
+Target: under 60 minutes for 1 million files with an average file size of 10KB. 
+ +## Challenge Completion + +Track your progress: + +- [ ] Easy 1: Date of Birth Rule +- [ ] Easy 2: HTML Report Output +- [ ] Easy 3: Allowlist by File Path +- [ ] Intermediate 4: Incremental Scanning +- [ ] Intermediate 5: Severity Override +- [ ] Intermediate 6: Column Name Heuristic +- [ ] Advanced 7: Custom Rule Language +- [ ] Advanced 8: Real-Time Monitoring +- [ ] Advanced 9: SIEM Integration +- [ ] Expert 10: ML False Positive Reduction +- [ ] Integration: GitHub Code Scanning +- [ ] Integration: S3 Bucket Scanning +- [ ] Performance: 1 Million Files + +## Study Real Implementations + +Compare your work to production DLP tools: + +- **Nightfall AI**: Cloud-native DLP with ML-based detection. Open-sourced their detection patterns. Look at how they handle multi-format extraction +- **truffleHog**: Focuses on credential detection in git repos. Their entropy-based detection and regex patterns for API keys are similar to this project's credential rules +- **detect-secrets**: Yelp's secret scanner. Compare their plugin architecture to the detector registry pattern in this project +- **Microsoft Purview**: Enterprise DLP with 300+ built-in sensitive information types. 
Their documentation on exact data match (EDM) and trainable classifiers shows where the field is heading diff --git a/PROJECTS/intermediate/dlp-scanner/pyproject.toml b/PROJECTS/intermediate/dlp-scanner/pyproject.toml new file mode 100644 index 0000000..637bf74 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/pyproject.toml @@ -0,0 +1,181 @@ +# ©AngelaMos | 2026 +# pyproject.toml + +[project] +name = "dlp-scanner" +version = "0.1.0" +description = "Data Loss Prevention scanner for files, databases, and network traffic" +requires-python = ">=3.12" +dependencies = [ + "typer>=0.15.0", + "rich>=14.0.0", + "structlog>=25.0.0", + "pydantic>=2.10.0", + "orjson>=3.10.0", + "ruamel.yaml>=0.18.0", + "pymupdf>=1.25.0", + "python-docx>=1.1.0", + "openpyxl>=3.1.0", + "xlrd>=2.0.0", + "defusedxml>=0.7.0", + "lxml>=5.0.0", + "pyarrow>=16.0.0", + "fastavro>=1.9.0", + "extract-msg>=0.50.0", + "asyncpg>=0.30.0", + "aiomysql>=0.2.0", + "pymongo>=4.10.0", + "aiosqlite>=0.21.0", + "dpkt>=1.9.0", +] + +[project.scripts] +dlp-scan = "dlp_scanner.cli:app" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/dlp_scanner"] + +[dependency-groups] +dev = [ + "ruff>=0.11.0", + "mypy>=1.15.0", + "yapf>=0.43.0", + "pytest>=8.3.0", + "pytest-asyncio>=0.25.0", + "pytest-cov>=6.0.0", + "hypothesis>=6.130.0", +] + +[tool.ruff] +line-length = 75 +indent-width = 4 +target-version = "py312" +src = ["src"] + +[tool.ruff.lint] +select = [ + "E", + "W", + "F", + "B", + "S", + "C90", + "N", + "UP", + "SIM", + "PTH", + "PERF", + "RUF", + "PL", + "TRY", + "LOG", +] +ignore = [ + "S101", + "S112", + "TRY003", + "PLR2004", + "PLR0913", + "PLR0911", + "PLC0415", + "PTH123", + "PERF401", + "E501", +] + +[tool.ruff.lint.per-file-ignores] +"tests/**/*.py" = ["S101", "PLR2004", "S104", "S105", "S106"] +"src/dlp_scanner/detectors/rules/**/*.py" = ["E501"] +"src/dlp_scanner/scanners/db_scanner.py" = ["S608"] 
+"src/dlp_scanner/network/protocols.py" = ["S110"] +"src/dlp_scanner/extractors/structured.py" = ["N817"] + +[tool.ruff.lint.mccabe] +max-complexity = 12 + +[tool.mypy] +python_version = "3.12" +strict = true +warn_return_any = true +warn_unused_configs = true +show_error_codes = true +show_column_numbers = true +pretty = true +mypy_path = "src" + +[[tool.mypy.overrides]] +module = [ + "dpkt.*", + "extract_msg.*", + "fastavro.*", + "xlrd.*", + "docx.*", + "openpyxl.*", + "fitz.*", + "defusedxml.*", + "lxml.*", + "aiomysql.*", + "pymongo.*", + "asyncpg.*", + "aiosqlite.*", + "pyarrow.*", +] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = [ + "dlp_scanner.extractors.email", + "dlp_scanner.extractors.office", + "dlp_scanner.extractors.structured", + "dlp_scanner.scanners.db_scanner", + "dlp_scanner.scanners.network_scanner", + "dlp_scanner.network.pcap", +] +disallow_any_expr = false +warn_return_any = false +disable_error_code = ["attr-defined", "unused-coroutine", "no-untyped-call", "import-untyped"] + +[[tool.mypy.overrides]] +module = "tests.*" +disallow_untyped_defs = false + +[tool.pytest.ini_options] +testpaths = ["tests"] +asyncio_mode = "auto" +addopts = [ + "--strict-markers", + "--tb=short", +] +markers = [ + "unit: fast unit tests with no I/O", + "integration: tests requiring real file system or DB", + "slow: long-running tests", +] + +[tool.coverage.run] +source = ["src"] +branch = true +omit = [ + "*/tests/*", + "src/dlp_scanner/extractors/pdf.py", + "src/dlp_scanner/extractors/office.py", + "src/dlp_scanner/extractors/archive.py", + "src/dlp_scanner/extractors/email.py", + "src/dlp_scanner/network/pcap.py", + "src/dlp_scanner/scanners/network_scanner.py", + "src/dlp_scanner/scanners/db_scanner.py", + "src/dlp_scanner/reporters/base.py", + "src/dlp_scanner/scanners/base.py", +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "if TYPE_CHECKING:", + "raise NotImplementedError", + "\\.\\.\\.", +] diff --git 
a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/__init__.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/__init__.py new file mode 100644 index 0000000..4e0a5ce --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/__init__.py @@ -0,0 +1,6 @@ +""" +©AngelaMos | 2026 +__init__.py +""" + +__version__ = "0.1.0" diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/cli.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/cli.py new file mode 100644 index 0000000..1cff363 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/cli.py @@ -0,0 +1,73 @@ +""" +©AngelaMos | 2026 +cli.py +""" + + +from typing import Annotated + +import typer + +from dlp_scanner import __version__ +from dlp_scanner.commands.report import report_app +from dlp_scanner.commands.scan import register + + +app = typer.Typer( + name = "dlp-scan", + help = ( + "Data Loss Prevention scanner for files, " + "databases, and network traffic" + ), + no_args_is_help = True, +) + + +def _version_callback(value: bool) -> None: + """ + Print version and exit + """ + if value: + typer.echo(f"dlp-scanner {__version__}") + raise typer.Exit() + + +@app.callback() +def main( + ctx: typer.Context, + config: Annotated[ + str, + typer.Option( + "--config", + "-c", + help = "Path to config YAML file", + ), + ] = "", + verbose: Annotated[ + bool, + typer.Option( + "--verbose", + "-v", + help = "Enable verbose output", + ), + ] = False, + version: Annotated[ + bool, + typer.Option( + "--version", + callback = _version_callback, + is_eager = True, + help = "Show version and exit", + ), + ] = False, +) -> None: + """ + DLP Scanner - detect sensitive data across files, databases, and network captures + """ + ctx.ensure_object(dict) + ctx.obj["config_path"] = config + ctx.obj["verbose"] = verbose + + +register(app) +app.add_typer(report_app, name = "report") diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/__init__.py 
b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/report.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/report.py new file mode 100644 index 0000000..bae4109 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/report.py @@ -0,0 +1,187 @@ +""" +©AngelaMos | 2026 +report.py +""" + + +from datetime import datetime +from pathlib import Path +from typing import Annotated, Any + +import orjson +import typer + +from dlp_scanner.models import ( + Finding, + Location, + ScanResult, +) + + +report_app = typer.Typer(help = "Report conversion and summary") + +VALID_FORMATS: frozenset[str] = frozenset( + { + "console", + "json", + "sarif", + "csv", + } +) + + +@report_app.command("convert") +def convert( + input_file: Annotated[ + str, + typer.Argument(help = "JSON scan results file"), + ], + output_format: Annotated[ + str, + typer.Option( + "--format", + "-f", + help = "Target format (json, sarif, csv)", + ), + ] = "sarif", + output_file: Annotated[ + str, + typer.Option( + "--output", + "-o", + help = "Write converted report to file", + ), + ] = "", +) -> None: + """ + Convert a JSON scan result to another format + """ + from dlp_scanner.config import ScanConfig + from dlp_scanner.engine import ScanEngine + + if output_format not in VALID_FORMATS: + typer.echo( + f"Invalid format: {output_format}", + err = True, + ) + raise typer.Exit(code = 1) + + path = Path(input_file) + if not path.exists(): + typer.echo( + f"File not found: {input_file}", + err = True, + ) + raise typer.Exit(code = 1) + + raw = path.read_bytes() + data = orjson.loads(raw) + result = _rebuild_result(data) + + config = ScanConfig() + engine = ScanEngine(config) + + output = 
engine.generate_report(result, output_format) + + if output_file: + Path(output_file).write_text(output) + typer.echo(f"Converted report written to " + f"{output_file}") + else: + typer.echo(output) + + +@report_app.command("summary") +def summary( + input_file: Annotated[ + str, + typer.Argument(help = "JSON scan results file"), + ], +) -> None: + """ + Print summary statistics from a scan result file + """ + path = Path(input_file) + if not path.exists(): + typer.echo( + f"File not found: {input_file}", + err = True, + ) + raise typer.Exit(code = 1) + + raw = path.read_bytes() + data = orjson.loads(raw) + result = _rebuild_result(data) + + from dlp_scanner.reporters.console import ( + ConsoleReporter, + ) + + reporter = ConsoleReporter() + reporter.display(result) + + +def _rebuild_result( + data: dict[str, + Any], +) -> ScanResult: + """ + Rebuild a ScanResult from deserialized JSON report + """ + meta = data.get("scan_metadata", {}) + result = ScanResult( + targets_scanned = meta.get("targets_scanned", + 0), + ) + result.scan_id = meta.get("scan_id", result.scan_id) + + if meta.get("scan_completed_at"): + result.scan_completed_at = ( + datetime.fromisoformat(meta["scan_completed_at"]) + ) + + result.errors = meta.get("errors", []) + + for f_data in data.get("findings", []): + loc_data = f_data.get("location", {}) + location = Location( + source_type = loc_data.get("source_type", + "file"), + uri = loc_data.get("uri", + ""), + line = loc_data.get("line"), + column = loc_data.get("column"), + table_name = loc_data.get("table_name"), + column_name = loc_data.get("column_name"), + ) + + finding = Finding( + rule_id = f_data.get("rule_id", + ""), + rule_name = f_data.get("rule_name", + ""), + severity = f_data.get("severity", + "low"), + confidence = f_data.get("confidence", + 0.0), + location = location, + redacted_snippet = f_data.get("redacted_snippet", + ""), + compliance_frameworks = f_data.get( + "compliance_frameworks", + [] + ), + remediation = 
f_data.get("remediation", + ""), + ) + + if f_data.get("finding_id"): + finding.finding_id = f_data["finding_id"] + if f_data.get("detected_at"): + finding.detected_at = ( + datetime.fromisoformat(f_data["detected_at"]) + ) + + result.findings.append(finding) + + return result diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/scan.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/scan.py new file mode 100644 index 0000000..cd3fd36 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/commands/scan.py @@ -0,0 +1,195 @@ +""" +©AngelaMos | 2026 +scan.py +""" + + +from pathlib import Path +from typing import Annotated, Any + +import typer + + +FORMAT_HELP: str = ("Output format (console, json, sarif, csv)") +OUTPUT_HELP: str = "Write report to file" +VALID_FORMATS: frozenset[str] = frozenset( + { + "console", + "json", + "sarif", + "csv", + } +) + + +def scan_file( + ctx: typer.Context, + target: Annotated[ + str, + typer.Argument(help = "File or directory path"), + ], + output_format: Annotated[ + str, + typer.Option( + "--format", + "-f", + help = FORMAT_HELP, + ), + ] = "console", + output_file: Annotated[ + str, + typer.Option( + "--output", + "-o", + help = OUTPUT_HELP, + ), + ] = "", +) -> None: + """ + Scan files and directories for sensitive data + """ + _run_scan(ctx, "file", target, output_format, output_file) + + +def scan_db( + ctx: typer.Context, + target: Annotated[ + str, + typer.Argument(help = "Database connection URI"), + ], + output_format: Annotated[ + str, + typer.Option( + "--format", + "-f", + help = FORMAT_HELP, + ), + ] = "console", + output_file: Annotated[ + str, + typer.Option( + "--output", + "-o", + help = OUTPUT_HELP, + ), + ] = "", +) -> None: + """ + Scan database tables for sensitive data + """ + _run_scan(ctx, "db", target, output_format, output_file) + + +def scan_network( + ctx: typer.Context, + target: Annotated[ + str, + typer.Argument(help = "PCAP file path"), + ], + 
output_format: Annotated[ + str, + typer.Option( + "--format", + "-f", + help = FORMAT_HELP, + ), + ] = "console", + output_file: Annotated[ + str, + typer.Option( + "--output", + "-o", + help = OUTPUT_HELP, + ), + ] = "", +) -> None: + """ + Scan network capture files for sensitive data in transit + """ + _run_scan( + ctx, + "network", + target, + output_format, + output_file, + ) + + +def register(app: typer.Typer) -> None: + """ + Register scan commands on the root app + """ + app.command("file")(scan_file) + app.command("db")(scan_db) + app.command("network")(scan_network) + + +def _run_scan( + ctx: typer.Context, + scan_type: str, + target: str, + output_format: str, + output_file: str, +) -> None: + """ + Shared scan execution logic + """ + from dlp_scanner.config import ( + ScanConfig, + load_config, + ) + from dlp_scanner.engine import ScanEngine + from dlp_scanner.log import configure_logging + + if output_format not in VALID_FORMATS: + typer.echo( + f"Invalid format: {output_format}. 
" + f"Choose from: " + f"{', '.join(sorted(VALID_FORMATS))}", + err = True, + ) + raise typer.Exit(code = 1) + + obj: dict[str, Any] = ctx.ensure_object(dict) + config_path: str = obj.get("config_path", "") + verbose: bool = obj.get("verbose", False) + + if verbose: + configure_logging(level = "DEBUG") + elif output_format == "console": + configure_logging(level = "INFO") + else: + configure_logging(level = "WARNING") + + config: ScanConfig + cfg_path = Path(config_path) if config_path else None + if cfg_path and cfg_path.exists(): + config = load_config(cfg_path) + else: + config = ScanConfig() + + config.output.format = output_format + if output_file: + config.output.output_file = output_file + + engine = ScanEngine(config) + + scan_methods = { + "file": engine.scan_files, + "db": engine.scan_database, + "network": engine.scan_network, + } + + result = scan_methods[scan_type](target) + + if output_file: + engine.write_report(result, output_file) + typer.echo(f"Report written to {output_file}") + elif output_format == "console": + engine.display_console(result) + else: + output = engine.generate_report(result) + typer.echo(output) + + if result.errors: + raise typer.Exit(code = 1) diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/compliance.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/compliance.py new file mode 100644 index 0000000..8761f09 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/compliance.py @@ -0,0 +1,245 @@ +""" +©AngelaMos | 2026 +compliance.py +""" + + +from dlp_scanner.constants import ( + SEVERITY_SCORE_THRESHOLDS, + Severity, +) + + +RULE_FRAMEWORK_MAP: dict[str, + list[str]] = { + "PII_SSN": ["HIPAA", + "CCPA", + "GLBA", + "GDPR"], + "PII_EMAIL": ["GDPR", + "CCPA"], + "PII_PHONE": ["GDPR", + "CCPA", + "HIPAA"], + "PII_PHONE_INTL": ["GDPR", + "CCPA"], + "PII_PASSPORT_US": ["GDPR", + "CCPA"], + "PII_PASSPORT_UK": ["GDPR"], + "PII_DRIVERS_LICENSE": ["CCPA", + "HIPAA"], + "PII_DRIVERS_LICENSE_FL": 
["CCPA", + "HIPAA"], + "PII_DRIVERS_LICENSE_IL": ["CCPA", + "HIPAA"], + "PII_IPV4": ["GDPR"], + "FIN_CREDIT_CARD_VISA": ["PCI_DSS", + "GLBA"], + "FIN_CREDIT_CARD_MC": ["PCI_DSS", + "GLBA"], + "FIN_CREDIT_CARD_AMEX": ["PCI_DSS", + "GLBA"], + "FIN_CREDIT_CARD_DISC": ["PCI_DSS", + "GLBA"], + "FIN_IBAN": ["GDPR", + "GLBA"], + "FIN_NHS_NUMBER": ["GDPR"], + "CRED_AWS_ACCESS_KEY": [], + "CRED_GITHUB_TOKEN": [], + "CRED_GITHUB_FINE_GRAINED": [], + "CRED_GITHUB_OAUTH": [], + "CRED_GITHUB_APP": [], + "CRED_JWT": [], + "CRED_STRIPE_KEY": [], + "CRED_SLACK_TOKEN": [], + "CRED_GENERIC_API_KEY": [], + "CRED_PRIVATE_KEY": [], + "HEALTH_MEDICAL_RECORD": ["HIPAA"], + "HEALTH_DEA_NUMBER": ["HIPAA"], + "HEALTH_NPI": ["HIPAA"], + "NET_HIGH_ENTROPY": [], + "NET_DNS_EXFIL_LONG_LABEL": [], + "NET_DNS_EXFIL_HIGH_ENTROPY": [], + "NET_DNS_EXFIL_LONG_QNAME": [], + "NET_DNS_EXFIL_TXT_VOLUME": [], + "NET_ENCODED_BASE64": [], + "NET_ENCODED_HEX": [], + } + +RULE_REMEDIATION_MAP: dict[ + str, + str] = { + "PII_SSN": ( + "Remove or encrypt SSNs. Use tokenization " + "for storage. Never store in plaintext." + ), + "PII_EMAIL": ( + "Evaluate if email storage is necessary. " + "Hash or pseudonymize where possible." + ), + "PII_PHONE": ( + "Restrict access to phone number fields. " + "Consider masking in non-production environments." + ), + "PII_PHONE_INTL": ( + "Restrict access to phone number fields. " + "Consider masking in non-production environments." + ), + "PII_PASSPORT_US": ( + "Passport numbers must be encrypted at rest. " + "Limit access to identity verification systems." + ), + "PII_PASSPORT_UK": ( + "Passport numbers must be encrypted at rest. " + "Limit access to identity verification systems." + ), + "PII_IPV4": ( + "Evaluate whether IP address storage is necessary. " + "Anonymize or pseudonymize where possible." + ), + "PII_DRIVERS_LICENSE": ( + "Encrypt driver's license numbers at rest. " + "Restrict access per CCPA/HIPAA requirements." 
+ ), + "PII_DRIVERS_LICENSE_FL": ( + "Encrypt driver's license numbers at rest. " + "Restrict access per CCPA/HIPAA requirements." + ), + "PII_DRIVERS_LICENSE_IL": ( + "Encrypt driver's license numbers at rest. " + "Restrict access per CCPA/HIPAA requirements." + ), + "FIN_CREDIT_CARD_VISA": ( + "PCI-DSS requires PANs to be encrypted, hashed, " + "or truncated. Never store in plaintext." + ), + "FIN_CREDIT_CARD_MC": ( + "PCI-DSS requires PANs to be encrypted, hashed, " + "or truncated. Never store in plaintext." + ), + "FIN_CREDIT_CARD_AMEX": ( + "PCI-DSS requires PANs to be encrypted, hashed, " + "or truncated. Never store in plaintext." + ), + "FIN_CREDIT_CARD_DISC": ( + "PCI-DSS requires PANs to be encrypted, hashed, " + "or truncated. Never store in plaintext." + ), + "FIN_IBAN": ( + "Encrypt IBAN numbers at rest. " + "Restrict access to financial systems." + ), + "FIN_NHS_NUMBER": ( + "NHS numbers are personal data under UK GDPR. " + "Encrypt at rest and restrict access." + ), + "CRED_AWS_ACCESS_KEY": ( + "Rotate exposed AWS credentials immediately. " + "Use IAM roles or Vault dynamic secrets." + ), + "CRED_GITHUB_TOKEN": ( + "Revoke the token at github.com/settings/tokens. " + "Use environment variables, not hardcoded values." + ), + "CRED_GITHUB_FINE_GRAINED": ( + "Revoke the token at github.com/settings/tokens. " + "Use environment variables, not hardcoded values." + ), + "CRED_GITHUB_OAUTH": ( + "Revoke the OAuth token in GitHub settings. " + "Store tokens in a secrets manager." + ), + "CRED_GITHUB_APP": ( + "Revoke the app installation token. " + "Rotate app private keys if compromised." + ), + "CRED_JWT": ( + "Rotate the signing key if the JWT secret is " + "exposed. Never hardcode tokens in source." + ), + "CRED_STRIPE_KEY": ( + "Rotate the Stripe key at dashboard.stripe.com. " + "Use restricted keys with minimal permissions." + ), + "CRED_SLACK_TOKEN": ( + "Revoke the Slack token in workspace settings. " + "Use environment variables for bot tokens." 
+ ), + "CRED_GENERIC_API_KEY": ( + "Rotate the exposed API key immediately. " + "Store secrets in a vault, not in source code." + ), + "CRED_PRIVATE_KEY": ( + "Rotate the compromised key pair. Store private " + "keys in a secrets manager, never in source code." + ), + "HEALTH_MEDICAL_RECORD": ( + "MRNs are PHI under HIPAA. Encrypt at rest and " + "apply minimum necessary access controls." + ), + "HEALTH_DEA_NUMBER": ( + "DEA numbers identify prescribers of controlled " + "substances. Encrypt and restrict access per HIPAA." + ), + "HEALTH_NPI": ( + "NPIs are provider identifiers under HIPAA. " + "Restrict access to authorized systems only." + ), + "NET_HIGH_ENTROPY": ( + "High entropy data may indicate encrypted or " + "compressed secrets in transit. Investigate the flow." + ), + "NET_DNS_EXFIL_LONG_LABEL": ( + "Unusually long DNS labels may indicate DNS " + "tunneling. Investigate the queried domain." + ), + "NET_DNS_EXFIL_HIGH_ENTROPY": ( + "High-entropy DNS subdomains suggest data " + "exfiltration via DNS tunneling. Block the domain." + ), + "NET_DNS_EXFIL_LONG_QNAME": ( + "Excessively long DNS QNAMEs may carry encoded " + "data. Investigate and block suspicious domains." + ), + "NET_DNS_EXFIL_TXT_VOLUME": ( + "High ratio of TXT queries to a domain suggests " + "DNS-based command and control. Investigate traffic." + ), + "NET_ENCODED_BASE64": ( + "Base64-encoded payloads in network traffic may " + "carry exfiltrated data. Inspect the content." + ), + "NET_ENCODED_HEX": ( + "Hex-encoded payloads in network traffic may " + "indicate data exfiltration. Inspect the content." + ), + } + +DEFAULT_REMEDIATION: str = ( + "Review and restrict access to this data. " + "Apply encryption at rest if required by policy." 
+) + + +def get_frameworks_for_rule(rule_id: str) -> list[str]: + """ + Return applicable compliance frameworks for a rule + """ + return RULE_FRAMEWORK_MAP.get(rule_id, []) + + +def get_remediation_for_rule(rule_id: str) -> str: + """ + Return remediation guidance for a rule + """ + return RULE_REMEDIATION_MAP.get(rule_id, DEFAULT_REMEDIATION) + + +def score_to_severity(score: float) -> Severity: + """ + Convert a confidence score to a severity level + """ + for threshold, severity in SEVERITY_SCORE_THRESHOLDS: + if score >= threshold: + return severity + return "low" diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/config.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/config.py new file mode 100644 index 0000000..7381399 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/config.py @@ -0,0 +1,173 @@ +""" +©AngelaMos | 2026 +config.py +""" + + +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, Field +from ruamel.yaml import YAML + +from dlp_scanner.constants import ( + DEFAULT_CONTEXT_WINDOW_TOKENS, + DEFAULT_DB_MAX_ROWS, + DEFAULT_DB_SAMPLE_PERCENTAGE, + DEFAULT_DB_TIMEOUT_SECONDS, + DEFAULT_DNS_ENTROPY_THRESHOLD, + DEFAULT_ENTROPY_THRESHOLD, + DEFAULT_EXCLUDE_PATTERNS, + DEFAULT_MAX_FILE_SIZE_MB, + DEFAULT_MIN_CONFIDENCE, + SCANNABLE_EXTENSIONS, + OutputFormat, + RedactionStyle, + Severity, +) + + +class FileScanConfig(BaseModel): + """ + Configuration for file scanning + """ + max_file_size_mb: int = DEFAULT_MAX_FILE_SIZE_MB + recursive: bool = True + exclude_patterns: list[str] = Field( + default_factory = lambda: list(DEFAULT_EXCLUDE_PATTERNS) + ) + include_extensions: list[str] = Field( + default_factory = lambda: sorted(SCANNABLE_EXTENSIONS) + ) + + +class DatabaseScanConfig(BaseModel): + """ + Configuration for database scanning + """ + sample_percentage: int = DEFAULT_DB_SAMPLE_PERCENTAGE + max_rows_per_table: int = DEFAULT_DB_MAX_ROWS + timeout_seconds: int = 
DEFAULT_DB_TIMEOUT_SECONDS + exclude_tables: list[str] = Field(default_factory = list) + include_tables: list[str] = Field(default_factory = list) + + +class NetworkScanConfig(BaseModel): + """ + Configuration for network traffic scanning + """ + bpf_filter: str = "" + entropy_threshold: float = DEFAULT_ENTROPY_THRESHOLD + dns_label_entropy_threshold: float = (DEFAULT_DNS_ENTROPY_THRESHOLD) + max_packets: int = 0 + + +class AllowlistConfig(BaseModel): + """ + Allowlists for suppressing known false positives + """ + values: list[str] = Field(default_factory = list) + domains: list[str] = Field(default_factory = list) + file_patterns: list[str] = Field(default_factory = list) + + +class DetectionConfig(BaseModel): + """ + Configuration for detection behavior + """ + min_confidence: float = DEFAULT_MIN_CONFIDENCE + severity_threshold: Severity = "low" + context_window_tokens: int = (DEFAULT_CONTEXT_WINDOW_TOKENS) + enable_rules: list[str] = Field(default_factory = lambda: ["*"]) + disable_rules: list[str] = Field(default_factory = list) + allowlists: AllowlistConfig = Field(default_factory = AllowlistConfig) + + +class ComplianceConfig(BaseModel): + """ + Configuration for compliance framework mapping + """ + frameworks: list[str] = Field( + default_factory = lambda: [ + "HIPAA", + "PCI_DSS", + "GDPR", + "CCPA",] + ) + + +class OutputConfig(BaseModel): + """ + Configuration for output and reporting + """ + format: OutputFormat = "console" + output_file: str = "" + redaction_style: RedactionStyle = "partial" + verbose: bool = False + color: bool = True + + +class LoggingConfig(BaseModel): + """ + Configuration for logging behavior + """ + level: str = "INFO" + json_output: bool = False + log_file: str = "" + + +class ScanConfig(BaseModel): + """ + Root configuration model for the DLP scanner + """ + file: FileScanConfig = Field(default_factory = FileScanConfig) + database: DatabaseScanConfig = Field( + default_factory = DatabaseScanConfig + ) + network: 
def load_config(path: Path | None = None) -> ScanConfig:
    """
    Load configuration from a YAML file or return defaults.

    Search order when ``path`` is None: ``.dlp-scanner.yml`` then
    ``.dlp-scanner.yaml`` in the current directory, then
    ``~/.dlp-scanner.yml``.  A missing file yields an all-default
    ``ScanConfig``.

    Sections that are present but empty (e.g. a bare ``detection:``
    key) load as ``None`` in YAML; they are coerced to ``{}`` so the
    corresponding sub-model falls back to its defaults instead of
    raising ``AttributeError``/``TypeError`` on ``**None``.
    """
    if path is None:
        candidates = [
            Path(".dlp-scanner.yml"),
            Path(".dlp-scanner.yaml"),
            Path.home() / ".dlp-scanner.yml",
        ]
        for candidate in candidates:
            if candidate.exists():
                path = candidate
                break

    if path is None or not path.exists():
        return ScanConfig()

    yaml = YAML(typ = "safe")
    raw: dict[str, Any] = yaml.load(path) or {}

    def _section(mapping: dict[str, Any], key: str) -> dict[str, Any]:
        # A bare "key:" in YAML loads as None, not {} — normalize.
        return mapping.get(key) or {}

    scan_section = _section(raw, "scan")
    return ScanConfig(
        file = FileScanConfig(**_section(scan_section, "file")),
        database = DatabaseScanConfig(
            **_section(scan_section, "database")
        ),
        network = NetworkScanConfig(
            **_section(scan_section, "network")
        ),
        detection = DetectionConfig(**_section(raw, "detection")),
        compliance = ComplianceConfig(**_section(raw, "compliance")),
        output = OutputConfig(**_section(raw, "output")),
        logging = LoggingConfig(**_section(raw, "logging")),
    )
"medium": 2, + "low": 3, + } + +SEVERITY_SCORE_THRESHOLDS: list[tuple[float, + Severity]] = [ + (0.85, + "critical"), + (0.65, + "high"), + (0.40, + "medium"), + (0.20, + "low"), + ] + +COMPLIANCE_FRAMEWORKS: list[str] = [ + "HIPAA", + "PCI_DSS", + "GDPR", + "CCPA", + "SOX", + "GLBA", + "FERPA", +] + +DEFAULT_CONTEXT_WINDOW_TOKENS: int = 10 +DEFAULT_MIN_CONFIDENCE: float = 0.20 +DEFAULT_ENTROPY_THRESHOLD: float = 7.2 +DEFAULT_DNS_ENTROPY_THRESHOLD: float = 4.0 +DEFAULT_MAX_FILE_SIZE_MB: int = 100 +DEFAULT_DB_SAMPLE_PERCENTAGE: int = 5 +DEFAULT_DB_MAX_ROWS: int = 10000 +DEFAULT_DB_TIMEOUT_SECONDS: int = 30 + +CHECKSUM_BOOST: float = 0.30 +CONTEXT_BOOST_MAX: float = 0.35 +CONTEXT_BOOST_MIN_FLOOR: float = 0.40 +COOCCURRENCE_BOOST: float = 0.15 + +KNOWN_TEST_VALUES: frozenset[str] = frozenset( + { + "123-45-6789", + "000-00-0000", + "078-05-1120", + "219-09-9999", + "4111111111111111", + "5500000000000004", + "340000000000009", + "6011000000000004", + "test@example.com", + "user@test.com", + } +) + +DEFAULT_EXCLUDE_PATTERNS: list[str] = [ + "*.pyc", + "__pycache__", + ".git", + "node_modules", + ".venv", + ".env", + "*.egg-info", +] + +SCANNABLE_EXTENSIONS: frozenset[str] = frozenset( + { + ".pdf", + ".docx", + ".xlsx", + ".xls", + ".csv", + ".json", + ".xml", + ".yaml", + ".yml", + ".txt", + ".log", + ".cfg", + ".ini", + ".toml", + ".conf", + ".eml", + ".msg", + ".parquet", + ".avro", + ".md", + ".rst", + ".html", + ".htm", + ".tsv", + ".py", + ".js", + ".ts", + ".go", + ".rb", + ".java", + ".c", + ".cpp", + ".h", + ".hpp", + ".rs", + ".env", + ".sh", + ".bat", + ".ps1", + ".tf", + ".hcl", + } +) + +TEXT_DB_COLUMN_TYPES_PG: frozenset[str] = frozenset( + { + "text", + "character varying", + "character", + "json", + "jsonb", + "varchar", + } +) + +TEXT_DB_COLUMN_TYPES_MYSQL: frozenset[str] = frozenset( + { + "varchar", + "text", + "mediumtext", + "longtext", + "json", + "char", + "tinytext", + } +) + +SEVERITY_COLORS: dict[Severity, + str] = { + "critical": "bold red", 
+ "high": "red", + "medium": "yellow", + "low": "green", + } + +SARIF_SEVERITY_MAP: dict[Severity, + str] = { + "critical": "error", + "high": "error", + "medium": "warning", + "low": "note", + } + +MAX_ARCHIVE_DEPTH: int = 3 +MAX_ARCHIVE_MEMBER_SIZE_MB: int = 50 +ZIP_BOMB_RATIO_THRESHOLD: int = 100 diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/__init__.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/base.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/base.py new file mode 100644 index 0000000..d843843 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/base.py @@ -0,0 +1,51 @@ +""" +©AngelaMos | 2026 +base.py +""" + + +import re +from dataclasses import dataclass, field +from typing import Protocol +from collections.abc import Callable + + +@dataclass(frozen = True, slots = True) +class DetectionRule: + """ + A single detection rule combining regex, validation, and context + """ + rule_id: str + rule_name: str + pattern: re.Pattern[str] + base_score: float + context_keywords: list[str] = field(default_factory = list) + validator: Callable[[str], bool] | None = None + compliance_frameworks: list[str] = field(default_factory = list) + severity_override: str | None = None + + +@dataclass(frozen = True, slots = True) +class DetectorMatch: + """ + A raw match from a detector before scoring + """ + rule_id: str + rule_name: str + start: int + end: int + matched_text: str + score: float + context_keywords: list[str] = field(default_factory = list) + compliance_frameworks: list[str] = field(default_factory = list) + + +class Detector(Protocol): + """ + Protocol for all detection strategies + """ + def 
def apply_context_boost(
    text: str,
    matches: list[DetectorMatch],
    window_tokens: int = DEFAULT_CONTEXT_WINDOW_TOKENS,
) -> list[DetectorMatch]:
    """
    Boost match scores based on nearby context keywords.

    For each match carrying context keywords, a window of
    ``window_tokens`` tokens on either side of the match position is
    joined and searched for those keywords; keyword proximity raises
    the score (capped at 1.0), and any positively boosted score is
    floored at CONTEXT_BOOST_MIN_FLOOR.  Co-occurrence boosting is
    then applied to the full list.
    """
    if not matches:
        return matches

    lowered_tokens = text.lower().split()
    rescored: list[DetectorMatch] = []

    for hit in matches:
        # Matches without keywords pass through untouched.
        if not hit.context_keywords:
            rescored.append(hit)
            continue

        token_idx = _char_offset_to_token_index(text, hit.start)
        lo = max(0, token_idx - window_tokens)
        hi = min(len(lowered_tokens), token_idx + window_tokens)
        neighborhood = " ".join(lowered_tokens[lo : hi])

        bump = _compute_keyword_boost(
            neighborhood,
            hit.context_keywords,
            window_tokens,
        )

        adjusted = min(1.0, hit.score + bump)
        if bump > 0 and adjusted < CONTEXT_BOOST_MIN_FLOOR:
            adjusted = CONTEXT_BOOST_MIN_FLOOR

        rescored.append(
            DetectorMatch(
                rule_id = hit.rule_id,
                rule_name = hit.rule_name,
                start = hit.start,
                end = hit.end,
                matched_text = hit.matched_text,
                score = adjusted,
                context_keywords = hit.context_keywords,
                compliance_frameworks = hit.compliance_frameworks,
            )
        )

    return _apply_cooccurrence_boost(rescored)
int, +) -> float: + """ + Compute score boost based on keyword proximity + """ + best_boost = 0.0 + + for keyword in keywords: + kw_lower = keyword.lower() + pos = window_text.find(kw_lower) + if pos < 0: + continue + + center = len(window_text) // 2 + distance = abs(pos - center) + max_distance = window_tokens * 5 + + proximity_factor = 1.0 - min(1.0, distance / max(1, max_distance)) + boost = CONTEXT_BOOST_MAX * proximity_factor + best_boost = max(best_boost, boost) + + return best_boost + + +def _apply_cooccurrence_boost( + matches: list[DetectorMatch], +) -> list[DetectorMatch]: + """ + Boost scores when multiple PII types appear near each other + """ + if len(matches) < 2: + return matches + + proximity_threshold = 500 + boosted: list[DetectorMatch] = [] + + for i, match in enumerate(matches): + has_neighbor = False + for j, other in enumerate(matches): + if i == j: + continue + if other.rule_id == match.rule_id: + continue + distance = abs(match.start - other.start) + if distance < proximity_threshold: + has_neighbor = True + break + + if has_neighbor: + new_score = min(1.0, match.score + COOCCURRENCE_BOOST) + boosted.append( + DetectorMatch( + rule_id = match.rule_id, + rule_name = match.rule_name, + start = match.start, + end = match.end, + matched_text = match.matched_text, + score = new_score, + context_keywords = match.context_keywords, + compliance_frameworks = match.compliance_frameworks, + ) + ) + else: + boosted.append(match) + + return boosted + + +def _char_offset_to_token_index(text: str, char_offset: int) -> int: + """ + Convert a character offset to an approximate token index + """ + prefix = text[: char_offset] + return len(prefix.split()) diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/entropy.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/entropy.py new file mode 100644 index 0000000..5ed0385 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/entropy.py @@ -0,0 +1,126 @@ 
+""" +©AngelaMos | 2026 +entropy.py +""" + + +import math +from collections import Counter + +from dlp_scanner.constants import DEFAULT_ENTROPY_THRESHOLD +from dlp_scanner.detectors.base import DetectorMatch + + +WINDOW_SIZE: int = 256 +WINDOW_STEP: int = 128 + + +def shannon_entropy(data: bytes) -> float: + """ + Calculate Shannon entropy in bits per byte + """ + if not data: + return 0.0 + + counts = Counter(data) + total = len(data) + return -sum( + (c / total) * math.log2(c / total) for c in counts.values() + ) + + +def shannon_entropy_str(text: str) -> float: + """ + Calculate Shannon entropy for a string + """ + return shannon_entropy(text.encode("utf-8")) + + +def detect_high_entropy_regions( + data: bytes, + threshold: float = DEFAULT_ENTROPY_THRESHOLD, + window_size: int = WINDOW_SIZE, + step: int = WINDOW_STEP, +) -> list[tuple[int, + int, + float]]: + """ + Find regions of high entropy using a sliding window + + Returns list of (start_offset, end_offset, entropy_value) + """ + if len(data) < window_size: + h = shannon_entropy(data) + if h >= threshold: + return [(0, len(data), h)] + return [] + + regions: list[tuple[int, int, float]] = [] + i = 0 + + while i + window_size <= len(data): + window = data[i : i + window_size] + h = shannon_entropy(window) + + if h >= threshold: + end = i + window_size + while end + step <= len(data): + next_window = data[end - window_size + step : end + step] + next_h = shannon_entropy(next_window) + if next_h < threshold: + break + h = max(h, next_h) + end += step + + regions.append((i, end, h)) + i = end + else: + i += step + + return regions + + +class EntropyDetector: + """ + Detects high-entropy data that may indicate encrypted + or compressed content + """ + def __init__( + self, + threshold: float = DEFAULT_ENTROPY_THRESHOLD, + ) -> None: + self._threshold = threshold + + def detect(self, text: str) -> list[DetectorMatch]: + """ + Scan text for high-entropy regions + """ + data = text.encode("utf-8") + regions = 
class PatternDetector:
    """
    Regex-driven detector with optional checksum validation and an
    allowlist of known benign values.
    """
    def __init__(
        self,
        rules: list[DetectionRule],
        allowlist_values: frozenset[str] | None = None,
    ) -> None:
        # Fall back to the built-in set of well-known test values
        # when no explicit allowlist is supplied.
        self._rules = rules
        self._allowlist = allowlist_values or KNOWN_TEST_VALUES

    def detect(self, text: str) -> list[DetectorMatch]:
        """
        Scan text against all registered patterns.

        A rule with a validator contributes a match only when the
        validator accepts the value, in which case the base score is
        raised by CHECKSUM_BOOST (capped at 1.0).  Allowlisted
        values are dropped entirely.
        """
        found: list[DetectorMatch] = []

        for rule in self._rules:
            for hit in rule.pattern.finditer(text):
                candidate = hit.group()

                if self._is_allowlisted(candidate):
                    continue

                confidence = rule.base_score
                if rule.validator is not None:
                    if not rule.validator(candidate):
                        continue
                    confidence = min(
                        1.0,
                        confidence + CHECKSUM_BOOST,
                    )

                found.append(
                    DetectorMatch(
                        rule_id = rule.rule_id,
                        rule_name = rule.rule_name,
                        start = hit.start(),
                        end = hit.end(),
                        matched_text = candidate,
                        score = confidence,
                        context_keywords = rule.context_keywords,
                        compliance_frameworks = rule.compliance_frameworks,
                    )
                )

        return found

    def _is_allowlisted(self, value: str) -> bool:
        """
        Check whether a matched value is a known benign value.
        """
        return value.strip() in self._allowlist
def _filter_rules(
    rules: list[DetectionRule],
    enable_patterns: list[str],
    disable_patterns: list[str],
) -> list[DetectionRule]:
    """
    Filter rules by enable/disable glob patterns.

    A rule survives when its rule_id matches at least one enable
    glob and no disable glob; disable always wins over enable.
    """
    def _matches_any(rule_id: str, patterns: list[str]) -> bool:
        # fnmatch gives shell-style globbing ("*", "PII_*", ...).
        return any(
            fnmatch.fnmatch(rule_id, pat) for pat in patterns
        )

    return [
        rule
        for rule in rules
        if _matches_any(rule.rule_id, enable_patterns)
        and not _matches_any(rule.rule_id, disable_patterns)
    ]
DetectionRule + + +AWS_ACCESS_KEY_PATTERN = re.compile(r"\b((?:AKIA|ASIA)[0-9A-Z]{16})\b") + +GITHUB_CLASSIC_PAT_PATTERN = re.compile(r"\bghp_[a-zA-Z0-9]{36}\b") + +GITHUB_FINE_GRAINED_PATTERN = re.compile( + r"\bgithub_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59}\b" +) + +GITHUB_OAUTH_PATTERN = re.compile(r"\bgho_[a-zA-Z0-9]{36}\b") + +GITHUB_APP_PATTERN = re.compile(r"\bghs_[a-zA-Z0-9]{36}\b") + +JWT_PATTERN = re.compile( + r"\beyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\b" +) + +STRIPE_KEY_PATTERN = re.compile( + r"\b(?:sk|pk)_(?:test|live)_[a-zA-Z0-9]{24,}\b" +) + +SLACK_TOKEN_PATTERN = re.compile(r"\bxox[baprs]-[a-zA-Z0-9\-]{10,48}\b") + +GENERIC_API_KEY_PATTERN = re.compile( + r"(?i)(?:api[_\-]?key|apikey|api[_\-]?token|access[_\-]?key|secret[_\-]?key)" + r"\s*[:=]\s*['\"]?" + r"([a-zA-Z0-9\-_.]{20,64})" + r"['\"]?" +) + +PRIVATE_KEY_PATTERN = re.compile( + r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----" +) + +API_KEY_CONTEXT = [ + "api_key", + "apikey", + "api key", + "secret", + "token", + "authorization", + "bearer", + "credential", + "password", + "access_key", +] + +CREDENTIAL_RULES: list[DetectionRule] = [ + DetectionRule( + rule_id = "CRED_AWS_ACCESS_KEY", + rule_name = "AWS Access Key ID", + pattern = AWS_ACCESS_KEY_PATTERN, + base_score = 0.85, + context_keywords = [ + "aws", + "amazon", + "access_key", + "aws_access_key_id", + ], + ), + DetectionRule( + rule_id = "CRED_GITHUB_TOKEN", + rule_name = "GitHub Personal Access Token", + pattern = GITHUB_CLASSIC_PAT_PATTERN, + base_score = 0.90, + context_keywords = ["github", + "token", + "pat"], + ), + DetectionRule( + rule_id = "CRED_GITHUB_FINE_GRAINED", + rule_name = "GitHub Fine-Grained PAT", + pattern = GITHUB_FINE_GRAINED_PATTERN, + base_score = 0.90, + context_keywords = ["github", + "token"], + ), + DetectionRule( + rule_id = "CRED_GITHUB_OAUTH", + rule_name = "GitHub OAuth Token", + pattern = GITHUB_OAUTH_PATTERN, + base_score = 0.90, + context_keywords = ["github", + "oauth"], + ), + 
DetectionRule( + rule_id = "CRED_GITHUB_APP", + rule_name = "GitHub App Token", + pattern = GITHUB_APP_PATTERN, + base_score = 0.90, + context_keywords = ["github", + "app"], + ), + DetectionRule( + rule_id = "CRED_JWT", + rule_name = "JSON Web Token", + pattern = JWT_PATTERN, + base_score = 0.70, + context_keywords = [ + "jwt", + "token", + "bearer", + "authorization", + ], + ), + DetectionRule( + rule_id = "CRED_STRIPE_KEY", + rule_name = "Stripe API Key", + pattern = STRIPE_KEY_PATTERN, + base_score = 0.90, + context_keywords = [ + "stripe", + "payment", + "api_key", + ], + ), + DetectionRule( + rule_id = "CRED_SLACK_TOKEN", + rule_name = "Slack Token", + pattern = SLACK_TOKEN_PATTERN, + base_score = 0.85, + context_keywords = [ + "slack", + "token", + "webhook", + ], + ), + DetectionRule( + rule_id = "CRED_GENERIC_API_KEY", + rule_name = "Generic API Key", + pattern = GENERIC_API_KEY_PATTERN, + base_score = 0.50, + context_keywords = API_KEY_CONTEXT, + ), + DetectionRule( + rule_id = "CRED_PRIVATE_KEY", + rule_name = "Private Key", + pattern = PRIVATE_KEY_PATTERN, + base_score = 0.95, + context_keywords = [ + "private key", + "rsa", + "ssh", + "certificate", + ], + ), +] diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/financial.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/financial.py new file mode 100644 index 0000000..8355801 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/financial.py @@ -0,0 +1,184 @@ +""" +©AngelaMos | 2026 +financial.py +""" + + +import re + +from dlp_scanner.detectors.base import DetectionRule + + +VISA_PATTERN = re.compile( + r"\b4[0-9]{3}[-\s]?[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{4}\b" +) + +MASTERCARD_PATTERN = re.compile( + r"\b(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)" + r"[-\s]?[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{4}\b" +) + +AMEX_PATTERN = re.compile(r"\b3[47][0-9]{2}[-\s]?[0-9]{6}[-\s]?[0-9]{5}\b") + 
def luhn_check(number: str) -> bool:
    """
    Validate a number using the Luhn algorithm.

    Non-digit characters (separators, spaces) are ignored; inputs
    with fewer than 13 digits — the minimum payment-card length —
    always fail.
    """
    digits = [int(ch) for ch in number if ch.isdigit()]
    if len(digits) < 13:
        return False

    checksum = 0
    for position, digit in enumerate(reversed(digits)):
        if position % 2 == 1:
            # Double every second digit from the right and fold
            # two-digit results back into one (18 -> 9).
            doubled = digit * 2
            checksum += doubled - 9 if doubled > 9 else doubled
        else:
            checksum += digit
    return checksum % 10 == 0
DetectionRule( + rule_id = "FIN_CREDIT_CARD_VISA", + rule_name = "Visa Credit Card Number", + pattern = VISA_PATTERN, + base_score = 0.50, + context_keywords = CREDIT_CARD_CONTEXT, + validator = luhn_check, + compliance_frameworks = ["PCI_DSS", + "GLBA"], + ), + DetectionRule( + rule_id = "FIN_CREDIT_CARD_MC", + rule_name = "Mastercard Credit Card Number", + pattern = MASTERCARD_PATTERN, + base_score = 0.50, + context_keywords = CREDIT_CARD_CONTEXT, + validator = luhn_check, + compliance_frameworks = ["PCI_DSS", + "GLBA"], + ), + DetectionRule( + rule_id = "FIN_CREDIT_CARD_AMEX", + rule_name = "American Express Card Number", + pattern = AMEX_PATTERN, + base_score = 0.50, + context_keywords = CREDIT_CARD_CONTEXT, + validator = luhn_check, + compliance_frameworks = ["PCI_DSS", + "GLBA"], + ), + DetectionRule( + rule_id = "FIN_CREDIT_CARD_DISC", + rule_name = "Discover Card Number", + pattern = DISCOVER_PATTERN, + base_score = 0.50, + context_keywords = CREDIT_CARD_CONTEXT, + validator = luhn_check, + compliance_frameworks = ["PCI_DSS", + "GLBA"], + ), + DetectionRule( + rule_id = "FIN_IBAN", + rule_name = "IBAN Number", + pattern = IBAN_PATTERN, + base_score = 0.40, + context_keywords = IBAN_CONTEXT, + validator = iban_check, + compliance_frameworks = ["GDPR", + "GLBA"], + ), + DetectionRule( + rule_id = "FIN_NHS_NUMBER", + rule_name = "UK NHS Number", + pattern = NHS_PATTERN, + base_score = 0.15, + context_keywords = NHS_CONTEXT, + validator = nhs_check, + compliance_frameworks = ["GDPR"], + ), +] diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/health.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/health.py new file mode 100644 index 0000000..ead20a9 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/health.py @@ -0,0 +1,140 @@ +""" +©AngelaMos | 2026 +health.py +""" + + +import re + +from dlp_scanner.detectors.base import DetectionRule + + +MEDICAL_RECORD_PATTERN = re.compile( 
+ r"\b(?:MRN|MR#|MED)\s*[-:#]?\s*\d{6,10}\b", + re.IGNORECASE, +) + +DEA_NUMBER_PATTERN = re.compile(r"\b[A-Z][A-Z9]\d{7}\b") + +NPI_PATTERN = re.compile(r"\b\d{10}\b") + +PHI_CONTEXT_KEYWORDS = [ + "patient", + "diagnosis", + "treatment", + "medical", + "health", + "hospital", + "clinical", + "physician", + "prescription", + "medication", + "lab result", + "blood type", + "allergies", + "insurance", + "claim", + "icd", + "cpt", + "hcpcs", + "hipaa", + "phi", + "protected health", + "discharge", + "admission", + "prognosis", +] + +MEDICAL_RECORD_CONTEXT = [ + "medical record", + "mrn", + "patient id", + "chart number", + "record number", + "health record", + "ehr", + "emr", +] + +DEA_CONTEXT = [ + "dea", + "dea number", + "drug enforcement", + "prescriber", + "controlled substance", +] + +NPI_CONTEXT = [ + "npi", + "national provider", + "provider id", + "provider number", + "provider identifier", + "cms", +] + + +def _validate_npi(value: str) -> bool: + """ + Validate an NPI using Luhn with the 80840 prefix + """ + digits = value.replace("-", "").replace(" ", "") + if len(digits) != 10 or not digits.isdigit(): + return False + + prefixed = "80840" + digits + total = 0 + for i, d in enumerate(reversed(prefixed)): + n = int(d) + if i % 2 == 1: + n *= 2 + if n > 9: + n -= 9 + total += n + return total % 10 == 0 + + +def _validate_dea_number(value: str) -> bool: + """ + Validate a DEA number using its check digit algorithm + """ + if len(value) != 9: + return False + digits = value[2 :] + if not digits.isdigit(): + return False + + odd_sum = (int(digits[0]) + int(digits[2]) + int(digits[4])) + even_sum = (int(digits[1]) + int(digits[3]) + int(digits[5])) + check = (odd_sum + even_sum * 2) % 10 + return check == int(digits[6]) + + +HEALTH_RULES: list[DetectionRule] = [ + DetectionRule( + rule_id = "HEALTH_MEDICAL_RECORD", + rule_name = "Medical Record Number", + pattern = MEDICAL_RECORD_PATTERN, + base_score = 0.55, + context_keywords = MEDICAL_RECORD_CONTEXT, + 
compliance_frameworks = ["HIPAA"], + ), + DetectionRule( + rule_id = "HEALTH_DEA_NUMBER", + rule_name = "DEA Registration Number", + pattern = DEA_NUMBER_PATTERN, + base_score = 0.35, + context_keywords = DEA_CONTEXT, + validator = _validate_dea_number, + compliance_frameworks = ["HIPAA"], + ), + DetectionRule( + rule_id = "HEALTH_NPI", + rule_name = "National Provider Identifier", + pattern = NPI_PATTERN, + base_score = 0.10, + context_keywords = NPI_CONTEXT, + validator = _validate_npi, + compliance_frameworks = ["HIPAA"], + ), +] diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/pii.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/pii.py new file mode 100644 index 0000000..c543c78 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/detectors/rules/pii.py @@ -0,0 +1,209 @@ +""" +©AngelaMos | 2026 +pii.py +""" + + +import re + +from dlp_scanner.detectors.base import DetectionRule + + +SSN_PATTERN = re.compile( + r"\b(?!000|666|9\d{2})\d{3}" + r"[-\s]?" + r"(?!00)\d{2}" + r"[-\s]?" + r"(?!0000)\d{4}\b" +) + +EMAIL_PATTERN = re.compile( + r"\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b" +) + +PHONE_US_PATTERN = re.compile( + r"\b(?:\+?1[-.\s]?)?" 
+ r"(?:\(?[2-9]\d{2}\)?[-.\s]?)" + r"[2-9]\d{2}[-.\s]?\d{4}\b" +) + +PHONE_E164_PATTERN = re.compile(r"\+[1-9]\d{6,14}\b") + +PASSPORT_US_PATTERN = re.compile(r"\b[A-Z]{1,2}\d{6,7}\b") + +PASSPORT_UK_PATTERN = re.compile(r"\b\d{9}\b") + +IPV4_PATTERN = re.compile( + r"\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}" + r"(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b" +) + +DRIVERS_LICENSE_CA_PATTERN = re.compile(r"\b[A-Z]\d{7}\b") +DRIVERS_LICENSE_FL_PATTERN = re.compile(r"\b[A-Z]\d{12}\b") +DRIVERS_LICENSE_IL_PATTERN = re.compile(r"\b[A-Z]\d{11}\b") + + +def _validate_ssn(value: str) -> bool: + """ + Validate SSN area, group, and serial numbers + """ + digits = value.replace("-", "").replace(" ", "") + if len(digits) != 9 or not digits.isdigit(): + return False + + area = int(digits[0 : 3]) + group = int(digits[3 : 5]) + serial = int(digits[5 : 9]) + + if area in {0, 666} or area >= 900: + return False + if group == 0: + return False + return serial != 0 + + +SSN_CONTEXT = [ + "ssn", + "social security", + "social security number", + "ss#", + "taxpayer id", + "sin", + "tax id", +] + +EMAIL_CONTEXT = [ + "email", + "e-mail", + "mail", + "contact", + "reach at", +] + +PHONE_CONTEXT = [ + "phone", + "mobile", + "cell", + "tel", + "telephone", + "fax", + "contact number", + "call", +] + +PASSPORT_CONTEXT = [ + "passport", + "pass no", + "travel document", + "passport number", + "document number", +] + +DRIVERS_LICENSE_CONTEXT = [ + "driver's license", + "drivers license", + "driver license", + "dl#", + "dl number", + "license number", + "licence number", +] + +PII_RULES: list[DetectionRule] = [ + DetectionRule( + rule_id = "PII_SSN", + rule_name = "US Social Security Number", + pattern = SSN_PATTERN, + base_score = 0.45, + context_keywords = SSN_CONTEXT, + validator = _validate_ssn, + compliance_frameworks = [ + "HIPAA", + "CCPA", + "GLBA", + "GDPR", + ], + ), + DetectionRule( + rule_id = "PII_EMAIL", + rule_name = "Email Address", + pattern = EMAIL_PATTERN, + base_score = 0.30, + 
context_keywords = EMAIL_CONTEXT, + compliance_frameworks = ["GDPR", + "CCPA"], + ), + DetectionRule( + rule_id = "PII_PHONE", + rule_name = "US Phone Number", + pattern = PHONE_US_PATTERN, + base_score = 0.25, + context_keywords = PHONE_CONTEXT, + compliance_frameworks = [ + "GDPR", + "CCPA", + "HIPAA", + ], + ), + DetectionRule( + rule_id = "PII_PHONE_INTL", + rule_name = "International Phone Number", + pattern = PHONE_E164_PATTERN, + base_score = 0.30, + context_keywords = PHONE_CONTEXT, + compliance_frameworks = ["GDPR", + "CCPA"], + ), + DetectionRule( + rule_id = "PII_PASSPORT_US", + rule_name = "US Passport Number", + pattern = PASSPORT_US_PATTERN, + base_score = 0.15, + context_keywords = PASSPORT_CONTEXT, + compliance_frameworks = ["GDPR", + "CCPA"], + ), + DetectionRule( + rule_id = "PII_PASSPORT_UK", + rule_name = "UK Passport Number", + pattern = PASSPORT_UK_PATTERN, + base_score = 0.10, + context_keywords = PASSPORT_CONTEXT, + compliance_frameworks = ["GDPR"], + ), + DetectionRule( + rule_id = "PII_IPV4", + rule_name = "IPv4 Address", + pattern = IPV4_PATTERN, + base_score = 0.15, + context_keywords = [], + compliance_frameworks = ["GDPR"], + ), + DetectionRule( + rule_id = "PII_DRIVERS_LICENSE", + rule_name = "US Driver's License (CA)", + pattern = DRIVERS_LICENSE_CA_PATTERN, + base_score = 0.10, + context_keywords = DRIVERS_LICENSE_CONTEXT, + compliance_frameworks = ["CCPA", + "HIPAA"], + ), + DetectionRule( + rule_id = "PII_DRIVERS_LICENSE_FL", + rule_name = "US Driver's License (FL)", + pattern = DRIVERS_LICENSE_FL_PATTERN, + base_score = 0.10, + context_keywords = DRIVERS_LICENSE_CONTEXT, + compliance_frameworks = ["CCPA", + "HIPAA"], + ), + DetectionRule( + rule_id = "PII_DRIVERS_LICENSE_IL", + rule_name = "US Driver's License (IL)", + pattern = DRIVERS_LICENSE_IL_PATTERN, + base_score = 0.10, + context_keywords = DRIVERS_LICENSE_CONTEXT, + compliance_frameworks = ["CCPA", + "HIPAA"], + ), +] diff --git 
a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/engine.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/engine.py new file mode 100644 index 0000000..b45f5b1 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/engine.py @@ -0,0 +1,146 @@ +""" +©AngelaMos | 2026 +engine.py +""" + + +import structlog + +from dlp_scanner.config import ScanConfig +from dlp_scanner.constants import OutputFormat +from dlp_scanner.detectors.registry import ( + DetectorRegistry, +) +from dlp_scanner.models import ScanResult +from dlp_scanner.reporters.console import ( + ConsoleReporter, +) +from dlp_scanner.reporters.csv_report import ( + CsvReporter, +) +from dlp_scanner.reporters.json_report import ( + JsonReporter, +) +from dlp_scanner.reporters.sarif import SarifReporter +from dlp_scanner.scanners.db_scanner import ( + DatabaseScanner, +) +from dlp_scanner.scanners.file_scanner import ( + FileScanner, +) +from dlp_scanner.scanners.network_scanner import ( + NetworkScanner, +) + + +log = structlog.get_logger() + +REPORTER_MAP: dict[str, + type] = { + "console": ConsoleReporter, + "json": JsonReporter, + "sarif": SarifReporter, + "csv": CsvReporter, + } + + +class ScanEngine: + """ + Orchestrates the full scan pipeline + """ + def __init__(self, config: ScanConfig) -> None: + self._config = config + detection = config.detection + allowlist_vals = detection.allowlists.values + self._registry = DetectorRegistry( + enable_patterns = detection.enable_rules, + disable_patterns = detection.disable_rules, + allowlist_values = ( + frozenset(allowlist_vals) if allowlist_vals else None + ), + context_window_tokens = (detection.context_window_tokens), + ) + + def scan_files(self, target: str) -> ScanResult: + """ + Scan filesystem target for sensitive data + """ + scanner = FileScanner(self._config, self._registry) + result = scanner.scan(target) + log.info( + "file_scan_complete", + target = target, + findings = len(result.findings), + targets = result.targets_scanned, 
+ ) + return result + + def scan_database(self, target: str) -> ScanResult: + """ + Scan database target for sensitive data + """ + scanner = DatabaseScanner(self._config, self._registry) + result = scanner.scan(target) + log.info( + "database_scan_complete", + target = target, + findings = len(result.findings), + targets = result.targets_scanned, + ) + return result + + def scan_network(self, target: str) -> ScanResult: + """ + Scan network capture file for sensitive data + """ + scanner = NetworkScanner(self._config, self._registry) + result = scanner.scan(target) + log.info( + "network_scan_complete", + target = target, + findings = len(result.findings), + targets = result.targets_scanned, + ) + return result + + def generate_report( + self, + result: ScanResult, + output_format: OutputFormat | None = None, + ) -> str: + """ + Generate report string in the requested format + """ + fmt = output_format or self._config.output.format + reporter_cls = REPORTER_MAP[fmt] + reporter = reporter_cls() + output: str = reporter.generate(result) + return output + + def display_console( + self, + result: ScanResult, + ) -> None: + """ + Display Rich-formatted results to console + """ + reporter = ConsoleReporter() + reporter.display(result) + + def write_report( + self, + result: ScanResult, + output_path: str, + output_format: OutputFormat | None = None, + ) -> None: + """ + Generate report and write to file + """ + content = self.generate_report(result, output_format) + with open(output_path, "w") as f: + f.write(content) + log.info( + "report_written", + path = output_path, + format = output_format or self._config.output.format, + ) diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/__init__.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 
+__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/archive.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/archive.py new file mode 100644 index 0000000..18fe4a3 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/archive.py @@ -0,0 +1,191 @@ +""" +©AngelaMos | 2026 +archive.py +""" + + +import tarfile +import zipfile + +import structlog + +from dlp_scanner.constants import ( + MAX_ARCHIVE_DEPTH, + MAX_ARCHIVE_MEMBER_SIZE_MB, + ZIP_BOMB_RATIO_THRESHOLD, +) +from dlp_scanner.models import Location, TextChunk + + +log = structlog.get_logger() + +ARCHIVE_EXTENSIONS: frozenset[str] = frozenset( + { + ".zip", + ".tar", + ".tar.gz", + ".tgz", + ".tar.bz2", + } +) + +MAX_MEMBER_BYTES: int = MAX_ARCHIVE_MEMBER_SIZE_MB * 1024 * 1024 + + +class ArchiveExtractor: + """ + Extracts text content from archive files with security guards + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return ARCHIVE_EXTENSIONS + + def extract( + self, + path: str, + depth: int = 0, + ) -> list[TextChunk]: + """ + Extract text from archive members + """ + if depth >= MAX_ARCHIVE_DEPTH: + log.warning( + "archive_depth_exceeded", + path = path, + depth = depth, + ) + return [] + + if path.endswith(".zip"): + return self._extract_zip(path, depth) + + if any(path.endswith(ext) + for ext in (".tar", ".tar.gz", ".tgz", ".tar.bz2")): + return self._extract_tar(path, depth) + + return [] + + def _extract_zip(self, path: str, depth: int) -> list[TextChunk]: + """ + Extract from ZIP with bomb and traversal protection + """ + chunks: list[TextChunk] = [] + + try: + with zipfile.ZipFile(path, "r") as zf: + for info in zf.infolist(): + if not self._is_safe_zip_member(info): + continue + + data = zf.read(info.filename) + if not data: + continue + + try: + text = data.decode("utf-8", errors = "replace") + except Exception: + continue + + if 
text.strip(): + chunks.append( + TextChunk( + text = text, + location = Location( + source_type = "archive", + uri = f"{path}!{info.filename}", + ), + ) + ) + + except Exception: + log.warning("zip_extract_failed", path = path) + + return chunks + + def _extract_tar(self, path: str, depth: int) -> list[TextChunk]: + """ + Extract from TAR with traversal protection + """ + chunks: list[TextChunk] = [] + + try: + with tarfile.open(path) as tf: + for member in tf.getmembers(): + if not member.isfile(): + continue + + if not self._is_safe_tar_member(member): + continue + + if member.size > MAX_MEMBER_BYTES: + continue + + extracted = tf.extractfile(member) + if extracted is None: + continue + + data = extracted.read() + try: + text = data.decode("utf-8", errors = "replace") + except Exception: + continue + + if text.strip(): + chunks.append( + TextChunk( + text = text, + location = Location( + source_type = "archive", + uri = f"{path}!{member.name}", + ), + ) + ) + + except Exception: + log.warning("tar_extract_failed", path = path) + + return chunks + + def _is_safe_zip_member(self, info: zipfile.ZipInfo) -> bool: + """ + Check a ZIP member for path traversal and bomb indicators + """ + if ".." in info.filename or info.filename.startswith("/"): + log.warning( + "zip_path_traversal_blocked", + filename = info.filename, + ) + return False + + if "\x00" in info.filename: + return False + + if info.file_size > MAX_MEMBER_BYTES: + return False + + if (info.compress_size > 0 and info.file_size / info.compress_size + > ZIP_BOMB_RATIO_THRESHOLD): + log.warning( + "zip_bomb_detected", + filename = info.filename, + ratio = info.file_size / info.compress_size, + ) + return False + + return True + + def _is_safe_tar_member(self, member: tarfile.TarInfo) -> bool: + """ + Check a TAR member for path traversal + """ + if ".." 
in member.name or member.name.startswith("/"): + log.warning( + "tar_path_traversal_blocked", + filename = member.name, + ) + return False + + return not (member.issym() or member.islnk()) diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/base.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/base.py new file mode 100644 index 0000000..2e008b1 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/base.py @@ -0,0 +1,27 @@ +""" +©AngelaMos | 2026 +base.py +""" + + +from typing import Protocol + +from dlp_scanner.models import TextChunk + + +class Extractor(Protocol): + """ + Protocol for text extraction from different file formats + """ + def extract(self, path: str) -> list[TextChunk]: + """ + Extract text chunks from a file at the given path + """ + ... + + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + ... diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/email.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/email.py new file mode 100644 index 0000000..7dba9cf --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/email.py @@ -0,0 +1,123 @@ +""" +©AngelaMos | 2026 +email.py +""" + + +import email as email_lib +from email import policy + +import structlog + +from dlp_scanner.models import Location, TextChunk + + +log = structlog.get_logger() + +EMAIL_EXTENSIONS: frozenset[str] = frozenset({ + ".eml", + ".msg", +}) + + +class EmlExtractor: + """ + Extracts text from RFC 2822 EML files + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return frozenset({".eml"}) + + def extract(self, path: str) -> list[TextChunk]: + """ + Parse EML and extract headers and body text + """ + chunks: list[TextChunk] = [] + + try: + with open(path, "rb") as f: + msg = email_lib.message_from_binary_file( + f, + 
policy = policy.default + ) + + parts: list[str] = [] + + for header in ("From", "To", "Cc", "Subject"): + value = msg.get(header) + if value: + parts.append(f"{header}: {value}") + + body = msg.get_body(preferencelist = ("plain", "html")) + if body is not None: + content = body.get_content() + if content: + parts.append(content) + + if parts: + chunks.append( + TextChunk( + text = "\n".join(parts), + location = Location( + source_type = "file", + uri = path, + ), + ) + ) + + except Exception: + log.warning("eml_extract_failed", path = path) + + return chunks + + +class MsgExtractor: + """ + Extracts text from Outlook MSG files + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return frozenset({".msg"}) + + def extract(self, path: str) -> list[TextChunk]: + """ + Parse MSG and extract headers and body text + """ + import extract_msg + + chunks: list[TextChunk] = [] + + try: + with extract_msg.Message(path) as msg: + parts: list[str] = [] + + if msg.sender: + parts.append(f"From: {msg.sender}") + if msg.to: + parts.append(f"To: {msg.to}") + if msg.subject: + parts.append(f"Subject: {msg.subject}") + if msg.body: + parts.append(msg.body) + + if parts: + chunks.append( + TextChunk( + text = "\n".join(parts), + location = Location( + source_type = "file", + uri = path, + ), + ) + ) + + except Exception: + log.warning("msg_extract_failed", path = path) + + return chunks diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/office.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/office.py new file mode 100644 index 0000000..25f6497 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/office.py @@ -0,0 +1,178 @@ +""" +©AngelaMos | 2026 +office.py +""" + + +import structlog + +from dlp_scanner.models import Location, TextChunk + + +log = structlog.get_logger() + +DOCX_EXTENSIONS: frozenset[str] = frozenset({".docx"}) +XLSX_EXTENSIONS: 
frozenset[str] = frozenset({".xlsx"}) +XLS_EXTENSIONS: frozenset[str] = frozenset({".xls"}) +OFFICE_EXTENSIONS: frozenset[str] = ( + DOCX_EXTENSIONS | XLSX_EXTENSIONS | XLS_EXTENSIONS +) + + +class DocxExtractor: + """ + Extracts text from DOCX files + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return DOCX_EXTENSIONS + + def extract(self, path: str) -> list[TextChunk]: + """ + Extract text from paragraphs, tables, and metadata + """ + from docx import Document + + chunks: list[TextChunk] = [] + + try: + doc = Document(path) + paragraphs: list[str] = [] + + for para in doc.paragraphs: + if para.text.strip(): + paragraphs.append(para.text) + + for table in doc.tables: + for row in table.rows: + cells = [ + cell.text + for cell in row.cells + if cell.text.strip() + ] + if cells: + paragraphs.append(" | ".join(cells)) + + if doc.core_properties.author: + paragraphs.append(f"Author: {doc.core_properties.author}") + if doc.core_properties.title: + paragraphs.append(f"Title: {doc.core_properties.title}") + + if paragraphs: + chunks.append( + TextChunk( + text = "\n".join(paragraphs), + location = Location( + source_type = "file", + uri = path, + ), + ) + ) + + except Exception: + log.warning("docx_extract_failed", path = path) + + return chunks + + +class XlsxExtractor: + """ + Extracts text from XLSX files using openpyxl + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return XLSX_EXTENSIONS + + def extract(self, path: str) -> list[TextChunk]: + """ + Extract text from all sheets and cells + """ + from openpyxl import load_workbook + + chunks: list[TextChunk] = [] + + try: + wb = load_workbook( + path, + read_only = True, + data_only = True, + ) + for sheet in wb.worksheets: + rows: list[str] = [] + for row in sheet.iter_rows(values_only = True): + cell_values = [str(c) for c in row if c is not None] + if cell_values: 
+ rows.append(" | ".join(cell_values)) + + if rows: + chunks.append( + TextChunk( + text = "\n".join(rows), + location = Location( + source_type = "file", + uri = path, + sheet_name = sheet.title, + ), + ) + ) + wb.close() + except Exception: + log.warning("xlsx_extract_failed", path = path) + + return chunks + + +class XlsExtractor: + """ + Extracts text from legacy XLS files using xlrd + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return XLS_EXTENSIONS + + def extract(self, path: str) -> list[TextChunk]: + """ + Extract text from legacy Excel workbooks + """ + import xlrd + + chunks: list[TextChunk] = [] + + try: + wb = xlrd.open_workbook(path) + for sheet in wb.sheets(): + rows: list[str] = [] + for row_idx in range(sheet.nrows): + cell_values = [ + str(sheet.cell_value(row_idx, + col)) + for col in range(sheet.ncols) + if sheet.cell_value(row_idx, col) + ] + if cell_values: + rows.append(" | ".join(cell_values)) + + if rows: + chunks.append( + TextChunk( + text = "\n".join(rows), + location = Location( + source_type = "file", + uri = path, + sheet_name = sheet.name, + ), + ) + ) + except Exception: + log.warning("xls_extract_failed", path = path) + + return chunks diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/pdf.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/pdf.py new file mode 100644 index 0000000..07d938a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/pdf.py @@ -0,0 +1,56 @@ +""" +©AngelaMos | 2026 +pdf.py +""" + + +import structlog + +from dlp_scanner.models import Location, TextChunk + + +log = structlog.get_logger() + +PDF_EXTENSIONS: frozenset[str] = frozenset({".pdf"}) + + +class PDFExtractor: + """ + Extracts text from PDF files using PyMuPDF + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return PDF_EXTENSIONS + + def 
extract(self, path: str) -> list[TextChunk]: + """ + Extract text from each page of a PDF + """ + import fitz + + chunks: list[TextChunk] = [] + + try: + doc = fitz.open(path) + for page_num in range(len(doc)): + page = doc[page_num] + text = page.get_text("text") + if text.strip(): + chunks.append( + TextChunk( + text = text, + location = Location( + source_type = "file", + uri = path, + line = page_num + 1, + ), + ) + ) + doc.close() + except Exception: + log.warning("pdf_extract_failed", path = path) + + return chunks diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/plaintext.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/plaintext.py new file mode 100644 index 0000000..5771447 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/plaintext.py @@ -0,0 +1,112 @@ +""" +©AngelaMos | 2026 +plaintext.py +""" + + +import structlog + +from dlp_scanner.models import Location, TextChunk + + +log = structlog.get_logger() + +PLAINTEXT_EXTENSIONS: frozenset[str] = frozenset( + { + ".txt", + ".log", + ".cfg", + ".ini", + ".conf", + ".toml", + ".md", + ".rst", + ".html", + ".htm", + ".tsv", + ".env", + ".sh", + ".bat", + ".ps1", + ".py", + ".js", + ".ts", + ".go", + ".rb", + ".java", + ".c", + ".cpp", + ".h", + ".hpp", + ".rs", + ".tf", + ".hcl", + } +) + +CHUNK_MAX_LINES: int = 500 + + +class PlaintextExtractor: + """ + Extracts text from plaintext and source code files + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return PLAINTEXT_EXTENSIONS + + def extract(self, path: str) -> list[TextChunk]: + """ + Read a text file and return chunks + """ + chunks: list[TextChunk] = [] + + try: + with open( + path, + encoding = "utf-8", + errors = "replace", + ) as f: + lines: list[str] = [] + line_number = 1 + chunk_start = 1 + + for line in f: + lines.append(line) + if len(lines) >= CHUNK_MAX_LINES: + chunks.append( + TextChunk( + text = 
"".join(lines), + location = Location( + source_type = "file", + uri = path, + line = chunk_start, + ), + ) + ) + chunk_start = line_number + 1 + lines = [] + line_number += 1 + + if lines: + chunks.append( + TextChunk( + text = "".join(lines), + location = Location( + source_type = "file", + uri = path, + line = chunk_start, + ), + ) + ) + + except OSError: + log.warning( + "file_read_failed", + path = path, + ) + + return chunks diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/structured.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/structured.py new file mode 100644 index 0000000..52eec32 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/extractors/structured.py @@ -0,0 +1,327 @@ +""" +©AngelaMos | 2026 +structured.py +""" + + +import csv +import json +from typing import Any + +import structlog + +from dlp_scanner.models import Location, TextChunk + + +log = structlog.get_logger() + + +class CsvExtractor: + """ + Extracts text from CSV and TSV files + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return frozenset({".csv", ".tsv"}) + + def extract(self, path: str) -> list[TextChunk]: + """ + Read CSV row by row and concatenate cell values + """ + chunks: list[TextChunk] = [] + + try: + with open( + path, + newline = "", + encoding = "utf-8-sig", + ) as f: + dialect = csv.Sniffer().sniff(f.read(4096)) + f.seek(0) + reader = csv.reader(f, dialect) + rows: list[str] = [] + + for _row_num, row in enumerate(reader, 1): + cells = [c for c in row if c.strip()] + if cells: + rows.append(" | ".join(cells)) + + if rows: + chunks.append( + TextChunk( + text = "\n".join(rows), + location = Location( + source_type = "file", + uri = path, + line = 1, + ), + ) + ) + + except Exception: + log.warning("csv_extract_failed", path = path) + + return chunks + + +class JsonExtractor: + """ + Extracts text values from JSON files + """ + @property 
+ def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return frozenset({".json"}) + + def extract(self, path: str) -> list[TextChunk]: + """ + Parse JSON and extract all string values recursively + """ + chunks: list[TextChunk] = [] + + try: + with open(path, encoding = "utf-8") as f: + data = json.load(f) + + strings = _extract_json_strings(data) + if strings: + chunks.append( + TextChunk( + text = "\n".join(strings), + location = Location( + source_type = "file", + uri = path, + ), + ) + ) + + except Exception: + log.warning("json_extract_failed", path = path) + + return chunks + + +class XmlExtractor: + """ + Extracts text from XML files using defusedxml + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return frozenset({".xml"}) + + def extract(self, path: str) -> list[TextChunk]: + """ + Parse XML safely and extract all text content + """ + import defusedxml.ElementTree as ET + + chunks: list[TextChunk] = [] + + try: + tree = ET.parse(path) + root = tree.getroot() + texts: list[str] = [] + + for elem in root.iter(): + if elem.text and elem.text.strip(): + texts.append(elem.text.strip()) + if elem.tail and elem.tail.strip(): + texts.append(elem.tail.strip()) + for attr_val in elem.attrib.values(): + if attr_val.strip(): + texts.append(attr_val.strip()) + + if texts: + chunks.append( + TextChunk( + text = "\n".join(texts), + location = Location( + source_type = "file", + uri = path, + ), + ) + ) + + except Exception: + log.warning("xml_extract_failed", path = path) + + return chunks + + +class YamlExtractor: + """ + Extracts text from YAML files + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return frozenset({".yaml", ".yml"}) + + def extract(self, path: str) -> list[TextChunk]: + """ + Parse YAML safely and extract string values + """ + from ruamel.yaml 
import YAML + + chunks: list[TextChunk] = [] + + try: + yaml = YAML(typ = "safe") + with open(path) as f: + data = yaml.load(f) + + if data: + strings = _extract_json_strings(data) + if strings: + chunks.append( + TextChunk( + text = "\n".join(strings), + location = Location( + source_type = "file", + uri = path, + ), + ) + ) + + except Exception: + log.warning("yaml_extract_failed", path = path) + + return chunks + + +class ParquetExtractor: + """ + Extracts text from Parquet files + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return frozenset({".parquet"}) + + def extract(self, path: str) -> list[TextChunk]: + """ + Read Parquet file and extract string columns + """ + import pyarrow.parquet as pq + + chunks: list[TextChunk] = [] + + try: + pf = pq.ParquetFile(path) + schema = pf.schema_arrow + + string_cols = [ + field.name for field in schema if str(field.type) in ( + "string", + "large_string", + "utf8", + "large_utf8",) + ] + + if not string_cols: + return chunks + + for batch in pf.iter_batches( + batch_size = 5000, + columns = string_cols, + ): + rows: list[str] = [] + table_dict = batch.to_pydict() + for col_name, values in table_dict.items(): + for val in values: + if val is not None and str(val).strip(): + rows.append(f"{col_name}: {val}") + if rows: + chunks.append( + TextChunk( + text = "\n".join(rows), + location = Location( + source_type = "file", + uri = path, + ), + ) + ) + + except Exception: + log.warning("parquet_extract_failed", path = path) + + return chunks + + +class AvroExtractor: + """ + Extracts text from Avro files + """ + @property + def supported_extensions(self) -> frozenset[str]: + """ + File extensions this extractor handles + """ + return frozenset({".avro"}) + + def extract(self, path: str) -> list[TextChunk]: + """ + Read Avro file and extract string fields + """ + from fastavro import reader + + chunks: list[TextChunk] = [] + + try: + with open(path, 
"rb") as f: + rows: list[str] = [] + for record in reader(f): + strings = _extract_json_strings(record) + rows.extend(strings) + + if rows: + chunks.append( + TextChunk( + text = "\n".join(rows), + location = Location( + source_type = "file", + uri = path, + ), + ) + ) + + except Exception: + log.warning("avro_extract_failed", path = path) + + return chunks + + +def _extract_json_strings( + data: Any, + prefix: str = "", +) -> list[str]: + """ + Recursively extract all string values from a JSON-like structure + """ + strings: list[str] = [] + + if isinstance(data, str): + if data.strip(): + label = f"{prefix}: {data}" if prefix else data + strings.append(label) + elif isinstance(data, dict): + for key, val in data.items(): + key_path = (f"{prefix}.{key}" if prefix else str(key)) + strings.extend(_extract_json_strings(val, key_path)) + elif isinstance(data, list): + for item in data: + strings.extend(_extract_json_strings(item, prefix)) + + return strings diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/log.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/log.py new file mode 100644 index 0000000..e9ee554 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/log.py @@ -0,0 +1,80 @@ +""" +©AngelaMos | 2026 +log.py +""" + + +import logging +import sys +from typing import Any + +import orjson +import structlog + + +def _orjson_serializer( + data: Any, + **_kwargs: Any, +) -> str: + """ + Serialize log data using orjson for performance + """ + return orjson.dumps(data).decode("utf-8") + + +def configure_logging( + level: str = "INFO", + json_output: bool = False, + log_file: str = "", +) -> None: + """ + Set up structlog with stdlib integration + """ + shared_processors: list[structlog.types.Processor] = [ + structlog.contextvars.merge_contextvars, + structlog.stdlib.add_log_level, + structlog.stdlib.add_logger_name, + structlog.processors.TimeStamper(fmt = "iso"), + structlog.stdlib.PositionalArgumentsFormatter(), + 
structlog.processors.StackInfoRenderer(), + ] + + if json_output: + renderer: structlog.types.Processor = ( + structlog.processors.JSONRenderer( + serializer = _orjson_serializer + ) + ) + else: + renderer = structlog.dev.ConsoleRenderer(colors = True) + + structlog.configure( + processors = [ + *shared_processors, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], + logger_factory = structlog.stdlib.LoggerFactory(), + wrapper_class = structlog.stdlib.BoundLogger, + cache_logger_on_first_use = True, + ) + + formatter = structlog.stdlib.ProcessorFormatter( + foreign_pre_chain = shared_processors, + processors = [ + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + renderer, + ], + ) + + handler: logging.Handler = logging.StreamHandler(sys.stderr) + handler.setFormatter(formatter) + + root_logger = logging.getLogger() + root_logger.handlers.clear() + root_logger.addHandler(handler) + root_logger.setLevel(getattr(logging, level.upper())) + + if log_file: + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(formatter) + root_logger.addHandler(file_handler) diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/models.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/models.py new file mode 100644 index 0000000..2c5edab --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/models.py @@ -0,0 +1,112 @@ +""" +©AngelaMos | 2026 +models.py +""" + +import uuid +from dataclasses import dataclass, field +from datetime import datetime, UTC + +from dlp_scanner.constants import Severity + + +@dataclass(frozen = True, slots = True) +class Location: + """ + Where a finding was detected + """ + source_type: str + uri: str + line: int | None = None + column: int | None = None + byte_offset: int | None = None + table_name: str | None = None + column_name: str | None = None + sheet_name: str | None = None + + +@dataclass(slots = True) +class Finding: + """ + A fully scored and classified detection result + """ + 
finding_id: str = field( + default_factory = lambda: uuid.uuid4().hex[: 12] + ) + rule_id: str = "" + rule_name: str = "" + severity: Severity = "low" + confidence: float = 0.0 + location: Location = field( + default_factory = lambda: Location( + source_type = "unknown", + uri = "",) + ) + redacted_snippet: str = "" + compliance_frameworks: list[str] = field(default_factory = list) + remediation: str = "" + detected_at: datetime = field( + default_factory = lambda: datetime.now(UTC) + ) + + +@dataclass(slots = True) +class ScanResult: + """ + Aggregated results from a complete scan run + """ + scan_id: str = field(default_factory = lambda: uuid.uuid4().hex[: 16]) + tool_version: str = "0.1.0" + scan_started_at: datetime = field( + default_factory = lambda: datetime.now(UTC) + ) + scan_completed_at: datetime | None = None + targets_scanned: int = 0 + findings: list[Finding] = field(default_factory = list) + errors: list[str] = field(default_factory = list) + + @property + def findings_by_severity(self) -> dict[str, int]: + """ + Count findings grouped by severity level + """ + counts: dict[str, + int] = { + "critical": 0, + "high": 0, + "medium": 0, + "low": 0, + } + for f in self.findings: + counts[f.severity] = counts.get(f.severity, 0) + 1 + return counts + + @property + def findings_by_rule(self) -> dict[str, int]: + """ + Count findings grouped by rule ID + """ + counts: dict[str, int] = {} + for f in self.findings: + counts[f.rule_id] = counts.get(f.rule_id, 0) + 1 + return counts + + @property + def findings_by_framework(self) -> dict[str, int]: + """ + Count findings grouped by compliance framework + """ + counts: dict[str, int] = {} + for f in self.findings: + for fw in f.compliance_frameworks: + counts[fw] = counts.get(fw, 0) + 1 + return counts + + +@dataclass(frozen = True, slots = True) +class TextChunk: + """ + A piece of extracted text with its source location + """ + text: str + location: Location diff --git 
# --- src/dlp_scanner/network/__init__.py ---
"""
©AngelaMos | 2026
__init__.py
"""

# --- src/dlp_scanner/network/exfiltration.py ---
"""
©AngelaMos | 2026
exfiltration.py
"""


import re
from collections import defaultdict
from dataclasses import dataclass

import structlog

from dlp_scanner.constants import DEFAULT_DNS_ENTROPY_THRESHOLD
from dlp_scanner.detectors.entropy import shannon_entropy_str
from dlp_scanner.network.protocols import DnsQuery


log = structlog.get_logger()

# Heuristic ceilings for "normal" DNS traffic; values above them are
# treated as possible tunneling indicators.
DNS_LABEL_MAX_NORMAL: int = 50
DNS_QNAME_MAX_NORMAL: int = 100
# Fraction of TXT queries per base domain above which volume is flagged.
TXT_VOLUME_THRESHOLD: float = 0.05

# NOTE(review): the hex alphabet is a subset of the base64 alphabet, so a
# long hex run can be reported by BOTH patterns in detect_base64_payload —
# confirm the double report is intended.
BASE64_PATTERN = re.compile(rb"[A-Za-z0-9+/]{40,}={0,2}")
HEX_PATTERN = re.compile(rb"[0-9A-Fa-f]{64,}")


@dataclass(frozen=True, slots=True)
class ExfilIndicator:
    """An indicator of potential data exfiltration."""

    indicator_type: str  # machine-readable kind, e.g. "dns_long_label"
    description: str     # human-readable explanation
    confidence: float    # 0.0 .. 1.0
    source_ip: str       # empty when not attributable to one host
    dest_ip: str
    evidence: str        # offending value (QNAME, counts, payload excerpt)


class DnsExfilDetector:
    """
    Detects DNS-based data exfiltration patterns.

    Per-query heuristics (long labels, high-entropy subdomains, long
    QNAMEs) run in analyze_query(); the aggregate TXT-volume heuristic
    runs in check_txt_volume() after all queries have been fed in.
    """

    def __init__(
        self,
        entropy_threshold: float = DEFAULT_DNS_ENTROPY_THRESHOLD,
    ) -> None:
        self._entropy_threshold = entropy_threshold
        self._indicators: list[ExfilIndicator] = []
        # Per-base-domain counters consumed by check_txt_volume().
        self._domain_txt_counts: dict[str, int] = defaultdict(int)
        self._domain_total_counts: dict[str, int] = defaultdict(int)

    def analyze_query(
        self,
        query: DnsQuery,
        src_ip: str,
        dst_ip: str,
    ) -> ExfilIndicator | None:
        """
        Analyze a single DNS query; returns the first indicator that
        fires (label length, then entropy, then QNAME length) or None.
        Also updates the per-domain TXT/total counters.
        """
        qname = query.name
        base = _extract_base_domain(qname)

        self._domain_total_counts[base] += 1
        if query.query_type == "TXT":
            self._domain_txt_counts[base] += 1

        # Checks run in fixed priority order; first hit wins.
        for check in (
            self._check_label_length,
            self._check_subdomain_entropy,
            self._check_qname_length,
        ):
            hit = check(qname, src_ip, dst_ip)
            if hit is not None:
                self._indicators.append(hit)
                return hit

        return None

    def check_txt_volume(self) -> list[ExfilIndicator]:
        """
        Flag domains whose TXT-query ratio exceeds TXT_VOLUME_THRESHOLD.
        Indicators are both returned and appended to the internal list.
        """
        flagged: list[ExfilIndicator] = []

        for domain, txt_count in self._domain_txt_counts.items():
            total = self._domain_total_counts.get(domain, 0)
            if total == 0:
                continue

            ratio = txt_count / total
            if ratio > TXT_VOLUME_THRESHOLD:
                flagged.append(
                    ExfilIndicator(
                        indicator_type="dns_txt_volume",
                        description=(
                            f"High TXT query ratio "
                            f"({ratio:.1%}) for "
                            f"{domain}"
                        ),
                        confidence=min(0.90, 0.50 + ratio),
                        source_ip="",
                        dest_ip="",
                        evidence=f"{txt_count} TXT / {total} total",
                    )
                )

        self._indicators.extend(flagged)
        return flagged

    def get_indicators(self) -> list[ExfilIndicator]:
        """Return a copy of all collected exfiltration indicators."""
        return list(self._indicators)

    def _check_label_length(
        self,
        name: str,
        src_ip: str,
        dst_ip: str,
    ) -> ExfilIndicator | None:
        """Flag the first DNS label longer than DNS_LABEL_MAX_NORMAL."""
        for label in name.split("."):
            if len(label) > DNS_LABEL_MAX_NORMAL:
                return ExfilIndicator(
                    indicator_type="dns_long_label",
                    description=(
                        f"DNS label length "
                        f"{len(label)} exceeds "
                        f"normal threshold"
                    ),
                    confidence=0.75,
                    source_ip=src_ip,
                    dest_ip=dst_ip,
                    evidence=name,
                )
        return None

    def _check_subdomain_entropy(
        self,
        name: str,
        src_ip: str,
        dst_ip: str,
    ) -> ExfilIndicator | None:
        """
        Flag high-entropy subdomains suggesting tunneling. The subdomain
        is everything left of the last two labels; names with fewer than
        three labels have no subdomain and are skipped.
        """
        parts = name.split(".")
        if len(parts) < 3:
            return None

        subdomain = ".".join(parts[:-2])
        if not subdomain:
            return None

        entropy = shannon_entropy_str(subdomain)
        if entropy <= self._entropy_threshold:
            return None

        return ExfilIndicator(
            indicator_type="dns_high_entropy",
            description=(
                f"High subdomain entropy "
                f"({entropy:.2f}) suggesting "
                f"DNS tunneling"
            ),
            # Confidence scales with entropy above ~3 bits, capped at 0.95.
            confidence=min(0.95, 0.50 + (entropy - 3.0) * 0.15),
            source_ip=src_ip,
            dest_ip=dst_ip,
            evidence=name,
        )

    def _check_qname_length(
        self,
        name: str,
        src_ip: str,
        dst_ip: str,
    ) -> ExfilIndicator | None:
        """Flag QNAMEs longer than DNS_QNAME_MAX_NORMAL."""
        if len(name) <= DNS_QNAME_MAX_NORMAL:
            return None

        return ExfilIndicator(
            indicator_type="dns_long_qname",
            description=(
                f"QNAME length {len(name)} "
                f"exceeds normal threshold"
            ),
            confidence=0.65,
            source_ip=src_ip,
            dest_ip=dst_ip,
            evidence=name,
        )


def detect_base64_payload(
    data: bytes,
    src_ip: str = "",
    dst_ip: str = "",
) -> list[ExfilIndicator]:
    """
    Detect base64- or hex-encoded runs in a raw payload. All base64
    matches are emitted first, then all hex matches, mirroring the
    pattern priority.
    """
    found: list[ExfilIndicator] = []

    scans = (
        (BASE64_PATTERN, "base64_payload", "Base64-encoded", 0.55),
        (HEX_PATTERN, "hex_payload", "Hex-encoded", 0.45),
    )
    for pattern, kind, label, confidence in scans:
        for match in pattern.finditer(data):
            blob = match.group()
            found.append(
                ExfilIndicator(
                    indicator_type=kind,
                    description=(
                        f"{label} data "
                        f"({len(blob)} bytes) "
                        f"in network payload"
                    ),
                    confidence=confidence,
                    source_ip=src_ip,
                    dest_ip=dst_ip,
                    # Only the first 80 bytes are kept as evidence.
                    evidence=blob[:80].decode("ascii", errors="replace"),
                )
            )

    return found


def _extract_base_domain(name: str) -> str:
    """
    Extract the registerable domain from a QNAME.

    NOTE(review): naive last-two-labels heuristic with no public-suffix
    list, so "example.co.uk" yields "co.uk" — confirm acceptable.
    """
    labels = name.rstrip(".").split(".")
    if len(labels) >= 2:
        return ".".join(labels[-2:])
    return name


# --- src/dlp_scanner/network/flow_tracker.py ---
"""
©AngelaMos | 2026
flow_tracker.py
"""


from dataclasses import dataclass, field

import structlog

from dlp_scanner.network.pcap import PacketInfo


log = structlog.get_logger()

# (src_ip, dst_ip, src_port, dst_port), normalized so both directions of
# a conversation map to the same key (see make_flow_key).
FlowKey = tuple[str, str, int, int]


@dataclass(slots=True)
class FlowStats:
    """Aggregated statistics for a network flow."""

    src_ip: str = ""
    dst_ip: str = ""
    src_port: int = 0
    dst_port: int = 0
    protocol: str = ""
    packet_count: int = 0
    total_bytes: int = 0    # payload bytes only, not frame bytes
    start_time: float = 0.0
    end_time: float = 0.0
    # (tcp_seq, payload) pairs; UDP packets carry tcp_seq == 0.
    segments: list[tuple[int, bytes]] = field(default_factory=list)


class FlowTracker:
    """Tracks and reassembles network flows from packets."""

    def __init__(self) -> None:
        self._flows: dict[FlowKey, FlowStats] = {}

    def add_packet(self, packet: PacketInfo) -> None:
        """Add a packet to its (bidirectional) flow, creating it lazily."""
        key = make_flow_key(packet)
        flow = self._flows.get(key)

        if flow is None:
            flow = FlowStats(
                src_ip=packet.src_ip,
                dst_ip=packet.dst_ip,
                src_port=packet.src_port,
                dst_port=packet.dst_port,
                protocol=packet.protocol,
                start_time=packet.timestamp,
            )
            self._flows[key] = flow

        flow.packet_count += 1
        flow.total_bytes += len(packet.payload)
        flow.end_time = packet.timestamp

        if packet.payload:
            flow.segments.append((packet.tcp_seq, packet.payload))
flow.total_bytes += len(packet.payload) + flow.end_time = packet.timestamp + + if packet.payload: + flow.segments.append((packet.tcp_seq, packet.payload)) + + def get_flows(self) -> list[FlowStats]: + """ + Return all tracked flows + """ + return list(self._flows.values()) + + def get_flow(self, key: FlowKey) -> FlowStats | None: + """ + Get a specific flow by key + """ + return self._flows.get(key) + + def reassemble_stream(self, key: FlowKey) -> bytes: + """ + Reassemble TCP payload ordered by sequence number + """ + flow = self._flows.get(key) + if flow is None: + return b"" + + sorted_segments = sorted(flow.segments, key = lambda s: s[0]) + + seen_offsets: set[int] = set() + parts: list[bytes] = [] + for seq, data in sorted_segments: + if seq not in seen_offsets: + seen_offsets.add(seq) + parts.append(data) + + return b"".join(parts) + + @property + def flow_count(self) -> int: + """ + Return the number of tracked flows + """ + return len(self._flows) + + +def make_flow_key( + packet: PacketInfo, +) -> FlowKey: + """ + Create a bidirectional flow key from a packet + """ + forward = ( + packet.src_ip, + packet.dst_ip, + packet.src_port, + packet.dst_port, + ) + reverse = ( + packet.dst_ip, + packet.src_ip, + packet.dst_port, + packet.src_port, + ) + return min(forward, reverse) diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/network/pcap.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/network/pcap.py new file mode 100644 index 0000000..6bd1f4b --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/network/pcap.py @@ -0,0 +1,115 @@ +""" +©AngelaMos | 2026 +pcap.py +""" + + +import socket +from collections.abc import Iterator +from dataclasses import dataclass +from pathlib import Path + +import structlog + + +log = structlog.get_logger() + +TCP_PROTO: int = 6 +UDP_PROTO: int = 17 + + +@dataclass(frozen = True, slots = True) +class PacketInfo: + """ + Parsed network packet with extracted metadata + """ + + timestamp: float + 
src_ip: str + dst_ip: str + src_port: int + dst_port: int + protocol: str + payload: bytes + raw_length: int + tcp_flags: int = 0 + tcp_seq: int = 0 + + +def read_pcap( + path: Path, + max_packets: int = 0, +) -> Iterator[PacketInfo]: + """ + Read packets from a PCAP or PCAPNG file + """ + import dpkt + + with open(path, "rb") as f: + try: + pcap = dpkt.pcap.Reader(f) + except ValueError: + f.seek(0) + pcap = dpkt.pcapng.Reader(f) + + count = 0 + for timestamp, buf in pcap: + if max_packets > 0 and count >= max_packets: + break + + packet = _parse_ethernet(timestamp, buf) + if packet is not None: + yield packet + count += 1 + + +def _parse_ethernet( + timestamp: float, + buf: bytes, +) -> PacketInfo | None: + """ + Parse an Ethernet frame into a PacketInfo + """ + import dpkt + + try: + eth = dpkt.ethernet.Ethernet(buf) + except (dpkt.NeedData, dpkt.UnpackError): + return None + + if not isinstance(eth.data, dpkt.ip.IP): + return None + + ip_pkt = eth.data + src_ip = socket.inet_ntoa(ip_pkt.src) + dst_ip = socket.inet_ntoa(ip_pkt.dst) + + if isinstance(ip_pkt.data, dpkt.tcp.TCP): + tcp = ip_pkt.data + return PacketInfo( + timestamp = timestamp, + src_ip = src_ip, + dst_ip = dst_ip, + src_port = tcp.sport, + dst_port = tcp.dport, + protocol = "tcp", + payload = bytes(tcp.data), + raw_length = len(buf), + tcp_flags = tcp.flags, + tcp_seq = tcp.seq, + ) + + if isinstance(ip_pkt.data, dpkt.udp.UDP): + udp = ip_pkt.data + return PacketInfo( + timestamp = timestamp, + src_ip = src_ip, + dst_ip = dst_ip, + src_port = udp.sport, + dst_port = udp.dport, + protocol = "udp", + payload = bytes(udp.data), + raw_length = len(buf), + ) + + return None diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/network/protocols.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/network/protocols.py new file mode 100644 index 0000000..edf2abd --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/network/protocols.py @@ -0,0 +1,250 @@ +""" +©AngelaMos | 2026 
# --- src/dlp_scanner/network/protocols.py ---
"""
©AngelaMos | 2026
protocols.py
"""


import socket
from dataclasses import dataclass, field

import structlog


log = structlog.get_logger()

HTTP_METHODS: frozenset[bytes] = frozenset(
    {
        b"GET",
        b"POST",
        b"PUT",
        b"DELETE",
        b"HEAD",
        b"OPTIONS",
        b"PATCH",
    }
)

HTTP_RESPONSE_PREFIX: bytes = b"HTTP/"
# TLS record: content type 0x16 (handshake) + record version major 0x03.
TLS_RECORD_PREFIX: bytes = b"\x16\x03"
SSH_PREFIX: bytes = b"SSH-"
SMTP_BANNER_PREFIX: bytes = b"220 "
DNS_PORT: int = 53

# DNS RR type number -> mnemonic; unknown types fall back to str(number).
DNS_QTYPES: dict[int, str] = {
    1: "A",
    2: "NS",
    5: "CNAME",
    6: "SOA",
    12: "PTR",
    15: "MX",
    16: "TXT",
    28: "AAAA",
    33: "SRV",
    255: "ANY",
}


@dataclass(frozen=True, slots=True)
class HttpMessage:
    """Parsed HTTP request or response."""

    method: str             # empty for responses
    uri: str                # empty for responses
    version: str
    headers: dict[str, str]
    body: str
    is_request: bool


@dataclass(frozen=True, slots=True)
class DnsQuery:
    """A single DNS query entry."""

    name: str
    query_type: str   # mnemonic from DNS_QTYPES or str(number)
    query_class: str


@dataclass(frozen=True, slots=True)
class DnsRecord:
    """Parsed DNS message with queries and answers."""

    queries: list[DnsQuery] = field(default_factory=list)
    answers: list[str] = field(default_factory=list)
    is_response: bool = False
    transaction_id: int = 0


def parse_http(payload: bytes) -> HttpMessage | None:
    """
    Parse an HTTP request or response from a raw payload.

    Returns None for non-HTTP or truncated/malformed payloads.
    """
    import dpkt

    try:
        if _is_http_request(payload):
            req = dpkt.http.Request(payload)
            return HttpMessage(
                method=req.method,
                uri=req.uri,
                version=req.version,
                headers=dict(req.headers),
                body=_decode_body(req.body),
                is_request=True,
            )

        if payload.startswith(HTTP_RESPONSE_PREFIX):
            resp = dpkt.http.Response(payload)
            return HttpMessage(
                method="",
                uri="",
                version=resp.version,
                headers=dict(resp.headers),
                body=_decode_body(resp.body),
                is_request=False,
            )
    except (dpkt.NeedData, dpkt.UnpackError):
        return None

    return None


def parse_dns(payload: bytes) -> DnsRecord | None:
    """
    Parse a DNS message from a raw UDP payload; None on parse failure.
    """
    import dpkt

    try:
        dns = dpkt.dns.DNS(payload)
    except (dpkt.NeedData, dpkt.UnpackError):
        return None

    queries: list[DnsQuery] = []
    for qd in dns.qd:
        queries.append(
            DnsQuery(
                name=qd.name,
                query_type=DNS_QTYPES.get(qd.type, str(qd.type)),
                query_class=str(qd.cls),
            )
        )

    answers: list[str] = []
    for an in dns.an:
        _parse_answer(an, answers)

    return DnsRecord(
        queries=queries,
        answers=answers,
        is_response=bool(dns.qr),
        transaction_id=dns.id,
    )


def identify_protocol(payload: bytes) -> str:
    """
    Identify the application-layer protocol of a payload via simple DPI.

    Returns one of "http", "tls", "ssh", "smtp", or "unknown".
    """
    if not payload:
        return "unknown"

    if _is_http_request(payload):
        return "http"

    if payload.startswith(HTTP_RESPONSE_PREFIX):
        return "http"

    # BUG FIX: the original required len(payload) > 2 before comparing
    # payload[:2], so an exactly-2-byte TLS record prefix was reported
    # as "unknown"; startswith() handles every length correctly.
    if payload.startswith(TLS_RECORD_PREFIX):
        return "tls"

    if payload.startswith(SSH_PREFIX):
        return "ssh"

    if payload.startswith(SMTP_BANNER_PREFIX):
        return "smtp"

    return "unknown"


def _is_http_request(payload: bytes) -> bool:
    """
    Check whether the payload starts with an HTTP method token.

    Method tokens are 3-7 bytes (GET .. OPTIONS); find() returning -1
    (no space at all) also fails the range check.
    """
    first_space = payload.find(b" ")
    if first_space < 3 or first_space > 7:
        return False
    return payload[:first_space] in HTTP_METHODS


def _decode_body(body: bytes | str) -> str:
    """Decode an HTTP body to str; empty string on failure or no body."""
    if isinstance(body, str):
        return body
    if not body:
        return ""
    try:
        return body.decode("utf-8", errors="replace")
    except Exception:
        return ""


def _parse_answer(an: object, answers: list[str]) -> None:
    """
    Append a printable form of one DNS answer record to `answers`.

    Best-effort: A records render as dotted quads, TXT as decoded
    strings, CNAME/name attributes verbatim; anything malformed is
    silently skipped (deliberate — answers are advisory only).
    """
    try:
        an_type = getattr(an, "type", 0)
        rdata = getattr(an, "rdata", b"")

        if an_type == 1 and len(rdata) == 4:
            # A record: 4-byte IPv4 address.
            answers.append(socket.inet_ntoa(rdata))
        elif an_type == 16 and rdata:
            answers.append(_parse_txt_rdata(rdata))
        elif hasattr(an, "cname") and an.cname:
            answers.append(an.cname)
        elif hasattr(an, "name") and an.name:
            answers.append(an.name)
    except Exception:
        pass


def _parse_txt_rdata(rdata: bytes) -> str:
    """
    Parse TXT rdata (length-prefixed character strings) into one
    space-joined string; stops at the first truncated chunk.
    """
    parts: list[str] = []
    i = 0
    while i < len(rdata):
        length = rdata[i]
        i += 1
        if i + length <= len(rdata):
            chunk = rdata[i:i + length]
            parts.append(chunk.decode("utf-8", errors="replace"))
            i += length
        else:
            break
    return " ".join(parts)


# --- src/dlp_scanner/redaction.py ---
"""
©AngelaMos | 2026
redaction.py
"""


from dlp_scanner.constants import RedactionStyle


REDACTED_LABEL: str = "[REDACTED]"
MASK_CHAR: str = "*"
SNIPPET_CONTEXT_CHARS: int = 20


def redact(
    text: str,
    start: int,
    end: int,
    style: RedactionStyle = "partial",
) -> str:
    """
    Redact text[start:end] per `style` ("none" | "full" | partial
    default) and return a context snippet around the masked match.
    """
    matched = text[start:end]

    if style == "none":
        return _build_snippet(text, start, end, matched)

    if style == "full":
        return _build_snippet(text, start, end, REDACTED_LABEL)

    return _build_snippet(text, start, end, _partial_redact(matched))


def _partial_redact(value: str) -> str:
    """
    Partially mask a value, keeping a small visible tail.

    Long digit strings (SSN/PAN-like, >= 9 digits after stripping
    dashes/spaces) keep the last 4 characters; emails keep the first
    local character and the whole domain; other values longer than 8
    keep a quarter (min 4) of the tail; everything else is fully masked.
    """
    digits_only = value.replace("-", "").replace(" ", "")

    if len(digits_only) >= 9 and digits_only.isdigit():
        return MASK_CHAR * (len(value) - 4) + value[-4:]

    if "@" in value:
        # NOTE(review): a single-character local part ends up unmasked,
        # and an empty local part would raise IndexError — confirm inputs.
        local, domain = value.rsplit("@", maxsplit=1)
        masked_local = local[0] + MASK_CHAR * (len(local) - 1)
        return f"{masked_local}@{domain}"

    if len(value) > 8:
        visible = max(4, len(value) // 4)
        return MASK_CHAR * (len(value) - visible) + value[-visible:]

    return MASK_CHAR * len(value)
def _build_snippet(
    text: str,
    start: int,
    end: int,
    replacement: str,
) -> str:
    """
    Build a snippet: up to SNIPPET_CONTEXT_CHARS of context on each side
    of the match, with `replacement` substituted for the matched span
    and "..." ellipses marking truncation.

    NOTE(review): the pieces are joined without separators, so trimmed
    context abuts the replacement directly — confirm that is intended.
    """
    ctx_start = max(0, start - SNIPPET_CONTEXT_CHARS)
    ctx_end = min(len(text), end + SNIPPET_CONTEXT_CHARS)

    before = text[ctx_start:start].replace("\n", " ").strip()
    after = text[end:ctx_end].replace("\n", " ").strip()

    pieces: list[str] = []
    if ctx_start > 0:
        pieces.append("...")
    pieces.extend((before, replacement, after))
    if ctx_end < len(text):
        pieces.append("...")

    return "".join(pieces)


# --- src/dlp_scanner/reporters/__init__.py ---
"""
©AngelaMos | 2026
__init__.py
"""

# --- src/dlp_scanner/reporters/base.py ---
"""
©AngelaMos | 2026
base.py
"""


from typing import Protocol

from dlp_scanner.models import ScanResult


class Reporter(Protocol):
    """Protocol every report output format implements."""

    def generate(self, result: ScanResult) -> str:
        """Generate report content as a string."""
        ...
# --- src/dlp_scanner/reporters/console.py ---
"""
©AngelaMos | 2026
console.py
"""


from rich.console import Console
from rich.table import Table

from dlp_scanner.constants import SEVERITY_COLORS
from dlp_scanner.models import ScanResult


TRUNCATE_SNIPPET: int = 60

# Severity buckets in display order.
_SEVERITY_ORDER = ("critical", "high", "medium", "low")


class ConsoleReporter:
    """Rich console output with severity-colored tables."""

    def __init__(self, console: Console | None = None) -> None:
        self._console = console or Console()

    def generate(self, result: ScanResult) -> str:
        """Generate a plain-text report suitable for piping."""
        out: list[str] = [
            f"Scan {result.scan_id} | "
            f"{len(result.findings)} findings | "
            f"{result.targets_scanned} targets",
            "",
        ]

        for finding in result.findings:
            out.append(
                f"[{finding.severity.upper()}] "
                f"{finding.rule_name} | "
                f"{_location_label(finding.location, ' ')} | "
                f"{finding.confidence:.0%} | "
                f"{_clip(finding.redacted_snippet)} | "
                f"{', '.join(finding.compliance_frameworks)}"
            )

        out.append("")
        out.append(_format_summary(result))
        return "\n".join(out)

    def display(self, result: ScanResult) -> None:
        """Print a Rich-formatted findings table to the console."""
        self._console.print()

        if not result.findings:
            self._console.print("[green]No findings detected.[/green]")
            _print_summary(self._console, result)
            return

        table = Table(
            title=(
                f"DLP Scan Results "
                f"({len(result.findings)} findings)"
            ),
            show_lines=True,
        )
        table.add_column("Severity", width=10, justify="center")
        table.add_column("Rule", width=25)
        table.add_column("Location", width=30)
        table.add_column("Confidence", width=10)
        table.add_column("Snippet", width=40)
        table.add_column("Compliance", width=20)

        for finding in result.findings:
            color = SEVERITY_COLORS.get(finding.severity, "white")
            table.add_row(
                f"[{color}]{finding.severity.upper()}[/{color}]",
                finding.rule_name,
                _location_label(finding.location, "\n"),
                f"{finding.confidence:.0%}",
                _clip(finding.redacted_snippet),
                "\n".join(finding.compliance_frameworks),
            )

        self._console.print(table)
        _print_summary(self._console, result)

        if result.errors:
            self._console.print()
            self._console.print(
                f"[yellow]{len(result.errors)} "
                f"error(s) during scan[/yellow]"
            )


def _clip(snippet: str) -> str:
    """Truncate a snippet to TRUNCATE_SNIPPET chars with an ellipsis."""
    if len(snippet) > TRUNCATE_SNIPPET:
        return snippet[:TRUNCATE_SNIPPET] + "..."
    return snippet


def _location_label(location, table_prefix: str) -> str:
    """
    Render "uri[:line]" plus an optional "[table]" suffix; the suffix
    is separated by `table_prefix` (" " in text mode, "\n" in tables).
    """
    label = location.uri
    if location.line is not None:
        label += f":{location.line}"
    if location.table_name:
        label += f"{table_prefix}[{location.table_name}]"
    return label


def _format_summary(result: ScanResult) -> str:
    """Format summary counts as plain text."""
    counts = result.findings_by_severity
    parts: list[str] = []
    for sev in _SEVERITY_ORDER:
        n = counts.get(sev, 0)
        if n > 0:
            parts.append(f"{sev}: {n}")

    summary = " | ".join(parts) if parts else "clean"
    return (
        f"Summary: {summary} "
        f"({result.targets_scanned} targets scanned)"
    )


def _print_summary(console: Console, result: ScanResult) -> None:
    """Print colorized summary counts using Rich markup."""
    console.print()
    counts = result.findings_by_severity
    parts: list[str] = []
    for sev in _SEVERITY_ORDER:
        n = counts.get(sev, 0)
        if n > 0:
            color = SEVERITY_COLORS.get(sev, "white")
            parts.append(f"[{color}]{sev}: {n}[/{color}]")

    # NOTE(review): "[green]clean" has no closing tag; Rich tolerates
    # unclosed tags, but confirm it is intentional.
    summary = " | ".join(parts) if parts else "[green]clean"
    console.print(
        f"Summary: {summary} "
        f"({result.targets_scanned} targets)"
    )


# --- src/dlp_scanner/reporters/csv_report.py ---
"""
©AngelaMos | 2026
csv_report.py
"""


import csv
import io

from dlp_scanner.models import ScanResult


CSV_COLUMNS: list[str] = [
    "finding_id",
    "scan_date",
    "severity",
    "confidence",
    "rule_id",
    "rule_name",
    "source_type",
    "uri",
    "line",
    "column",
    "table_name",
    "redacted_snippet",
    "compliance_frameworks",
    "remediation",
]


class CsvReporter:
    """CSV export for compliance team consumption."""

    def generate(self, result: ScanResult) -> str:
        """Render the result as CSV text: header + one row per finding."""
        buffer = io.StringIO()
        writer = csv.writer(buffer)
        writer.writerow(CSV_COLUMNS)
        for finding in result.findings:
            writer.writerow(_csv_row(finding))
        return buffer.getvalue()


def _csv_row(finding) -> list:
    """Build one CSV row; absent optional fields render as empty cells."""
    loc = finding.location
    return [
        finding.finding_id,
        finding.detected_at.isoformat(),
        finding.severity,
        f"{finding.confidence:.4f}",
        finding.rule_id,
        finding.rule_name,
        loc.source_type,
        loc.uri,
        loc.line or "",
        loc.column or "",
        loc.table_name or "",
        finding.redacted_snippet,
        ";".join(finding.compliance_frameworks),
        finding.remediation,
    ]
# --- src/dlp_scanner/reporters/json_report.py ---
"""
©AngelaMos | 2026
json_report.py
"""


from typing import Any

import orjson

from dlp_scanner.models import ScanResult


class JsonReporter:
    """Structured JSON report with metadata and summary."""

    def generate(self, result: ScanResult) -> str:
        """Serialize the full report with 2-space indentation."""
        return orjson.dumps(
            _build_report(result),
            option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS,
        ).decode("utf-8")


def _build_report(result: ScanResult) -> dict[str, Any]:
    """Assemble the top-level report structure."""
    return {
        "scan_metadata": _build_metadata(result),
        "findings": [_serialize_finding(f) for f in result.findings],
        "summary": _build_summary(result),
    }


def _build_metadata(result: ScanResult) -> dict[str, Any]:
    """Scan metadata section; completed_at is null while in progress."""
    completed = result.scan_completed_at
    return {
        "scan_id": result.scan_id,
        "tool_version": result.tool_version,
        "scan_started_at": result.scan_started_at.isoformat(),
        "scan_completed_at": (
            completed.isoformat() if completed else None
        ),
        "targets_scanned": result.targets_scanned,
        "total_findings": len(result.findings),
        "errors": result.errors,
    }


def _serialize_finding(finding: Any) -> dict[str, Any]:
    """Serialize one finding, nesting its location sub-record."""
    loc = finding.location
    return {
        "finding_id": finding.finding_id,
        "rule_id": finding.rule_id,
        "rule_name": finding.rule_name,
        "severity": finding.severity,
        "confidence": round(finding.confidence, 4),
        "location": {
            "source_type": loc.source_type,
            "uri": loc.uri,
            "line": loc.line,
            "column": loc.column,
            "table_name": loc.table_name,
            "column_name": loc.column_name,
        },
        "redacted_snippet": finding.redacted_snippet,
        "compliance_frameworks": finding.compliance_frameworks,
        "remediation": finding.remediation,
        "detected_at": finding.detected_at.isoformat(),
    }


def _build_summary(result: ScanResult) -> dict[str, Any]:
    """Aggregated finding counts grouped three ways."""
    return {
        "by_severity": result.findings_by_severity,
        "by_rule": result.findings_by_rule,
        "by_framework": result.findings_by_framework,
    }


# --- src/dlp_scanner/reporters/sarif.py ---
"""
©AngelaMos | 2026
sarif.py
"""


from typing import Any

import orjson

from dlp_scanner.constants import SARIF_SEVERITY_MAP
from dlp_scanner.models import Finding, ScanResult


SARIF_SCHEMA: str = (
    "https://raw.githubusercontent.com/"
    "oasis-tcs/sarif-spec/main/sarif-2.1/"
    "schema/sarif-schema-2.1.0.json"
)
SARIF_VERSION: str = "2.1.0"
TOOL_NAME: str = "dlp-scanner"


class SarifReporter:
    """SARIF 2.1.0 output for CI/CD integration."""

    def generate(self, result: ScanResult) -> str:
        """Serialize the scan result as a SARIF 2.1.0 JSON document."""
        return orjson.dumps(
            _build_sarif(result),
            option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS,
        ).decode("utf-8")


def _build_sarif(result: ScanResult) -> dict[str, Any]:
    """Build the complete single-run SARIF document."""
    rules = _collect_rules(result.findings)
    return {
        "$schema": SARIF_SCHEMA,
        "version": SARIF_VERSION,
        "runs": [
            {
                "tool": {
                    "driver": {
                        "name": TOOL_NAME,
                        "version": result.tool_version,
                        # dict preserves insertion order, so this list
                        # aligns with ruleIndex in each result.
                        "rules": list(rules.values()),
                    }
                },
                "results": [
                    _build_result(f, rules) for f in result.findings
                ],
            }
        ],
    }


def _collect_rules(
    findings: list[Finding],
) -> dict[str, dict[str, Any]]:
    """Collect the unique rule descriptors referenced by findings."""
    rules: dict[str, dict[str, Any]] = {}

    for finding in findings:
        if finding.rule_id in rules:
            continue
        rules[finding.rule_id] = {
            "id": finding.rule_id,
            "name": finding.rule_name,
            "shortDescription": {
                "text": finding.rule_name,
            },
            "properties": {
                "compliance_frameworks": finding.compliance_frameworks,
            },
        }

    return rules
def _build_result(
    finding: Finding,
    rules: dict[str, dict[str, Any]],
) -> dict[str, Any]:
    """Build a single SARIF result entry for one finding."""
    return {
        "ruleId": finding.rule_id,
        # Index into the driver's rules array; rules preserves the
        # insertion order used by list(rules.values()) in the driver.
        "ruleIndex": list(rules.keys()).index(finding.rule_id),
        "level": SARIF_SEVERITY_MAP.get(finding.severity, "note"),
        "message": {
            "text": (
                f"{finding.rule_name} detected "
                f"with {finding.confidence:.0%} "
                f"confidence"
            ),
        },
        "locations": [_build_location(finding)],
        "properties": {
            "confidence": round(finding.confidence, 4),
            "redactedSnippet": finding.redacted_snippet,
            "complianceFrameworks": finding.compliance_frameworks,
            "remediation": finding.remediation,
        },
    }


def _build_location(finding: Finding) -> dict[str, Any]:
    """Build the SARIF physical (and optional logical) location."""
    loc = finding.location

    physical: dict[str, Any] = {
        "artifactLocation": {"uri": loc.uri},
    }

    region: dict[str, Any] = {}
    if loc.line is not None:
        region["startLine"] = loc.line
    if loc.column is not None:
        region["startColumn"] = loc.column
    if region:
        physical["region"] = region

    entry: dict[str, Any] = {"physicalLocation": physical}
    if loc.table_name:
        # Database findings carry the table as a logical location.
        entry["logicalLocations"] = [
            {
                "name": loc.table_name,
                "kind": "table",
            }
        ]

    return entry


# --- src/dlp_scanner/scanners/__init__.py ---
"""
©AngelaMos | 2026
__init__.py
"""

# --- src/dlp_scanner/scanners/base.py ---
"""
©AngelaMos | 2026
base.py
"""


from typing import Protocol

from dlp_scanner.models import ScanResult


class Scanner(Protocol):
    """Protocol for all scan strategies."""

    def scan(self, target: str) -> ScanResult:
        """Scan the target and return aggregated results."""
        ...


# --- src/dlp_scanner/scanners/db_scanner.py ---
"""
©AngelaMos | 2026
db_scanner.py
"""


import asyncio
from datetime import datetime, UTC
from typing import Any
from urllib.parse import urlparse

import structlog

from dlp_scanner.config import ScanConfig
from dlp_scanner.constants import (
    TEXT_DB_COLUMN_TYPES_MYSQL,
    TEXT_DB_COLUMN_TYPES_PG,
)
from dlp_scanner.detectors.base import DetectorMatch
from dlp_scanner.detectors.registry import DetectorRegistry
from dlp_scanner.models import (
    Location,
    ScanResult,
)
from dlp_scanner.scoring import match_to_finding


log = structlog.get_logger()

# URI schemes accepted for each backend.
POSTGRES_SCHEMES: frozenset[str] = frozenset({
    "postgresql",
    "postgres",
})
MYSQL_SCHEMES: frozenset[str] = frozenset({
    "mysql",
    "mysql+aiomysql",
})
MONGODB_SCHEMES: frozenset[str] = frozenset({
    "mongodb",
    "mongodb+srv",
})
SQLITE_SCHEMES: frozenset[str] = frozenset({
    "sqlite",
})


class DatabaseScanner:
    """Scans database tables for sensitive data in text columns."""

    def __init__(
        self,
        config: ScanConfig,
        registry: DetectorRegistry,
    ) -> None:
        self._db_config = config.database
        self._detection_config = config.detection
        self._redaction_style = config.output.redaction_style
        self._registry = registry
registry: DetectorRegistry, + ) -> None: + self._db_config = config.database + self._detection_config = config.detection + self._redaction_style = config.output.redaction_style + self._registry = registry + + def scan(self, target: str) -> ScanResult: + """ + Scan a database identified by connection URI + """ + return asyncio.run(self._scan_async(target)) + + async def _scan_async( + self, + connection_uri: str, + ) -> ScanResult: + """ + Dispatch to the appropriate database scanner + """ + result = ScanResult() + parsed = urlparse(connection_uri) + scheme = parsed.scheme.lower() + + try: + if scheme in POSTGRES_SCHEMES: + await self._scan_postgres(connection_uri, result) + elif scheme in MYSQL_SCHEMES: + await self._scan_mysql(connection_uri, result) + elif scheme in MONGODB_SCHEMES: + await self._scan_mongodb(connection_uri, result) + elif scheme in SQLITE_SCHEMES: + await self._scan_sqlite(connection_uri, result) + else: + result.errors.append( + f"Unsupported database scheme: " + f"{scheme}" + ) + except Exception as exc: + log.warning( + "database_scan_failed", + scheme = scheme, + error = str(exc), + ) + result.errors.append(f"Database scan failed: {exc}") + + result.scan_completed_at = datetime.now(UTC) + return result + + async def _scan_postgres( + self, + uri: str, + result: ScanResult, + ) -> None: + """ + Scan PostgreSQL using asyncpg with TABLESAMPLE + """ + import asyncpg + + conn = await asyncpg.connect( + uri, + timeout = self._db_config.timeout_seconds, + ) + + try: + tables = await self._get_pg_tables(conn) + tables = self._filter_tables(tables) + + for table_name in tables: + text_cols = ( + await self._get_pg_text_columns(conn, + table_name) + ) + if not text_cols: + continue + + col_list = ", ".join(f'"{c}"' for c in text_cols) + query = ( + f"SELECT {col_list} " + f'FROM "{table_name}" ' + f"TABLESAMPLE BERNOULLI(" + f"{self._db_config.sample_percentage}" + f") LIMIT " + f"{self._db_config.max_rows_per_table}" + ) + + rows = await 
conn.fetch(query) + self._process_record_rows( + rows, + text_cols, + table_name, + uri, + result, + ) + result.targets_scanned += 1 + finally: + await conn.close() + + async def _get_pg_tables( + self, + conn: Any, + ) -> list[str]: + """ + List user tables in PostgreSQL + """ + rows = await conn.fetch( + "SELECT table_name " + "FROM information_schema.tables " + "WHERE table_schema = 'public' " + "AND table_type = 'BASE TABLE'" + ) + return [r["table_name"] for r in rows] + + async def _get_pg_text_columns( + self, + conn: Any, + table_name: str, + ) -> list[str]: + """ + Find text-type columns in a PostgreSQL table + """ + rows = await conn.fetch( + "SELECT column_name " + "FROM information_schema.columns " + "WHERE table_name = $1 " + "AND data_type = ANY($2::text[])", + table_name, + list(TEXT_DB_COLUMN_TYPES_PG), + ) + return [r["column_name"] for r in rows] + + async def _scan_mysql( + self, + uri: str, + result: ScanResult, + ) -> None: + """ + Scan MySQL using aiomysql with random sampling + """ + import aiomysql + + parsed = urlparse(uri) + conn = await aiomysql.connect( + host = parsed.hostname or "localhost", + port = parsed.port or 3306, + user = parsed.username or "root", + password = parsed.password or "", + db = parsed.path.lstrip("/"), + connect_timeout = (self._db_config.timeout_seconds), + ) + + try: + async with conn.cursor(aiomysql.DictCursor) as cur: + await cur.execute( + "SELECT table_name " + "FROM information_schema.tables " + "WHERE table_schema = DATABASE() " + "AND table_type = 'BASE TABLE'" + ) + raw_tables = await cur.fetchall() + tables = [r["TABLE_NAME"] for r in raw_tables] + tables = self._filter_tables(tables) + + for table_name in tables: + text_cols = ( + await self._get_mysql_text_cols(cur, + table_name) + ) + if not text_cols: + continue + + col_list = ", ".join(f"`{c}`" for c in text_cols) + limit = (self._db_config.max_rows_per_table) + await cur.execute( + f"SELECT {col_list} " + f"FROM `{table_name}` " + f"ORDER BY RAND() 
" + f"LIMIT {limit}" + ) + rows = await cur.fetchall() + self._process_dict_rows( + rows, + text_cols, + table_name, + uri, + result, + ) + result.targets_scanned += 1 + finally: + conn.close() + + async def _get_mysql_text_cols( + self, + cursor: Any, + table_name: str, + ) -> list[str]: + """ + Find text-type columns in a MySQL table + """ + placeholders = ",".join(["%s"] * len(TEXT_DB_COLUMN_TYPES_MYSQL)) + await cursor.execute( + "SELECT column_name " + "FROM information_schema.columns " + "WHERE table_name = %s " + "AND table_schema = DATABASE() " + f"AND data_type IN ({placeholders})", + (table_name, + *TEXT_DB_COLUMN_TYPES_MYSQL), + ) + rows = await cursor.fetchall() + return [r["COLUMN_NAME"] for r in rows] + + async def _scan_mongodb( + self, + uri: str, + result: ScanResult, + ) -> None: + """ + Scan MongoDB collections using pymongo async + """ + from pymongo import AsyncMongoClient + + parsed = urlparse(uri) + db_name = parsed.path.lstrip("/").split("?")[0] + + if not db_name: + result.errors.append("MongoDB URI must include database name") + return + + client: AsyncMongoClient[dict[str, Any]] = (AsyncMongoClient(uri)) + + try: + db = client[db_name] + collections = (await db.list_collection_names()) + collections = self._filter_tables(collections) + + for coll_name in collections: + coll = db[coll_name] + sample_size = (self._db_config.max_rows_per_table) + cursor = coll.aggregate( + [{ + "$sample": { + "size": sample_size + } + }] + ) + + async for doc in cursor: + text_parts: list[str] = [] + _extract_mongo_strings(doc, text_parts) + if not text_parts: + continue + + combined = "\n".join(text_parts) + matches = self._registry.detect(combined) + self._append_findings( + matches, + combined, + table_name = coll_name, + uri = uri, + result = result, + ) + + result.targets_scanned += 1 + finally: + client.close() + + async def _scan_sqlite( + self, + uri: str, + result: ScanResult, + ) -> None: + """ + Scan SQLite database using aiosqlite + """ + import 
aiosqlite + + parsed = urlparse(uri) + db_path = parsed.path + while db_path.startswith("//"): + db_path = db_path[1 :] + + async with aiosqlite.connect(db_path) as db: + cursor = await db.execute( + "SELECT name FROM sqlite_master " + "WHERE type = 'table' " + "AND name NOT LIKE 'sqlite_%'" + ) + rows = await cursor.fetchall() + tables = [r[0] for r in rows] + tables = self._filter_tables(tables) + + for table_name in tables: + text_cols = ( + await self._get_sqlite_text_cols(db, + table_name) + ) + if not text_cols: + continue + + col_list = ", ".join(f'"{c}"' for c in text_cols) + limit = (self._db_config.max_rows_per_table) + cursor = await db.execute( + f"SELECT {col_list} " + f'FROM "{table_name}" ' + f"ORDER BY RANDOM() " + f"LIMIT {limit}" + ) + fetched = await cursor.fetchall() + for row in fetched: + for idx, col_name in enumerate(text_cols): + val = row[idx] + if val is None: + continue + text = str(val) + if not text.strip(): + continue + matches = self._registry.detect(text) + self._append_findings( + matches, + text, + table_name = table_name, + column_name = col_name, + uri = uri, + result = result, + ) + result.targets_scanned += 1 + + async def _get_sqlite_text_cols( + self, + db: Any, + table_name: str, + ) -> list[str]: + """ + Find text-type columns in a SQLite table + """ + cursor = await db.execute(f'PRAGMA table_info("{table_name}")') + rows = await cursor.fetchall() + text_types = frozenset({"text", "varchar", "char", "clob"}) + return [ + r[1] + for r in rows + if r[2].lower() in text_types or "text" in r[2].lower() + ] + + def _filter_tables( + self, + tables: list[str], + ) -> list[str]: + """ + Apply include/exclude table filters + """ + include = self._db_config.include_tables + exclude = frozenset(self._db_config.exclude_tables) + + if include: + include_set = frozenset(include) + tables = [t for t in tables if t in include_set] + + return [t for t in tables if t not in exclude] + + def _process_record_rows( + self, + rows: list[Any], 
+ columns: list[str], + table_name: str, + uri: str, + result: ScanResult, + ) -> None: + """ + Process asyncpg Record rows through detection + """ + for row in rows: + for col_name in columns: + val = row[col_name] + if val is None: + continue + text = str(val) + if not text.strip(): + continue + matches = self._registry.detect(text) + self._append_findings( + matches, + text, + table_name = table_name, + column_name = col_name, + uri = uri, + result = result, + ) + + def _process_dict_rows( + self, + rows: list[dict[str, + Any]], + columns: list[str], + table_name: str, + uri: str, + result: ScanResult, + ) -> None: + """ + Process dictionary rows through detection + """ + for row in rows: + for col_name in columns: + val = row.get(col_name) + if val is None: + continue + text = str(val) + if not text.strip(): + continue + matches = self._registry.detect(text) + self._append_findings( + matches, + text, + table_name = table_name, + column_name = col_name, + uri = uri, + result = result, + ) + + def _append_findings( + self, + matches: list[DetectorMatch], + text: str, + table_name: str, + uri: str, + result: ScanResult, + column_name: str = "", + ) -> None: + """ + Convert detector matches to findings and append + """ + min_confidence = (self._detection_config.min_confidence) + + location = Location( + source_type = "database", + uri = uri, + table_name = table_name, + column_name = column_name or None, + ) + + for match in matches: + if match.score < min_confidence: + continue + + finding = match_to_finding( + match, + text, + location, + self._redaction_style, + ) + result.findings.append(finding) + + +def _extract_mongo_strings( + doc: dict[str, + Any], + parts: list[str], + prefix: str = "", +) -> None: + """ + Recursively extract string values from a MongoDB document + """ + for key, val in doc.items(): + if key == "_id": + continue + key_path = (f"{prefix}.{key}" if prefix else key) + if isinstance(val, str) and val.strip(): + parts.append(f"{key_path}: 
{val}") + elif isinstance(val, dict): + _extract_mongo_strings(val, parts, key_path) + elif isinstance(val, list): + for item in val: + if (isinstance(item, str) and item.strip()): + parts.append(f"{key_path}: {item}") + elif isinstance(item, dict): + _extract_mongo_strings(item, parts, key_path) diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scanners/file_scanner.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scanners/file_scanner.py new file mode 100644 index 0000000..b83ac5a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scanners/file_scanner.py @@ -0,0 +1,224 @@ +""" +©AngelaMos | 2026 +file_scanner.py +""" + + +import fnmatch +from datetime import datetime, UTC +from pathlib import Path + +import structlog + +from dlp_scanner.config import ScanConfig +from dlp_scanner.detectors.registry import DetectorRegistry +from dlp_scanner.extractors.archive import ArchiveExtractor +from dlp_scanner.extractors.base import Extractor +from dlp_scanner.extractors.email import ( + EmlExtractor, + MsgExtractor, +) +from dlp_scanner.extractors.office import ( + DocxExtractor, + XlsExtractor, + XlsxExtractor, +) +from dlp_scanner.extractors.pdf import PDFExtractor +from dlp_scanner.extractors.plaintext import ( + PlaintextExtractor, +) +from dlp_scanner.extractors.structured import ( + AvroExtractor, + CsvExtractor, + JsonExtractor, + ParquetExtractor, + XmlExtractor, + YamlExtractor, +) +from dlp_scanner.models import ( + ScanResult, + TextChunk, +) +from dlp_scanner.scoring import match_to_finding + + +log = structlog.get_logger() + +MB_BYTES: int = 1024 * 1024 + + +class FileScanner: + """ + Scans files in a directory tree for sensitive data + """ + def __init__( + self, + config: ScanConfig, + registry: DetectorRegistry, + ) -> None: + self._file_config = config.file + self._detection_config = config.detection + self._redaction_style = config.output.redaction_style + self._registry = registry + self._extension_map = 
_build_extension_map() + self._allowed_extensions = frozenset( + self._file_config.include_extensions + ) + + def scan(self, target: str) -> ScanResult: + """ + Walk a directory and scan all matching files + """ + result = ScanResult() + target_path = Path(target) + + if target_path.is_file(): + self._scan_file(target_path, result) + result.targets_scanned = 1 + elif target_path.is_dir(): + self._scan_directory(target_path, result) + else: + result.errors.append(f"Target not found: {target}") + + result.scan_completed_at = datetime.now(UTC) + return result + + def _scan_directory( + self, + directory: Path, + result: ScanResult, + ) -> None: + """ + Recursively walk a directory and scan matching files + """ + max_bytes = (self._file_config.max_file_size_mb * MB_BYTES) + iterator = ( + directory.rglob("*") + if self._file_config.recursive else directory.glob("*") + ) + + for path in iterator: + if not path.is_file(): + continue + + if self._is_excluded(path, directory): + continue + + suffix = _get_full_suffix(path) + if suffix not in self._allowed_extensions: + continue + + try: + file_size = path.stat().st_size + except OSError: + continue + + if file_size > max_bytes: + log.debug( + "file_skipped_too_large", + path = str(path), + size = file_size, + ) + continue + + if file_size == 0: + continue + + self._scan_file(path, result) + result.targets_scanned += 1 + + def _scan_file( + self, + path: Path, + result: ScanResult, + ) -> None: + """ + Extract text from a single file and run detection + """ + suffix = _get_full_suffix(path) + extractor = self._extension_map.get(suffix) + + if extractor is None: + return + + try: + chunks = extractor.extract(str(path)) + except Exception: + log.warning("extraction_failed", path = str(path)) + result.errors.append(f"Extraction failed: {path}") + return + + min_confidence = (self._detection_config.min_confidence) + + for chunk in chunks: + matches = self._registry.detect(chunk.text) + for match in matches: + if match.score < 
min_confidence: + continue + + finding = match_to_finding( + match, + chunk.text, + chunk.location, + self._redaction_style, + ) + result.findings.append(finding) + + def _is_excluded( + self, + path: Path, + base: Path, + ) -> bool: + """ + Check if a path matches any exclude pattern + """ + relative = str(path.relative_to(base)) + for pattern in self._file_config.exclude_patterns: + if fnmatch.fnmatch(relative, pattern): + return True + if fnmatch.fnmatch(path.name, pattern): + return True + if any(fnmatch.fnmatch(part, pattern) for part in path.parts): + return True + return False + + +def _build_extension_map() -> dict[str, Extractor]: + """ + Build a mapping from file extension to extractor instance + """ + extractors: list[Extractor] = [ + PlaintextExtractor(), + PDFExtractor(), + DocxExtractor(), + XlsxExtractor(), + XlsExtractor(), + CsvExtractor(), + JsonExtractor(), + XmlExtractor(), + YamlExtractor(), + ParquetExtractor(), + AvroExtractor(), + ArchiveExtractor(), + EmlExtractor(), + MsgExtractor(), + ] + + ext_map: dict[str, Extractor] = {} + for extractor in extractors: + for ext in extractor.supported_extensions: + ext_map[ext] = extractor + + return ext_map + + +def _get_full_suffix(path: Path) -> str: + """ + Get full suffix including compound extensions + """ + name = path.name + if name.endswith(".tar.gz"): + return ".tar.gz" + if name.endswith(".tar.bz2"): + return ".tar.bz2" + return path.suffix.lower() diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scanners/network_scanner.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scanners/network_scanner.py new file mode 100644 index 0000000..2ebc54a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scanners/network_scanner.py @@ -0,0 +1,338 @@ +""" +©AngelaMos | 2026 +network_scanner.py +""" + + +from datetime import datetime, UTC +from pathlib import Path + +import structlog + +from dlp_scanner.config import ScanConfig +from dlp_scanner.detectors.base import 
DetectorMatch +from dlp_scanner.detectors.registry import DetectorRegistry +from dlp_scanner.models import ( + Finding, + Location, + ScanResult, +) +from dlp_scanner.network.exfiltration import ( + DnsExfilDetector, + ExfilIndicator, + detect_base64_payload, +) +from dlp_scanner.network.flow_tracker import ( + FlowTracker, + make_flow_key, +) +from dlp_scanner.network.pcap import read_pcap +from dlp_scanner.network.protocols import ( + DNS_PORT, + identify_protocol, + parse_dns, + parse_http, +) +from dlp_scanner.scoring import match_to_finding + + +log = structlog.get_logger() + +EXFIL_RULE_MAP: dict[str, tuple[str, str]] = { + "dns_long_label": ( + "NET_DNS_EXFIL_LONG_LABEL", + "DNS Exfiltration: Long Label", + ), + "dns_high_entropy": ( + "NET_DNS_EXFIL_HIGH_ENTROPY", + "DNS Exfiltration: High Entropy Subdomain", + ), + "dns_long_qname": ( + "NET_DNS_EXFIL_LONG_QNAME", + "DNS Exfiltration: Long QNAME", + ), + "dns_txt_volume": ( + "NET_DNS_EXFIL_TXT_VOLUME", + "DNS Exfiltration: High TXT Volume", + ), + "base64_payload": ( + "NET_ENCODED_BASE64", + "Encoded Payload: Base64", + ), + "hex_payload": ( + "NET_ENCODED_HEX", + "Encoded Payload: Hex", + ), +} + + +class NetworkScanner: + """ + Scans network capture files for sensitive data in transit + """ + def __init__( + self, + config: ScanConfig, + registry: DetectorRegistry, + ) -> None: + self._net_config = config.network + self._detection_config = config.detection + self._redaction_style = config.output.redaction_style + self._registry = registry + + def scan(self, target: str) -> ScanResult: + """ + Scan a PCAP file for sensitive data in payloads + """ + result = ScanResult() + target_path = Path(target) + + if not target_path.exists(): + result.errors.append(f"PCAP file not found: {target}") + result.scan_completed_at = datetime.now(UTC) + return result + + try: + self._scan_pcap(target_path, result) + except Exception as exc: + log.warning( + "pcap_scan_failed", + path = str(target_path), + error = 
str(exc), + ) + result.errors.append(f"PCAP scan failed: {exc}") + + result.scan_completed_at = datetime.now(UTC) + return result + + def _scan_pcap( + self, + path: Path, + result: ScanResult, + ) -> None: + """ + Read packets, reassemble flows, and run detection + """ + tracker = FlowTracker() + dns_detector = DnsExfilDetector( + entropy_threshold = ( + self._net_config.dns_label_entropy_threshold + ), + ) + packet_count = 0 + + for packet in read_pcap( + path, + max_packets = self._net_config.max_packets, + ): + packet_count += 1 + tracker.add_packet(packet) + + if ( + packet.protocol == "udp" + and ( + packet.src_port == DNS_PORT + or packet.dst_port == DNS_PORT + ) + ): + self._process_dns_packet( + packet.payload, + packet.src_ip, + packet.dst_ip, + path, + packet_count, + dns_detector, + result, + ) + + if packet.payload: + exfil_indicators = detect_base64_payload( + packet.payload, + src_ip = packet.src_ip, + dst_ip = packet.dst_ip, + ) + for indicator in exfil_indicators: + finding = _indicator_to_finding( + indicator, + str(path), + packet_count, + ) + result.findings.append(finding) + + txt_indicators = dns_detector.check_txt_volume() + for indicator in txt_indicators: + finding = _indicator_to_finding( + indicator, + str(path), + packet_count, + ) + result.findings.append(finding) + + self._scan_reassembled_flows(tracker, path, result) + + result.targets_scanned = packet_count + + def _process_dns_packet( + self, + payload: bytes, + src_ip: str, + dst_ip: str, + path: Path, + packet_num: int, + dns_detector: DnsExfilDetector, + result: ScanResult, + ) -> None: + """ + Parse DNS and check for exfiltration patterns + """ + dns_record = parse_dns(payload) + if dns_record is None: + return + + for query in dns_record.queries: + indicator = dns_detector.analyze_query( + query, + src_ip, + dst_ip, + ) + if indicator is not None: + finding = _indicator_to_finding( + indicator, + str(path), + packet_num, + ) + result.findings.append(finding) + + def 
_scan_reassembled_flows( + self, + tracker: FlowTracker, + path: Path, + result: ScanResult, + ) -> None: + """ + Reassemble TCP streams and scan for sensitive data + """ + min_confidence = self._detection_config.min_confidence + + for flow in tracker.get_flows(): + key = ( + flow.src_ip, + flow.dst_ip, + flow.src_port, + flow.dst_port, + ) + stream = tracker.reassemble_stream(key) + if not stream: + continue + + protocol = identify_protocol(stream) + text = self._extract_scannable_text( + stream, + protocol, + ) + + if not text or not text.strip(): + continue + + matches = self._registry.detect(text) + + location = Location( + source_type = "network", + uri = str(path), + ) + + for match in matches: + if match.score < min_confidence: + continue + + finding = match_to_finding( + match, + text, + location, + self._redaction_style, + ) + result.findings.append(finding) + + def _extract_scannable_text( + self, + stream: bytes, + protocol: str, + ) -> str: + """ + Extract text content from a reassembled stream + """ + if protocol == "http": + return self._extract_http_text(stream) + + if protocol in ("tls", "ssh"): + return "" + + try: + return stream.decode("utf-8", errors = "replace") + except Exception: + return "" + + def _extract_http_text( + self, + stream: bytes, + ) -> str: + """ + Extract scannable text from HTTP messages + """ + http_msg = parse_http(stream) + if http_msg is None: + try: + return stream.decode( + "utf-8", + errors = "replace", + ) + except Exception: + return "" + + parts: list[str] = [] + + if http_msg.is_request and http_msg.uri: + parts.append(http_msg.uri) + + for header_name in ("cookie", "authorization", "set-cookie"): + val = http_msg.headers.get(header_name, "") + if val: + parts.append(val) + + if http_msg.body: + parts.append(http_msg.body) + + return "\n".join(parts) + + +def _indicator_to_finding( + indicator: ExfilIndicator, + uri: str, + packet_num: int, +) -> Finding: + """ + Convert an exfiltration indicator to a Finding + """ 
+ rule_id, rule_name = EXFIL_RULE_MAP.get( + indicator.indicator_type, + ("NET_EXFIL_UNKNOWN", "Network Exfiltration Indicator"), + ) + + severity = "high" if indicator.confidence >= 0.70 else "medium" + + location = Location( + source_type = "network", + uri = uri, + byte_offset = packet_num, + ) + + return Finding( + rule_id = rule_id, + rule_name = rule_name, + severity = severity, + confidence = indicator.confidence, + location = location, + redacted_snippet = indicator.evidence[:120], + compliance_frameworks = [], + remediation = indicator.description, + ) diff --git a/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scoring.py b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scoring.py new file mode 100644 index 0000000..48b5682 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/src/dlp_scanner/scoring.py @@ -0,0 +1,52 @@ +""" +©AngelaMos | 2026 +scoring.py +""" + + +from dlp_scanner.compliance import ( + get_frameworks_for_rule, + get_remediation_for_rule, + score_to_severity, +) +from dlp_scanner.constants import RedactionStyle +from dlp_scanner.detectors.base import DetectorMatch +from dlp_scanner.models import Finding, Location +from dlp_scanner.redaction import redact + + +def match_to_finding( + match: DetectorMatch, + text: str, + location: Location, + redaction_style: RedactionStyle, +) -> Finding: + """ + Convert a detector match into a fully classified finding + """ + severity = score_to_severity(match.score) + frameworks = get_frameworks_for_rule(match.rule_id) + if match.compliance_frameworks: + combined = ( + set(frameworks) | set(match.compliance_frameworks) + ) + frameworks = sorted(combined) + remediation = get_remediation_for_rule(match.rule_id) + + snippet = redact( + text, + match.start, + match.end, + style = redaction_style, + ) + + return Finding( + rule_id = match.rule_id, + rule_name = match.rule_name, + severity = severity, + confidence = match.score, + location = location, + redacted_snippet = snippet, + compliance_frameworks 
= frameworks, + remediation = remediation, + ) diff --git a/PROJECTS/intermediate/dlp-scanner/tests/__init__.py b/PROJECTS/intermediate/dlp-scanner/tests/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/conftest.py b/PROJECTS/intermediate/dlp-scanner/tests/conftest.py new file mode 100644 index 0000000..910839d --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/conftest.py @@ -0,0 +1,87 @@ +""" +©AngelaMos | 2026 +conftest.py +""" + + +import tempfile +from pathlib import Path +from collections.abc import Generator + +import pytest + +from dlp_scanner.config import ScanConfig +from dlp_scanner.models import Finding, Location + + +@pytest.fixture +def default_config() -> ScanConfig: + """ + Provide a default ScanConfig instance + """ + return ScanConfig() + + +@pytest.fixture +def sample_location() -> Location: + """ + Provide a sample file location + """ + return Location( + source_type = "file", + uri = "test/employees.csv", + line = 42, + column = 15, + ) + + +@pytest.fixture +def sample_finding(sample_location: Location) -> Finding: + """ + Provide a sample finding + """ + return Finding( + rule_id = "PII_SSN", + rule_name = "US Social Security Number", + severity = "critical", + confidence = 0.95, + location = sample_location, + redacted_snippet = "...SSN: ***-**-6789...", + compliance_frameworks = ["HIPAA", + "CCPA"], + remediation = "Encrypt or remove SSN data", + ) + + +@pytest.fixture +def temp_dir() -> Generator[Path, None, None]: + """ + Provide a temporary directory for test files + """ + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + +@pytest.fixture +def temp_dir_with_pii( + temp_dir: Path, +) -> Path: + """ + Provide a temp directory containing files with known PII + """ + csv_path = temp_dir / "employees.csv" + csv_path.write_text( 
+ "name,ssn,email\n" + "John Doe,123-45-6789,john@example.com\n" + "Jane Smith,987-65-4321,jane@example.com\n" + ) + + txt_path = temp_dir / "clean.txt" + txt_path.write_text("No sensitive data here at all.") + + json_path = temp_dir / "config.json" + json_path.write_text( + '{"api_key": "sk_live_abc123def456ghi789jkl012mno345"}\n' + ) + + return temp_dir diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_cli.py b/PROJECTS/intermediate/dlp-scanner/tests/test_cli.py new file mode 100644 index 0000000..6d28a67 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_cli.py @@ -0,0 +1,312 @@ +""" +©AngelaMos | 2026 +test_cli.py +""" + + +import json +import tempfile +from pathlib import Path +from collections.abc import Generator + +import pytest +from typer.testing import CliRunner + +from dlp_scanner.cli import app + + +runner = CliRunner() + + +@pytest.fixture +def pii_dir() -> Generator[Path, None, None]: + """ + Provide a temp directory with valid detectable SSNs + """ + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + csv_path = root / "employees.csv" + csv_path.write_text( + "name,ssn\n" + "Alice,456-78-9012\n" + "Bob,234-56-7890\n" + ) + yield root + + +@pytest.fixture +def json_result_file() -> Generator[Path, None, None]: + """ + Provide a JSON scan results file for report tests + """ + data = { + "scan_metadata": { + "scan_id": "test-123", + "tool_version": "0.1.0", + "scan_started_at": ("2026-01-01T00:00:00+00:00"), + "scan_completed_at": ("2026-01-01T00:01:00+00:00"), + "targets_scanned": 1, + "total_findings": 1, + "errors": [], + }, + "findings": [ + { + "finding_id": "f-001", + "rule_id": "PII_SSN", + "rule_name": ("US Social Security Number"), + "severity": "critical", + "confidence": 0.95, + "location": { + "source_type": "file", + "uri": "data.csv", + "line": 5, + "column": None, + "table_name": None, + "column_name": None, + }, + "redacted_snippet": "***-**-6789", + "compliance_frameworks": [ + "HIPAA", + "CCPA", 
+ ], + "remediation": "Encrypt data", + "detected_at": ("2026-01-01T00:00:30+00:00"), + } + ], + "summary": { + "by_severity": { + "critical": 1 + }, + "by_rule": { + "PII_SSN": 1 + }, + "by_framework": { + "HIPAA": 1, + "CCPA": 1, + }, + }, + } + + with tempfile.NamedTemporaryFile( + suffix = ".json", + delete = False, + mode = "w", + ) as f: + json.dump(data, f) + path = Path(f.name) + + yield path + path.unlink(missing_ok = True) + + +class TestCliHelp: + def test_help_shows_commands(self) -> None: + result = runner.invoke(app, ["--help"]) + assert result.exit_code == 0 + assert "file" in result.output + assert "db" in result.output + assert "network" in result.output + assert "report" in result.output + + def test_version(self) -> None: + result = runner.invoke(app, ["--version"]) + assert result.exit_code == 0 + assert "0.1.0" in result.output + + def test_file_help(self) -> None: + result = runner.invoke(app, ["file", "--help"]) + assert result.exit_code == 0 + assert "TARGET" in result.output + + def test_db_help(self) -> None: + result = runner.invoke(app, ["db", "--help"]) + assert result.exit_code == 0 + assert "TARGET" in result.output + + def test_network_help(self) -> None: + result = runner.invoke(app, ["network", "--help"]) + assert result.exit_code == 0 + assert "TARGET" in result.output + + def test_report_help(self) -> None: + result = runner.invoke(app, ["report", "--help"]) + assert result.exit_code == 0 + assert "convert" in result.output + assert "summary" in result.output + + +class TestFileScan: + def test_scan_console_output(self, pii_dir: Path) -> None: + result = runner.invoke(app, ["file", str(pii_dir)]) + assert result.exit_code == 0 + + def test_scan_json_output(self, pii_dir: Path) -> None: + result = runner.invoke( + app, + ["file", + str(pii_dir), + "-f", + "json"], + ) + assert result.exit_code == 0 + data = json.loads(result.output) + assert "findings" in data + + def test_scan_to_file(self, pii_dir: Path) -> None: + with 
tempfile.NamedTemporaryFile(suffix = ".json", + delete = False) as f: + out_path = f.name + + result = runner.invoke( + app, + [ + "file", + str(pii_dir), + "-f", + "json", + "-o", + out_path, + ], + ) + assert "Report written" in result.output + content = Path(out_path).read_text() + assert "findings" in content + Path(out_path).unlink(missing_ok = True) + + def test_scan_nonexistent_target(self) -> None: + result = runner.invoke(app, ["file", "/no/such/path"]) + assert result.exit_code == 1 + + def test_invalid_format(self, pii_dir: Path) -> None: + result = runner.invoke( + app, + [ + "file", + str(pii_dir), + "-f", + "invalid", + ], + ) + assert result.exit_code == 1 + + def test_with_config_flag(self, pii_dir: Path) -> None: + result = runner.invoke( + app, + [ + "--config", + "nonexistent.yml", + "file", + str(pii_dir), + ], + ) + assert result.exit_code == 0 + + def test_with_verbose_flag(self, pii_dir: Path) -> None: + result = runner.invoke( + app, + ["--verbose", + "file", + str(pii_dir)], + ) + assert result.exit_code == 0 + + +class TestReportCommands: + def test_convert_to_sarif(self, json_result_file: Path) -> None: + result = runner.invoke( + app, + [ + "report", + "convert", + str(json_result_file), + "-f", + "sarif", + ], + ) + assert result.exit_code == 0 + data = json.loads(result.output) + assert data["version"] == "2.1.0" + + def test_convert_to_csv(self, json_result_file: Path) -> None: + result = runner.invoke( + app, + [ + "report", + "convert", + str(json_result_file), + "-f", + "csv", + ], + ) + assert result.exit_code == 0 + assert "PII_SSN" in result.output + + def test_convert_to_file(self, json_result_file: Path) -> None: + with tempfile.NamedTemporaryFile(suffix = ".sarif", + delete = False) as f: + out_path = f.name + + result = runner.invoke( + app, + [ + "report", + "convert", + str(json_result_file), + "-f", + "sarif", + "-o", + out_path, + ], + ) + assert result.exit_code == 0 + assert "Converted" in result.output + content = 
Path(out_path).read_text() + data = json.loads(content) + assert data["version"] == "2.1.0" + Path(out_path).unlink(missing_ok = True) + + def test_convert_missing_file(self) -> None: + result = runner.invoke( + app, + [ + "report", + "convert", + "/no/such/file.json", + ], + ) + assert result.exit_code == 1 + + def test_convert_invalid_format(self, json_result_file: Path) -> None: + result = runner.invoke( + app, + [ + "report", + "convert", + str(json_result_file), + "-f", + "invalid", + ], + ) + assert result.exit_code == 1 + + def test_summary(self, json_result_file: Path) -> None: + result = runner.invoke( + app, + [ + "report", + "summary", + str(json_result_file), + ], + ) + assert result.exit_code == 0 + + def test_summary_missing_file(self) -> None: + result = runner.invoke( + app, + [ + "report", + "summary", + "/no/such/file.json", + ], + ) + assert result.exit_code == 1 diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_compliance.py b/PROJECTS/intermediate/dlp-scanner/tests/test_compliance.py new file mode 100644 index 0000000..220bd67 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_compliance.py @@ -0,0 +1,71 @@ +""" +©AngelaMos | 2026 +test_compliance.py +""" + + +from dlp_scanner.compliance import ( + get_frameworks_for_rule, + get_remediation_for_rule, + score_to_severity, + DEFAULT_REMEDIATION, +) + + +class TestScoreToSeverity: + def test_critical_threshold(self) -> None: + assert score_to_severity(0.85) == "critical" + assert score_to_severity(0.99) == "critical" + assert score_to_severity(1.0) == "critical" + + def test_high_threshold(self) -> None: + assert score_to_severity(0.65) == "high" + assert score_to_severity(0.84) == "high" + + def test_medium_threshold(self) -> None: + assert score_to_severity(0.40) == "medium" + assert score_to_severity(0.64) == "medium" + + def test_low_threshold(self) -> None: + assert score_to_severity(0.20) == "low" + assert score_to_severity(0.39) == "low" + + def 
test_below_minimum(self) -> None: + assert score_to_severity(0.19) == "low" + assert score_to_severity(0.0) == "low" + + +class TestFrameworkMapping: + def test_ssn_maps_to_hipaa_and_ccpa(self) -> None: + frameworks = get_frameworks_for_rule("PII_SSN") + assert "HIPAA" in frameworks + assert "CCPA" in frameworks + assert "GLBA" in frameworks + + def test_credit_card_maps_to_pci(self) -> None: + for rule_id in ( + "FIN_CREDIT_CARD_VISA", + "FIN_CREDIT_CARD_MC", + "FIN_CREDIT_CARD_AMEX", + "FIN_CREDIT_CARD_DISC", + ): + frameworks = get_frameworks_for_rule(rule_id) + assert "PCI_DSS" in frameworks + + def test_unknown_rule_returns_empty(self) -> None: + assert get_frameworks_for_rule("UNKNOWN") == [] + + def test_credential_rules_have_no_frameworks( + self, + ) -> None: + frameworks = get_frameworks_for_rule("CRED_AWS_ACCESS_KEY") + assert frameworks == [] + + +class TestRemediation: + def test_known_rule_has_remediation(self) -> None: + text = get_remediation_for_rule("PII_SSN") + assert "encrypt" in text.lower() or "tokeniz" in text.lower() + + def test_unknown_rule_returns_default(self) -> None: + assert get_remediation_for_rule("UNKNOWN") == DEFAULT_REMEDIATION diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_config.py b/PROJECTS/intermediate/dlp-scanner/tests/test_config.py new file mode 100644 index 0000000..7b2110b --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_config.py @@ -0,0 +1,81 @@ +""" +©AngelaMos | 2026 +test_config.py +""" + + +from pathlib import Path + +from dlp_scanner.config import ScanConfig, load_config +from dlp_scanner.constants import ( + DEFAULT_DB_SAMPLE_PERCENTAGE, + DEFAULT_ENTROPY_THRESHOLD, + DEFAULT_MAX_FILE_SIZE_MB, + DEFAULT_MIN_CONFIDENCE, +) + + +class TestScanConfig: + def test_defaults(self) -> None: + config = ScanConfig() + assert config.file.max_file_size_mb == DEFAULT_MAX_FILE_SIZE_MB + assert config.file.recursive is True + assert config.database.sample_percentage == DEFAULT_DB_SAMPLE_PERCENTAGE + 
assert config.network.entropy_threshold == DEFAULT_ENTROPY_THRESHOLD + assert config.detection.min_confidence == DEFAULT_MIN_CONFIDENCE + assert config.output.format == "console" + assert config.output.redaction_style == "partial" + + def test_exclude_patterns_populated(self) -> None: + config = ScanConfig() + assert "*.pyc" in config.file.exclude_patterns + assert ".git" in config.file.exclude_patterns + + def test_include_extensions_populated(self) -> None: + config = ScanConfig() + assert ".pdf" in config.file.include_extensions + assert ".csv" in config.file.include_extensions + + def test_default_frameworks(self) -> None: + config = ScanConfig() + assert "HIPAA" in config.compliance.frameworks + assert "PCI_DSS" in config.compliance.frameworks + + +class TestLoadConfig: + def test_load_missing_file_returns_defaults(self) -> None: + config = load_config(Path("/nonexistent/config.yml")) + assert config == ScanConfig() + + def test_load_none_returns_defaults(self) -> None: + config = load_config(None) + assert isinstance(config, ScanConfig) + + def test_load_yaml_config(self, tmp_path: Path) -> None: + config_path = tmp_path / ".dlp-scanner.yml" + config_path.write_text( + "scan:\n" + " file:\n" + " max_file_size_mb: 50\n" + " recursive: false\n" + "detection:\n" + " min_confidence: 0.5\n" + "output:\n" + " format: json\n" + ) + config = load_config(config_path) + assert config.file.max_file_size_mb == 50 + assert config.file.recursive is False + assert config.detection.min_confidence == 0.5 + assert config.output.format == "json" + + def test_load_partial_config_fills_defaults( + self, + tmp_path: Path + ) -> None: + config_path = tmp_path / ".dlp-scanner.yml" + config_path.write_text("output:\n format: sarif\n") + config = load_config(config_path) + assert config.output.format == "sarif" + assert config.file.max_file_size_mb == DEFAULT_MAX_FILE_SIZE_MB + assert config.detection.min_confidence == DEFAULT_MIN_CONFIDENCE diff --git 
a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/__init__.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_context.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_context.py new file mode 100644 index 0000000..2c7361b --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_context.py @@ -0,0 +1,120 @@ +""" +©AngelaMos | 2026 +test_context.py +""" + + +from dlp_scanner.detectors.base import DetectorMatch +from dlp_scanner.detectors.context import ( + apply_context_boost, + _apply_cooccurrence_boost, +) + + +def _make_match( + rule_id: str = "PII_SSN", + start: int = 20, + end: int = 31, + score: float = 0.45, + keywords: list[str] | None = None, +) -> DetectorMatch: + resolved_keywords = ( + keywords if keywords is not None else ["ssn", + "social security"] + ) + return DetectorMatch( + rule_id = rule_id, + rule_name = "Test Rule", + start = start, + end = end, + matched_text = "234-56-7890", + score = score, + context_keywords = resolved_keywords, + compliance_frameworks = [], + ) + + +class TestContextBoost: + def test_boost_with_keyword_present(self) -> None: + text = "Employee SSN: 234-56-7890 on file" + match = _make_match(start = 14, end = 25) + boosted = apply_context_boost(text, [match]) + assert boosted[0].score > match.score + + def test_no_boost_without_keyword(self) -> None: + text = "Some random number 234-56-7890 here" + match = _make_match( + start = 19, + end = 30, + keywords = ["nonexistent_keyword"], + ) + boosted = apply_context_boost(text, [match]) + assert boosted[0].score == match.score + + def test_no_boost_with_empty_keywords(self) -> None: + text = "SSN: 234-56-7890" + match = _make_match(start = 5, end = 16, 
keywords = []) + boosted = apply_context_boost(text, [match]) + assert boosted[0].score == match.score + + def test_empty_matches_returns_empty(self) -> None: + result = apply_context_boost("any text", []) + assert result == [] + + +class TestCooccurrenceBoost: + def test_nearby_different_rules_boosted(self) -> None: + matches = [ + _make_match(rule_id = "PII_SSN", + start = 10, + end = 21), + _make_match( + rule_id = "PII_EMAIL", + start = 30, + end = 50, + keywords = ["email"], + ), + ] + boosted = _apply_cooccurrence_boost(matches) + assert all( + b.score > m.score + for b, m in zip(boosted, matches, strict = False) + ) + + def test_same_rule_not_boosted(self) -> None: + matches = [ + _make_match(rule_id = "PII_SSN", + start = 10, + end = 21), + _make_match(rule_id = "PII_SSN", + start = 50, + end = 61), + ] + boosted = _apply_cooccurrence_boost(matches) + assert all( + b.score == m.score + for b, m in zip(boosted, matches, strict = False) + ) + + def test_distant_matches_not_boosted(self) -> None: + matches = [ + _make_match(rule_id = "PII_SSN", + start = 10, + end = 21), + _make_match( + rule_id = "PII_EMAIL", + start = 1000, + end = 1020, + keywords = ["email"], + ), + ] + boosted = _apply_cooccurrence_boost(matches) + assert all( + b.score == m.score + for b, m in zip(boosted, matches, strict = False) + ) + + def test_single_match_not_boosted(self) -> None: + matches = [_make_match()] + boosted = _apply_cooccurrence_boost(matches) + assert boosted[0].score == matches[0].score diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_entropy.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_entropy.py new file mode 100644 index 0000000..095a39f --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_entropy.py @@ -0,0 +1,83 @@ +""" +©AngelaMos | 2026 +test_entropy.py +""" + + +import os + +from dlp_scanner.detectors.entropy import ( + shannon_entropy, + shannon_entropy_str, + detect_high_entropy_regions, + 
EntropyDetector, +) + + +class TestShannonEntropy: + def test_all_same_bytes_is_zero(self) -> None: + data = b"\x00" * 100 + assert shannon_entropy(data) == 0.0 + + def test_empty_data_is_zero(self) -> None: + assert shannon_entropy(b"") == 0.0 + + def test_english_text_in_expected_range(self) -> None: + text = b"the quick brown fox jumps over the lazy dog" + h = shannon_entropy(text) + assert 3.5 <= h <= 5.0 + + def test_random_bytes_near_maximum(self) -> None: + data = os.urandom(10000) + h = shannon_entropy(data) + assert h > 7.5 + + def test_two_byte_values_is_one_bit(self) -> None: + data = b"\x00\x01" * 50 + h = shannon_entropy(data) + assert abs(h - 1.0) < 0.01 + + def test_string_entropy_matches_bytes(self) -> None: + text = "hello world" + h_str = shannon_entropy_str(text) + h_bytes = shannon_entropy(text.encode("utf-8")) + assert abs(h_str - h_bytes) < 0.001 + + +class TestHighEntropyRegions: + def test_random_data_detected(self) -> None: + data = os.urandom(1024) + regions = detect_high_entropy_regions(data, threshold = 7.0) + assert len(regions) > 0 + + def test_plaintext_not_detected(self) -> None: + data = b"the quick brown fox " * 100 + regions = detect_high_entropy_regions(data, threshold = 7.0) + assert len(regions) == 0 + + def test_short_data_below_window(self) -> None: + data = os.urandom(100) + regions = detect_high_entropy_regions( + data, + threshold = 7.0, + window_size = 256 + ) + assert len(regions) <= 1 + + +class TestEntropyDetector: + def test_detect_high_entropy_text(self) -> None: + import base64 + + detector = EntropyDetector(threshold = 5.5) + raw = os.urandom(2048) + high_entropy_text = base64.b85encode(raw).decode("ascii") + matches = detector.detect(high_entropy_text) + assert len(matches) > 0 + assert all(m.rule_id == "NET_HIGH_ENTROPY" for m in matches) + + def test_no_detection_in_normal_text(self) -> None: + detector = EntropyDetector(threshold = 7.0) + text = "This is a normal text document with nothing suspicious." 
+ matches = detector.detect(text) + assert len(matches) == 0 diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_pattern.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_pattern.py new file mode 100644 index 0000000..f1e4afa --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_pattern.py @@ -0,0 +1,58 @@ +""" +©AngelaMos | 2026 +test_pattern.py +""" + + +from dlp_scanner.detectors.pattern import PatternDetector +from dlp_scanner.detectors.rules.pii import PII_RULES + + +class TestPatternDetector: + def test_detects_ssn_in_text(self) -> None: + detector = PatternDetector( + rules = PII_RULES, + allowlist_values = frozenset() + ) + text = "Employee SSN is 234-56-7890 on file." + matches = detector.detect(text) + ssn_matches = [m for m in matches if m.rule_id == "PII_SSN"] + assert len(ssn_matches) == 1 + assert ssn_matches[0].matched_text == "234-56-7890" + + def test_skips_allowlisted_values(self) -> None: + detector = PatternDetector(rules = PII_RULES) + text = "Test SSN: 123-45-6789" + matches = detector.detect(text) + ssn_matches = [m for m in matches if m.rule_id == "PII_SSN"] + assert len(ssn_matches) == 0 + + def test_detects_email(self) -> None: + detector = PatternDetector( + rules = PII_RULES, + allowlist_values = frozenset() + ) + text = "Contact: alice@company.com for details." + matches = detector.detect(text) + email_matches = [m for m in matches if m.rule_id == "PII_EMAIL"] + assert len(email_matches) == 1 + + def test_no_matches_in_clean_text(self) -> None: + detector = PatternDetector(rules = PII_RULES) + text = "This is a perfectly clean document." 
+ matches = detector.detect(text) + assert len(matches) == 0 + + def test_multiple_matches_in_one_text(self) -> None: + detector = PatternDetector( + rules = PII_RULES, + allowlist_values = frozenset() + ) + text = ( + "Name: John, SSN: 234-56-7890, " + "Email: john@test.org, Phone: (555) 234-5678" + ) + matches = detector.detect(text) + rule_ids = {m.rule_id for m in matches} + assert "PII_SSN" in rule_ids + assert "PII_EMAIL" in rule_ids diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/__init__.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_credentials.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_credentials.py new file mode 100644 index 0000000..e0289f4 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_credentials.py @@ -0,0 +1,123 @@ +""" +©AngelaMos | 2026 +test_credentials.py +""" + + +import pytest + +from dlp_scanner.detectors.rules.credentials import ( + AWS_ACCESS_KEY_PATTERN, + GITHUB_CLASSIC_PAT_PATTERN, + GITHUB_FINE_GRAINED_PATTERN, + JWT_PATTERN, + STRIPE_KEY_PATTERN, + SLACK_TOKEN_PATTERN, + PRIVATE_KEY_PATTERN, + GENERIC_API_KEY_PATTERN, +) + + +class TestAWSAccessKey: + def test_long_term_key_matches(self) -> None: + assert ( + AWS_ACCESS_KEY_PATTERN.search("AKIAIOSFODNN7EXAMPLE") + is not None + ) + + def test_session_key_matches(self) -> None: + assert ( + AWS_ACCESS_KEY_PATTERN.search("ASIAQWERTYUIOP123456") + is not None + ) + + def test_invalid_prefix_rejected(self) -> None: + assert ( + AWS_ACCESS_KEY_PATTERN.search("ABCDIOSFODNN7EXAMPLE") is None + ) + + +class TestGitHubTokens: + def test_classic_pat_matches(self) -> None: + 
token = "ghp_" + "a" * 36 + assert (GITHUB_CLASSIC_PAT_PATTERN.search(token) is not None) + + def test_fine_grained_pat_matches(self) -> None: + token = "github_pat_" + "a" * 22 + "_" + "b" * 59 + assert (GITHUB_FINE_GRAINED_PATTERN.search(token) is not None) + + def test_invalid_prefix_rejected(self) -> None: + assert ( + GITHUB_CLASSIC_PAT_PATTERN.search("xyz_" + "a" * 36) is None + ) + + +class TestJWT: + def test_jwt_matches(self) -> None: + token = ( + "eyJhbGciOiJIUzI1NiJ9" + ".eyJzdWIiOiIxMjM0NTY3ODkwIn0" + ".abc123def456" + ) + assert JWT_PATTERN.search(token) is not None + + def test_non_jwt_rejected(self) -> None: + assert (JWT_PATTERN.search("not.a.jwt.token") is None) + + +class TestStripeKey: + @pytest.mark.parametrize( + "key", + [ + "sk_test_" + "a" * 24, + "sk_live_" + "b" * 24, + "pk_test_" + "c" * 24, + "pk_live_" + "d" * 30, + ], + ) + def test_stripe_keys_match(self, key: str) -> None: + assert STRIPE_KEY_PATTERN.search(key) is not None + + def test_invalid_stripe_key(self) -> None: + assert (STRIPE_KEY_PATTERN.search("sk_invalid_abc") is None) + + +class TestSlackToken: + @pytest.mark.parametrize( + "token", + [ + "xoxb-" + "a" * 20, + "xoxp-" + "b" * 30, + "xoxa-" + "c" * 15, + ], + ) + def test_slack_tokens_match(self, token: str) -> None: + assert (SLACK_TOKEN_PATTERN.search(token) is not None) + + +class TestPrivateKey: + @pytest.mark.parametrize( + "header", + [ + "-----BEGIN RSA PRIVATE KEY-----", + "-----BEGIN EC PRIVATE KEY-----", + "-----BEGIN PRIVATE KEY-----", + "-----BEGIN OPENSSH PRIVATE KEY-----", + ], + ) + def test_private_key_headers_match(self, header: str) -> None: + assert (PRIVATE_KEY_PATTERN.search(header) is not None) + + +class TestGenericAPIKey: + @pytest.mark.parametrize( + "text", + [ + 'api_key = "abcdef1234567890abcdef"', + "API_KEY: abcdef1234567890abcdef", + "secret_key='very_secret_key_value_12345'", + 'access_key = "abc123def456ghi789jkl012"', + ], + ) + def test_generic_api_keys_match(self, text: str) -> None: 
+ assert (GENERIC_API_KEY_PATTERN.search(text) is not None) diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_financial.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_financial.py new file mode 100644 index 0000000..76ee6a9 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_financial.py @@ -0,0 +1,125 @@ +""" +©AngelaMos | 2026 +test_financial.py +""" + + +import pytest + +from dlp_scanner.detectors.rules.financial import ( + VISA_PATTERN, + MASTERCARD_PATTERN, + AMEX_PATTERN, + IBAN_PATTERN, + luhn_check, + iban_check, + nhs_check, +) + + +class TestLuhnAlgorithm: + @pytest.mark.parametrize( + "number", + [ + "4532015112830366", + "4916338506082832", + "5425233430109903", + "2223000048410010", + "374245455400126", + "6011000990139424", + ], + ) + def test_valid_cards_pass_luhn(self, number: str) -> None: + assert luhn_check(number) is True + + @pytest.mark.parametrize( + "number", + [ + "4532015112830367", + "1234567890123456", + "1111111111111112", + "9999999999999991", + ], + ) + def test_invalid_cards_fail_luhn(self, number: str) -> None: + assert luhn_check(number) is False + + def test_too_short_fails(self) -> None: + assert luhn_check("123456") is False + + def test_with_spaces(self) -> None: + assert luhn_check("4532 0151 1283 0366") is True + + def test_with_dashes(self) -> None: + assert luhn_check("4532-0151-1283-0366") is True + + +class TestIBANCheck: + @pytest.mark.parametrize( + "iban", + [ + "GB29NWBK60161331926819", + "DE89370400440532013000", + "FR7630006000011234567890189", + "NL91ABNA0417164300", + ], + ) + def test_valid_ibans(self, iban: str) -> None: + assert iban_check(iban) is True + + @pytest.mark.parametrize( + "iban", + [ + "GB29NWBK60161331926818", + "XX00INVALID", + "DE00000000000000000000", + "SHORT", + ], + ) + def test_invalid_ibans(self, iban: str) -> None: + assert iban_check(iban) is False + + def test_iban_with_spaces(self) -> 
None: + assert iban_check("GB29 NWBK 6016 1331 9268 19") is True + + +class TestNHSCheck: + def test_valid_nhs_number(self) -> None: + assert nhs_check("9434765919") is True + + def test_invalid_nhs_number(self) -> None: + assert nhs_check("1234567890") is False + + def test_nhs_too_short(self) -> None: + assert nhs_check("12345") is False + + def test_nhs_non_numeric(self) -> None: + assert nhs_check("abcdefghij") is False + + +class TestCreditCardPatterns: + def test_visa_pattern_matches(self) -> None: + assert VISA_PATTERN.search("4532015112830366") is not None + + def test_mastercard_classic_matches(self) -> None: + assert MASTERCARD_PATTERN.search("5425233430109903") is not None + + def test_mastercard_2series_matches(self) -> None: + assert MASTERCARD_PATTERN.search("2223000048410010") is not None + + def test_amex_matches(self) -> None: + assert AMEX_PATTERN.search("374245455400126") is not None + + def test_visa_with_spaces(self) -> None: + assert VISA_PATTERN.search("4532 0151 1283 0366") is not None + + def test_visa_with_dashes(self) -> None: + assert VISA_PATTERN.search("4532-0151-1283-0366") is not None + + +class TestIBANPattern: + def test_iban_pattern_matches_gb(self) -> None: + assert IBAN_PATTERN.search("GB29NWBK60161331926819") is not None + + def test_iban_pattern_matches_de(self) -> None: + assert IBAN_PATTERN.search("DE89370400440532013000") is not None diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_health.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_health.py new file mode 100644 index 0000000..0f0b7d2 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_health.py @@ -0,0 +1,112 @@ +""" +©AngelaMos | 2026 +test_health.py +""" + + +import pytest + +from dlp_scanner.detectors.rules.health import ( + MEDICAL_RECORD_PATTERN, + DEA_NUMBER_PATTERN, + NPI_PATTERN, + _validate_dea_number, + _validate_npi, +) + + +class TestMedicalRecordPattern: + 
@pytest.mark.parametrize( + "text", + [ + "MRN: 123456", + "MRN:12345678", + "MR# 9876543210", + "MED-1234567890", + "mrn 00123456", + ], + ) + def test_valid_mrns_match(self, text: str) -> None: + assert ( + MEDICAL_RECORD_PATTERN.search(text) is not None + ) + + @pytest.mark.parametrize( + "text", + [ + "MRN: 12345", + "MORNING coffee", + "random text without MRN", + ], + ) + def test_invalid_mrns_rejected(self, text: str) -> None: + assert ( + MEDICAL_RECORD_PATTERN.search(text) is None + ) + + +class TestDEANumberPattern: + def test_valid_dea_format_matches(self) -> None: + assert ( + DEA_NUMBER_PATTERN.search("AB1234563") is not None + ) + + def test_lowercase_rejected(self) -> None: + assert ( + DEA_NUMBER_PATTERN.search("ab1234563") is None + ) + + def test_too_short_rejected(self) -> None: + assert ( + DEA_NUMBER_PATTERN.search("AB12345") is None + ) + + +class TestDEAValidation: + def test_valid_dea_number(self) -> None: + assert _validate_dea_number("AB1234563") is True + + def test_invalid_check_digit(self) -> None: + assert _validate_dea_number("AB1234560") is False + + def test_too_short(self) -> None: + assert _validate_dea_number("AB12345") is False + + def test_non_numeric_digits(self) -> None: + assert _validate_dea_number("ABabcdefg") is False + + def test_valid_with_9_prefix(self) -> None: + assert _validate_dea_number("A91234563") is True + + +class TestNPIPattern: + def test_ten_digit_matches(self) -> None: + assert NPI_PATTERN.search("1234567890") is not None + + def test_nine_digit_rejected(self) -> None: + assert NPI_PATTERN.search("123456789") is None + + def test_eleven_digit_no_exact_match(self) -> None: + match = NPI_PATTERN.search("12345678901") + if match is not None: + assert len(match.group()) == 10 + + +class TestNPIValidation: + def test_valid_npi(self) -> None: + assert _validate_npi("1234567893") is True + + def test_invalid_check_digit(self) -> None: + assert _validate_npi("1234567890") is False + + def test_non_numeric(self) -> 
None: + assert _validate_npi("abcdefghij") is False + + def test_too_short(self) -> None: + assert _validate_npi("12345") is False + + def test_valid_npi_second(self) -> None: + assert _validate_npi("1679576722") is True + + def test_all_zeros_invalid(self) -> None: + assert _validate_npi("0000000000") is False diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_pii.py b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_pii.py new file mode 100644 index 0000000..61ced3b --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_detectors/test_rules/test_pii.py @@ -0,0 +1,138 @@ +""" +©AngelaMos | 2026 +test_pii.py +""" + + +import pytest + +from dlp_scanner.detectors.rules.pii import ( + SSN_PATTERN, + EMAIL_PATTERN, + PHONE_US_PATTERN, + IPV4_PATTERN, + _validate_ssn, +) + + +class TestSSNPattern: + @pytest.mark.parametrize( + "text", + [ + "234-56-7890", + "567-89-0123", + "001-01-0001", + "899-99-9999", + ], + ) + def test_valid_ssns_match(self, text: str) -> None: + assert SSN_PATTERN.search(text) is not None + + @pytest.mark.parametrize( + "text", + [ + "000-45-6789", + "666-45-6789", + "900-45-6789", + "999-45-6789", + "123-00-6789", + "123-45-0000", + ], + ) + def test_invalid_ssns_rejected(self, text: str) -> None: + match = SSN_PATTERN.search(text) + if match is not None: + assert not _validate_ssn(match.group()) + + +class TestSSNValidation: + def test_valid_ssn(self) -> None: + assert _validate_ssn("234-56-7890") is True + + def test_invalid_area_000(self) -> None: + assert _validate_ssn("000-45-6789") is False + + def test_invalid_area_666(self) -> None: + assert _validate_ssn("666-45-6789") is False + + def test_invalid_area_900_plus(self) -> None: + assert _validate_ssn("950-45-6789") is False + + def test_invalid_group_00(self) -> None: + assert _validate_ssn("123-00-6789") is False + + def test_invalid_serial_0000(self) -> None: + assert _validate_ssn("123-45-0000") is False + + def 
test_bare_format(self) -> None: + assert _validate_ssn("234567890") is True + + def test_non_numeric(self) -> None: + assert _validate_ssn("abc-de-fghi") is False + + +class TestEmailPattern: + @pytest.mark.parametrize( + "text", + [ + "user@example.com", + "first.last@company.org", + "user+tag@domain.co.uk", + "test_email@test.museum", + ], + ) + def test_valid_emails_match(self, text: str) -> None: + assert EMAIL_PATTERN.search(text) is not None + + @pytest.mark.parametrize( + "text", + [ + "not-an-email", + "@nodomain", + "user@", + "user@.com", + ], + ) + def test_invalid_emails_rejected(self, text: str) -> None: + assert EMAIL_PATTERN.search(text) is None + + +class TestPhoneUSPattern: + @pytest.mark.parametrize( + "text", + [ + "(555) 234-5678", + "555-234-5678", + "555.234.5678", + "+1 555-234-5678", + "1-555-234-5678", + ], + ) + def test_valid_phones_match(self, text: str) -> None: + assert PHONE_US_PATTERN.search(text) is not None + + +class TestIPv4Pattern: + @pytest.mark.parametrize( + "text", + [ + "192.168.1.1", + "10.0.0.1", + "255.255.255.255", + "0.0.0.0", + "172.16.0.1", + ], + ) + def test_valid_ips_match(self, text: str) -> None: + assert IPV4_PATTERN.search(text) is not None + + @pytest.mark.parametrize( + "text", + [ + "256.1.1.1", + "1.1.1.256", + "999.999.999.999", + ], + ) + def test_invalid_ips_rejected(self, text: str) -> None: + assert IPV4_PATTERN.search(text) is None diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_engine.py b/PROJECTS/intermediate/dlp-scanner/tests/test_engine.py new file mode 100644 index 0000000..ecb853d --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_engine.py @@ -0,0 +1,220 @@ +""" +©AngelaMos | 2026 +test_engine.py +""" + + +import json +import tempfile +from pathlib import Path +from collections.abc import Generator + +import pytest + +from dlp_scanner.config import ScanConfig +from dlp_scanner.engine import ScanEngine +from dlp_scanner.models import ( + Finding, + Location, + 
ScanResult, +) + + +@pytest.fixture +def engine() -> ScanEngine: + """ + Provide a ScanEngine with default config + """ + return ScanEngine(ScanConfig()) + + +@pytest.fixture +def pii_dir() -> Generator[Path, None, None]: + """ + Provide a temp directory with valid detectable SSNs + """ + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + csv_path = root / "employees.csv" + csv_path.write_text( + "name,ssn\n" + "Alice,456-78-9012\n" + "Bob,234-56-7890\n" + ) + yield root + + +@pytest.fixture +def clean_dir() -> Generator[Path, None, None]: + """ + Provide a temp directory with no sensitive data + """ + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + txt_path = root / "readme.txt" + txt_path.write_text("This file contains no sensitive data.") + yield root + + +@pytest.fixture +def result_with_findings() -> ScanResult: + """ + Provide a ScanResult with test findings + """ + result = ScanResult(targets_scanned = 1) + result.findings = [ + Finding( + rule_id = "PII_SSN", + rule_name = "US Social Security Number", + severity = "critical", + confidence = 0.95, + location = Location( + source_type = "file", + uri = "data.csv", + line = 5, + ), + redacted_snippet = "***-**-6789", + compliance_frameworks = ["HIPAA", + "CCPA"], + remediation = "Encrypt data", + ), + ] + return result + + +class TestScanEngine: + def test_scan_files_finds_pii( + self, + engine: ScanEngine, + pii_dir: Path + ) -> None: + result = engine.scan_files(str(pii_dir)) + assert len(result.findings) > 0 + + def test_scan_files_clean_dir( + self, + engine: ScanEngine, + clean_dir: Path + ) -> None: + result = engine.scan_files(str(clean_dir)) + assert len(result.findings) == 0 + + def test_scan_files_nonexistent(self, engine: ScanEngine) -> None: + result = engine.scan_files("/no/such/path") + assert len(result.errors) > 0 + + def test_scan_files_sets_completed_at( + self, + engine: ScanEngine, + pii_dir: Path + ) -> None: + result = 
engine.scan_files(str(pii_dir)) + assert result.scan_completed_at is not None + + def test_scan_database_sqlite(self, engine: ScanEngine) -> None: + with tempfile.NamedTemporaryFile(suffix = ".db", + delete = False) as f: + db_path = f.name + + import sqlite3 + + conn = sqlite3.connect(db_path) + conn.execute("CREATE TABLE users " + "(name TEXT, ssn TEXT)") + conn.execute( + "INSERT INTO users VALUES " + "('Alice', '456-78-9012')" + ) + conn.commit() + conn.close() + + uri = f"sqlite:///{db_path}" + result = engine.scan_database(uri) + assert len(result.findings) > 0 + + Path(db_path).unlink(missing_ok = True) + + def test_generate_report_json( + self, + engine: ScanEngine, + result_with_findings: ScanResult, + ) -> None: + output = engine.generate_report(result_with_findings, "json") + data = json.loads(output) + assert "findings" in data + assert len(data["findings"]) == 1 + + def test_generate_report_sarif( + self, + engine: ScanEngine, + result_with_findings: ScanResult, + ) -> None: + output = engine.generate_report(result_with_findings, "sarif") + data = json.loads(output) + assert data["version"] == "2.1.0" + + def test_generate_report_csv( + self, + engine: ScanEngine, + result_with_findings: ScanResult, + ) -> None: + output = engine.generate_report(result_with_findings, "csv") + lines = output.strip().split("\n") + assert len(lines) == 2 + + def test_generate_report_console( + self, + engine: ScanEngine, + result_with_findings: ScanResult, + ) -> None: + output = engine.generate_report(result_with_findings, "console") + assert "PII_SSN" in output or "Social" in output + + def test_generate_report_uses_config_default( + self, + result_with_findings: ScanResult, + ) -> None: + config = ScanConfig() + config.output.format = "json" + engine = ScanEngine(config) + output = engine.generate_report(result_with_findings) + data = json.loads(output) + assert "findings" in data + + def test_display_console( + self, + engine: ScanEngine, + result_with_findings: 
ScanResult, + ) -> None: + engine.display_console(result_with_findings) + + def test_write_report( + self, + engine: ScanEngine, + result_with_findings: ScanResult, + ) -> None: + with tempfile.NamedTemporaryFile( + suffix = ".json", + delete = False, + mode = "w", + ) as f: + output_path = f.name + + engine.write_report( + result_with_findings, + output_path, + "json", + ) + content = Path(output_path).read_text() + data = json.loads(content) + assert len(data["findings"]) == 1 + + Path(output_path).unlink(missing_ok = True) + + +class TestReporterMap: + def test_all_formats_have_reporters(self) -> None: + from dlp_scanner.engine import REPORTER_MAP + + expected = {"console", "json", "sarif", "csv"} + assert set(REPORTER_MAP.keys()) == expected diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_extractors/__init__.py b/PROJECTS/intermediate/dlp-scanner/tests/test_extractors/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_extractors/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_network/__init__.py b/PROJECTS/intermediate/dlp-scanner/tests/test_network/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_network/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_exfiltration.py b/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_exfiltration.py new file mode 100644 index 0000000..e956c21 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_exfiltration.py @@ -0,0 +1,160 @@ +""" +©AngelaMos | 2026 +test_exfiltration.py +""" + + +from dlp_scanner.network.exfiltration import ( + DnsExfilDetector, + _extract_base_domain, + detect_base64_payload, +) +from dlp_scanner.network.protocols import DnsQuery + + +class 
TestExtractBaseDomain: + def test_simple_domain(self) -> None: + assert (_extract_base_domain("www.example.com") == "example.com") + + def test_deep_subdomain(self) -> None: + result = _extract_base_domain("a.b.c.example.com") + assert result == "example.com" + + def test_trailing_dot(self) -> None: + result = _extract_base_domain("www.example.com.") + assert result == "example.com" + + def test_single_label(self) -> None: + assert _extract_base_domain("localhost") == ("localhost") + + def test_two_labels(self) -> None: + assert (_extract_base_domain("example.com") == "example.com") + + +class TestDnsExfilDetector: + def test_normal_query_no_indicator(self) -> None: + detector = DnsExfilDetector() + query = DnsQuery( + name = "www.google.com", + query_type = "A", + query_class = "1", + ) + result = detector.analyze_query(query, "10.0.0.1", "8.8.8.8") + assert result is None + + def test_long_label_detected(self) -> None: + detector = DnsExfilDetector() + long_label = "a" * 55 + query = DnsQuery( + name = f"{long_label}.evil.com", + query_type = "A", + query_class = "1", + ) + result = detector.analyze_query(query, "10.0.0.1", "1.2.3.4") + assert result is not None + assert (result.indicator_type == "dns_long_label") + + def test_high_entropy_subdomain(self) -> None: + detector = DnsExfilDetector(entropy_threshold = 3.5) + encoded = "aGVsbG8gd29ybGQgdGhpcw" + query = DnsQuery( + name = f"{encoded}.evil.com", + query_type = "A", + query_class = "1", + ) + result = detector.analyze_query(query, "10.0.0.1", "1.2.3.4") + assert result is not None + assert (result.indicator_type == "dns_high_entropy") + + def test_long_qname_detected(self) -> None: + detector = DnsExfilDetector() + parts = ["abc"] * 40 + name = ".".join(parts) + ".evil.com" + query = DnsQuery( + name = name, + query_type = "A", + query_class = "1", + ) + result = detector.analyze_query(query, "10.0.0.1", "1.2.3.4") + assert result is not None + + def test_txt_volume_detection(self) -> None: + detector = 
DnsExfilDetector() + for _ in range(10): + detector.analyze_query( + DnsQuery( + name = "data.evil.com", + query_type = "TXT", + query_class = "1", + ), + "10.0.0.1", + "1.2.3.4", + ) + + indicators = detector.check_txt_volume() + assert len(indicators) > 0 + assert (indicators[0].indicator_type == "dns_txt_volume") + + def test_get_indicators_accumulates( + self, + ) -> None: + detector = DnsExfilDetector() + long_label = "x" * 55 + query = DnsQuery( + name = f"{long_label}.evil.com", + query_type = "A", + query_class = "1", + ) + detector.analyze_query(query, "10.0.0.1", "1.2.3.4") + detector.analyze_query(query, "10.0.0.1", "1.2.3.4") + + indicators = detector.get_indicators() + assert len(indicators) == 2 + + def test_short_subdomain_no_entropy_check( + self, + ) -> None: + detector = DnsExfilDetector(entropy_threshold = 3.0) + query = DnsQuery( + name = "example.com", + query_type = "A", + query_class = "1", + ) + result = detector.analyze_query(query, "10.0.0.1", "8.8.8.8") + assert result is None + + +class TestDetectBase64Payload: + def test_base64_detected(self) -> None: + payload = (b"data=" + b"A" * 50 + b"== end") + indicators = detect_base64_payload(payload) + assert len(indicators) > 0 + assert (indicators[0].indicator_type == "base64_payload") + + def test_hex_detected(self) -> None: + payload = b"0x" + b"aabbccdd" * 10 + indicators = detect_base64_payload(payload) + assert len(indicators) > 0 + types = {i.indicator_type for i in indicators} + assert "hex_payload" in types + + def test_normal_text_no_detection(self) -> None: + payload = b"Hello, this is normal text." 
+ indicators = detect_base64_payload(payload) + assert len(indicators) == 0 + + def test_short_base64_not_detected(self) -> None: + payload = b"dGVzdA==" + indicators = detect_base64_payload(payload) + assert len(indicators) == 0 + + def test_source_ip_preserved(self) -> None: + payload = b"A" * 50 + indicators = detect_base64_payload( + payload, + src_ip = "10.0.0.1", + dst_ip = "1.2.3.4", + ) + assert len(indicators) > 0 + assert indicators[0].source_ip == "10.0.0.1" + assert indicators[0].dest_ip == "1.2.3.4" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_flow_tracker.py b/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_flow_tracker.py new file mode 100644 index 0000000..642e56b --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_flow_tracker.py @@ -0,0 +1,195 @@ +""" +©AngelaMos | 2026 +test_flow_tracker.py +""" + + +from dlp_scanner.network.flow_tracker import ( + FlowTracker, + make_flow_key, +) +from dlp_scanner.network.pcap import PacketInfo + + +def _make_packet( + src_ip: str = "192.168.1.1", + dst_ip: str = "10.0.0.1", + src_port: int = 12345, + dst_port: int = 80, + protocol: str = "tcp", + payload: bytes = b"data", + timestamp: float = 1.0, + tcp_seq: int = 0, +) -> PacketInfo: + """ + Helper to create a PacketInfo for testing + """ + return PacketInfo( + timestamp = timestamp, + src_ip = src_ip, + dst_ip = dst_ip, + src_port = src_port, + dst_port = dst_port, + protocol = protocol, + payload = payload, + raw_length = len(payload) + 54, + tcp_seq = tcp_seq, + ) + + +class TestMakeFlowKey: + def test_bidirectional_key(self) -> None: + pkt_fwd = _make_packet( + src_ip = "192.168.1.1", + dst_ip = "10.0.0.1", + src_port = 12345, + dst_port = 80, + ) + pkt_rev = _make_packet( + src_ip = "10.0.0.1", + dst_ip = "192.168.1.1", + src_port = 80, + dst_port = 12345, + ) + assert make_flow_key(pkt_fwd) == make_flow_key(pkt_rev) + + def test_different_ports_different_key( + self, + ) -> None: + pkt1 = 
_make_packet(src_port = 1000) + pkt2 = _make_packet(src_port = 2000) + assert make_flow_key(pkt1) != make_flow_key(pkt2) + + +class TestFlowTracker: + def test_add_single_packet(self) -> None: + tracker = FlowTracker() + pkt = _make_packet() + tracker.add_packet(pkt) + + assert tracker.flow_count == 1 + flows = tracker.get_flows() + assert flows[0].packet_count == 1 + assert flows[0].total_bytes == 4 + + def test_add_multiple_packets_same_flow( + self, + ) -> None: + tracker = FlowTracker() + pkt1 = _make_packet(timestamp = 1.0) + pkt2 = _make_packet(timestamp = 2.0) + tracker.add_packet(pkt1) + tracker.add_packet(pkt2) + + assert tracker.flow_count == 1 + flow = tracker.get_flows()[0] + assert flow.packet_count == 2 + assert flow.total_bytes == 8 + assert flow.start_time == 1.0 + assert flow.end_time == 2.0 + + def test_different_flows_tracked(self) -> None: + tracker = FlowTracker() + pkt1 = _make_packet(dst_port = 80) + pkt2 = _make_packet(dst_port = 443) + tracker.add_packet(pkt1) + tracker.add_packet(pkt2) + + assert tracker.flow_count == 2 + + def test_bidirectional_packets_same_flow( + self, + ) -> None: + tracker = FlowTracker() + pkt_out = _make_packet( + src_ip = "192.168.1.1", + dst_ip = "10.0.0.1", + ) + pkt_in = _make_packet( + src_ip = "10.0.0.1", + dst_ip = "192.168.1.1", + src_port = 80, + dst_port = 12345, + ) + tracker.add_packet(pkt_out) + tracker.add_packet(pkt_in) + + assert tracker.flow_count == 1 + flow = tracker.get_flows()[0] + assert flow.packet_count == 2 + + def test_reassemble_stream_ordered( + self, + ) -> None: + tracker = FlowTracker() + pkt1 = _make_packet( + payload = b"first", + tcp_seq = 100, + timestamp = 1.0, + ) + pkt2 = _make_packet( + payload = b"second", + tcp_seq = 200, + timestamp = 2.0, + ) + pkt3 = _make_packet( + payload = b"third", + tcp_seq = 150, + timestamp = 1.5, + ) + tracker.add_packet(pkt1) + tracker.add_packet(pkt2) + tracker.add_packet(pkt3) + + key = make_flow_key(pkt1) + stream = 
tracker.reassemble_stream(key) + assert stream == b"firstthirdsecond" + + def test_reassemble_deduplicates_retransmits( + self, + ) -> None: + tracker = FlowTracker() + pkt1 = _make_packet( + payload = b"data", + tcp_seq = 100, + ) + pkt2 = _make_packet( + payload = b"data", + tcp_seq = 100, + ) + tracker.add_packet(pkt1) + tracker.add_packet(pkt2) + + key = make_flow_key(pkt1) + stream = tracker.reassemble_stream(key) + assert stream == b"data" + + def test_reassemble_unknown_key(self) -> None: + tracker = FlowTracker() + result = tracker.reassemble_stream(("1.1.1.1", "2.2.2.2", 1, 2)) + assert result == b"" + + def test_get_flow_by_key(self) -> None: + tracker = FlowTracker() + pkt = _make_packet() + tracker.add_packet(pkt) + + key = make_flow_key(pkt) + flow = tracker.get_flow(key) + assert flow is not None + assert flow.packet_count == 1 + + def test_get_flow_missing_key(self) -> None: + tracker = FlowTracker() + flow = tracker.get_flow(("1.1.1.1", "2.2.2.2", 0, 0)) + assert flow is None + + def test_empty_payload_not_stored(self) -> None: + tracker = FlowTracker() + pkt = _make_packet(payload = b"") + tracker.add_packet(pkt) + + key = make_flow_key(pkt) + flow = tracker.get_flow(key) + assert flow is not None + assert len(flow.segments) == 0 diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_pcap.py b/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_pcap.py new file mode 100644 index 0000000..263bd64 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_pcap.py @@ -0,0 +1,60 @@ +""" +©AngelaMos | 2026 +test_pcap.py +""" + + +from dlp_scanner.network.pcap import PacketInfo + + +class TestPacketInfo: + def test_tcp_packet_construction(self) -> None: + pkt = PacketInfo( + timestamp = 1000.0, + src_ip = "192.168.1.1", + dst_ip = "10.0.0.1", + src_port = 12345, + dst_port = 80, + protocol = "tcp", + payload = b"hello", + raw_length = 100, + tcp_flags = 0x02, + tcp_seq = 1000, + ) + assert pkt.src_ip == 
"192.168.1.1" + assert pkt.dst_ip == "10.0.0.1" + assert pkt.protocol == "tcp" + assert pkt.payload == b"hello" + assert pkt.tcp_seq == 1000 + + def test_udp_packet_defaults(self) -> None: + pkt = PacketInfo( + timestamp = 1000.0, + src_ip = "10.0.0.1", + dst_ip = "8.8.8.8", + src_port = 54321, + dst_port = 53, + protocol = "udp", + payload = b"\x00", + raw_length = 50, + ) + assert pkt.tcp_flags == 0 + assert pkt.tcp_seq == 0 + assert pkt.protocol == "udp" + + def test_packet_is_frozen(self) -> None: + pkt = PacketInfo( + timestamp = 1.0, + src_ip = "1.1.1.1", + dst_ip = "2.2.2.2", + src_port = 1, + dst_port = 2, + protocol = "tcp", + payload = b"", + raw_length = 0, + ) + try: + pkt.src_ip = "changed" + raise AssertionError() + except AttributeError: + pass diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_protocols.py b/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_protocols.py new file mode 100644 index 0000000..d59dd60 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_network/test_protocols.py @@ -0,0 +1,158 @@ +""" +©AngelaMos | 2026 +test_protocols.py +""" + + +from dlp_scanner.network.protocols import ( + _is_http_request, + _parse_txt_rdata, + identify_protocol, + parse_dns, + parse_http, +) + + +class TestIdentifyProtocol: + def test_http_get_request(self) -> None: + payload = b"GET / HTTP/1.1\r\nHost: x\r\n\r\n" + assert identify_protocol(payload) == "http" + + def test_http_post_request(self) -> None: + payload = b"POST /api HTTP/1.1\r\n\r\n" + assert identify_protocol(payload) == "http" + + def test_http_response(self) -> None: + payload = b"HTTP/1.1 200 OK\r\n\r\n" + assert identify_protocol(payload) == "http" + + def test_tls_handshake(self) -> None: + payload = b"\x16\x03\x01\x00\x05hello" + assert identify_protocol(payload) == "tls" + + def test_ssh_banner(self) -> None: + payload = b"SSH-2.0-OpenSSH_8.9\r\n" + assert identify_protocol(payload) == "ssh" + + def test_smtp_banner(self) -> None: + 
payload = b"220 mail.example.com ESMTP" + assert identify_protocol(payload) == "smtp" + + def test_unknown_protocol(self) -> None: + payload = b"\x00\x01\x02\x03" + assert identify_protocol(payload) == "unknown" + + def test_empty_payload(self) -> None: + assert identify_protocol(b"") == "unknown" + + +class TestIsHttpRequest: + def test_get_is_http(self) -> None: + assert _is_http_request(b"GET /path HTTP/1.1") + + def test_delete_is_http(self) -> None: + assert _is_http_request(b"DELETE /resource HTTP/1.1") + + def test_random_bytes_not_http(self) -> None: + assert not _is_http_request(b"\x00\x01\x02") + + def test_short_payload_not_http(self) -> None: + assert not _is_http_request(b"HI") + + +class TestParseHttp: + def test_parse_get_request(self) -> None: + raw = ( + b"GET /index.html HTTP/1.1\r\n" + b"Host: example.com\r\n" + b"\r\n" + ) + result = parse_http(raw) + assert result is not None + assert result.method == "GET" + assert result.uri == "/index.html" + assert result.is_request is True + assert "host" in result.headers + + def test_parse_post_with_body(self) -> None: + body = b"key=value" + raw = ( + b"POST /api HTTP/1.1\r\n" + b"Content-Length: 9\r\n" + b"\r\n" + body + ) + result = parse_http(raw) + assert result is not None + assert result.method == "POST" + assert result.body == "key=value" + + def test_parse_response(self) -> None: + raw = ( + b"HTTP/1.1 200 OK\r\n" + b"Content-Type: text/html\r\n" + b"Content-Length: 5\r\n" + b"\r\nhello" + ) + result = parse_http(raw) + assert result is not None + assert result.is_request is False + assert result.body == "hello" + + def test_invalid_data_returns_none(self) -> None: + assert parse_http(b"\x00\x01") is None + + +class TestParseDns: + def test_parse_dns_query(self) -> None: + import dpkt + + dns = dpkt.dns.DNS() + dns.id = 1234 + dns.qr = dpkt.dns.DNS_Q + dns.opcode = dpkt.dns.DNS_QUERY + q = dpkt.dns.DNS.Q() + q.name = "example.com" + q.type = dpkt.dns.DNS_A + q.cls = dpkt.dns.DNS_IN + dns.qd = 
[q] + + result = parse_dns(bytes(dns)) + assert result is not None + assert len(result.queries) == 1 + assert result.queries[0].name == "example.com" + assert result.queries[0].query_type == "A" + assert result.is_response is False + assert result.transaction_id == 1234 + + def test_parse_txt_query(self) -> None: + import dpkt + + dns = dpkt.dns.DNS() + dns.id = 5678 + dns.qr = dpkt.dns.DNS_Q + q = dpkt.dns.DNS.Q() + q.name = "data.evil.com" + q.type = dpkt.dns.DNS_TXT + q.cls = dpkt.dns.DNS_IN + dns.qd = [q] + + result = parse_dns(bytes(dns)) + assert result is not None + assert result.queries[0].query_type == "TXT" + assert (result.queries[0].name == "data.evil.com") + + def test_invalid_data_returns_none(self) -> None: + assert parse_dns(b"\x00\x01") is None + + +class TestParseTxtRdata: + def test_single_string(self) -> None: + rdata = b"\x05hello" + assert _parse_txt_rdata(rdata) == "hello" + + def test_multiple_strings(self) -> None: + rdata = b"\x02hi\x05world" + assert _parse_txt_rdata(rdata) == "hi world" + + def test_empty_rdata(self) -> None: + assert _parse_txt_rdata(b"") == "" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/__init__.py b/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_csv_report.py b/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_csv_report.py new file mode 100644 index 0000000..b6bef16 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_csv_report.py @@ -0,0 +1,104 @@ +""" +©AngelaMos | 2026 +test_csv_report.py +""" + + +import csv +import io + +import pytest + +from dlp_scanner.models import ( + Finding, + Location, + ScanResult, +) +from dlp_scanner.reporters.csv_report import ( + CSV_COLUMNS, + 
CsvReporter, +) + + +@pytest.fixture +def result_with_findings() -> ScanResult: + """ + Provide a ScanResult with test findings + """ + result = ScanResult(targets_scanned = 1) + result.findings = [ + Finding( + rule_id = "PII_SSN", + rule_name = "US Social Security Number", + severity = "critical", + confidence = 0.95, + location = Location( + source_type = "file", + uri = "data.csv", + line = 5, + ), + redacted_snippet = "***-**-6789", + compliance_frameworks = [ + "HIPAA", + "CCPA", + ], + remediation = "Encrypt data", + ), + ] + return result + + +class TestCsvReporter: + def test_generates_valid_csv( + self, + result_with_findings: ScanResult + ) -> None: + reporter = CsvReporter() + output = reporter.generate(result_with_findings) + reader = csv.reader(io.StringIO(output)) + rows = list(reader) + assert len(rows) == 2 + + def test_header_matches_columns( + self, + result_with_findings: ScanResult + ) -> None: + reporter = CsvReporter() + output = reporter.generate(result_with_findings) + reader = csv.reader(io.StringIO(output)) + header = next(reader) + assert header == CSV_COLUMNS + + def test_finding_row_data( + self, + result_with_findings: ScanResult + ) -> None: + reporter = CsvReporter() + output = reporter.generate(result_with_findings) + reader = csv.reader(io.StringIO(output)) + next(reader) + row = next(reader) + assert row[2] == "critical" + assert row[4] == "PII_SSN" + assert row[7] == "data.csv" + assert "HIPAA" in row[12] + assert "CCPA" in row[12] + + def test_empty_result(self) -> None: + reporter = CsvReporter() + result = ScanResult() + output = reporter.generate(result) + reader = csv.reader(io.StringIO(output)) + rows = list(reader) + assert len(rows) == 1 + + def test_frameworks_semicolon_separated( + self, + result_with_findings: ScanResult + ) -> None: + reporter = CsvReporter() + output = reporter.generate(result_with_findings) + reader = csv.reader(io.StringIO(output)) + next(reader) + row = next(reader) + assert row[12] == 
"HIPAA;CCPA" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_json_report.py b/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_json_report.py new file mode 100644 index 0000000..8d677ea --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_json_report.py @@ -0,0 +1,159 @@ +""" +©AngelaMos | 2026 +test_json_report.py +""" + + +import json + +import pytest + +from dlp_scanner.models import ( + Finding, + Location, + ScanResult, +) +from dlp_scanner.reporters.json_report import ( + JsonReporter, +) + + +@pytest.fixture +def result_with_findings() -> ScanResult: + """ + Provide a ScanResult with test findings + """ + result = ScanResult(targets_scanned = 3) + result.findings = [ + Finding( + rule_id = "PII_SSN", + rule_name = "US Social Security Number", + severity = "critical", + confidence = 0.95, + location = Location( + source_type = "file", + uri = "employees.csv", + line = 2, + ), + redacted_snippet = "SSN: ***-**-6789", + compliance_frameworks = [ + "HIPAA", + "CCPA", + ], + remediation = "Encrypt SSN data", + ), + Finding( + rule_id = "PII_EMAIL", + rule_name = "Email Address", + severity = "medium", + confidence = 0.65, + location = Location( + source_type = "file", + uri = "contacts.json", + ), + redacted_snippet = "j***@example.com", + compliance_frameworks = ["GDPR"], + remediation = "Hash emails", + ), + ] + return result + + +@pytest.fixture +def empty_result() -> ScanResult: + """ + Provide a ScanResult with no findings + """ + return ScanResult(targets_scanned = 5) + + +class TestJsonReporter: + def test_generates_valid_json( + self, + result_with_findings: ScanResult + ) -> None: + reporter = JsonReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + assert isinstance(data, dict) + + def test_has_metadata_section( + self, + result_with_findings: ScanResult + ) -> None: + reporter = JsonReporter() + output = reporter.generate(result_with_findings) + data = 
json.loads(output) + meta = data["scan_metadata"] + assert meta["scan_id"] + assert meta["tool_version"] == "0.1.0" + assert meta["targets_scanned"] == 3 + assert meta["total_findings"] == 2 + + def test_has_findings_section( + self, + result_with_findings: ScanResult + ) -> None: + reporter = JsonReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + findings = data["findings"] + assert len(findings) == 2 + assert findings[0]["rule_id"] == "PII_SSN" + assert findings[0]["severity"] == "critical" + assert findings[0]["confidence"] == 0.95 + + def test_finding_has_location( + self, + result_with_findings: ScanResult + ) -> None: + reporter = JsonReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + loc = data["findings"][0]["location"] + assert loc["source_type"] == "file" + assert loc["uri"] == "employees.csv" + assert loc["line"] == 2 + + def test_has_summary_section( + self, + result_with_findings: ScanResult + ) -> None: + reporter = JsonReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + summary = data["summary"] + assert summary["by_severity"]["critical"] == 1 + assert summary["by_severity"]["medium"] == 1 + assert summary["by_rule"]["PII_SSN"] == 1 + assert summary["by_framework"]["HIPAA"] == 1 + + def test_empty_result_has_zero_findings( + self, + empty_result: ScanResult + ) -> None: + reporter = JsonReporter() + output = reporter.generate(empty_result) + data = json.loads(output) + assert len(data["findings"]) == 0 + assert (data["scan_metadata"]["total_findings"] == 0) + + def test_finding_has_remediation( + self, + result_with_findings: ScanResult + ) -> None: + reporter = JsonReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + assert (data["findings"][0]["remediation"] == "Encrypt SSN data") + + def test_finding_has_compliance( + self, + result_with_findings: ScanResult + ) -> None: + reporter = 
JsonReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + frameworks = data["findings"][0]["compliance_frameworks"] + assert "HIPAA" in frameworks + assert "CCPA" in frameworks diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_sarif.py b/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_sarif.py new file mode 100644 index 0000000..65f9cd2 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_reporters/test_sarif.py @@ -0,0 +1,174 @@ +""" +©AngelaMos | 2026 +test_sarif.py +""" + + +import json + +import pytest + +from dlp_scanner.models import ( + Finding, + Location, + ScanResult, +) +from dlp_scanner.reporters.sarif import ( + SarifReporter, +) + + +@pytest.fixture +def result_with_findings() -> ScanResult: + """ + Provide a ScanResult with test findings + """ + result = ScanResult(targets_scanned = 2) + result.findings = [ + Finding( + rule_id = "PII_SSN", + rule_name = "US Social Security Number", + severity = "critical", + confidence = 0.95, + location = Location( + source_type = "file", + uri = "data/employees.csv", + line = 10, + column = 5, + ), + redacted_snippet = "***-**-6789", + compliance_frameworks = [ + "HIPAA", + "CCPA", + ], + remediation = "Encrypt SSN data", + ), + Finding( + rule_id = "CRED_AWS_ACCESS_KEY", + rule_name = "AWS Access Key", + severity = "high", + confidence = 0.85, + location = Location( + source_type = "database", + uri = "postgresql://host/db", + table_name = "config", + ), + redacted_snippet = "AKIA****", + compliance_frameworks = [], + remediation = "Rotate credentials", + ), + ] + return result + + +class TestSarifReporter: + def test_generates_valid_json( + self, + result_with_findings: ScanResult + ) -> None: + reporter = SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + assert isinstance(data, dict) + + def test_has_sarif_version( + self, + result_with_findings: ScanResult + ) -> None: + reporter 
= SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + assert data["version"] == "2.1.0" + assert "$schema" in data + + def test_has_tool_driver( + self, + result_with_findings: ScanResult + ) -> None: + reporter = SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + driver = data["runs"][0]["tool"]["driver"] + assert driver["name"] == "dlp-scanner" + assert driver["version"] == "0.1.0" + + def test_rules_collected( + self, + result_with_findings: ScanResult + ) -> None: + reporter = SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + rules = (data["runs"][0]["tool"]["driver"]["rules"]) + assert len(rules) == 2 + rule_ids = {r["id"] for r in rules} + assert "PII_SSN" in rule_ids + assert "CRED_AWS_ACCESS_KEY" in rule_ids + + def test_results_match_findings( + self, + result_with_findings: ScanResult + ) -> None: + reporter = SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + results = data["runs"][0]["results"] + assert len(results) == 2 + + def test_severity_mapped_to_level( + self, + result_with_findings: ScanResult + ) -> None: + reporter = SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + results = data["runs"][0]["results"] + assert results[0]["level"] == "error" + assert results[1]["level"] == "error" + + def test_location_has_artifact( + self, + result_with_findings: ScanResult + ) -> None: + reporter = SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + loc = data["runs"][0]["results"][0]["locations"][0] + physical = loc["physicalLocation"] + assert ( + physical["artifactLocation"]["uri"] == "data/employees.csv" + ) + assert physical["region"]["startLine"] == 10 + assert (physical["region"]["startColumn"] == 5) + + def test_database_finding_has_logical_location( + self, + 
result_with_findings: ScanResult + ) -> None: + reporter = SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + loc = data["runs"][0]["results"][1]["locations"][0] + logical = loc["logicalLocations"] + assert logical[0]["name"] == "config" + assert logical[0]["kind"] == "table" + + def test_properties_has_confidence( + self, + result_with_findings: ScanResult + ) -> None: + reporter = SarifReporter() + output = reporter.generate(result_with_findings) + data = json.loads(output) + props = data["runs"][0]["results"][0]["properties"] + assert props["confidence"] == 0.95 + assert props["redactedSnippet"] + assert "HIPAA" in (props["complianceFrameworks"]) + + def test_empty_result(self) -> None: + reporter = SarifReporter() + result = ScanResult() + output = reporter.generate(result) + data = json.loads(output) + assert len(data["runs"][0]["results"]) == 0 + assert (len(data["runs"][0]["tool"]["driver"]["rules"]) == 0) diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/__init__.py b/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/__init__.py new file mode 100644 index 0000000..e1add2a --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/__init__.py @@ -0,0 +1,4 @@ +""" +©AngelaMos | 2026 +__init__.py +""" diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/test_db_scanner.py b/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/test_db_scanner.py new file mode 100644 index 0000000..b3f157e --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/test_db_scanner.py @@ -0,0 +1,278 @@ +""" +©AngelaMos | 2026 +test_db_scanner.py +""" + + +import sqlite3 +from pathlib import Path +from typing import Any + +import pytest + +from dlp_scanner.config import ScanConfig +from dlp_scanner.detectors.registry import DetectorRegistry +from dlp_scanner.scanners.db_scanner import ( + DatabaseScanner, + _extract_mongo_strings, +) + + +@pytest.fixture +def 
sqlite_db_with_pii(temp_dir: Path) -> str: + """ + Provide a SQLite database containing PII test data + """ + db_path = temp_dir / "test.db" + conn = sqlite3.connect(str(db_path)) + conn.execute( + "CREATE TABLE employees (" + "id INTEGER PRIMARY KEY, " + "name TEXT, " + "ssn TEXT, " + "email TEXT, " + "salary REAL)" + ) + conn.execute( + "INSERT INTO employees " + "(name, ssn, email, salary) " + "VALUES (?, ?, ?, ?)", + ( + "John Doe", + "456-78-9012", + "john@example.com", + 75000.0, + ), + ) + conn.execute( + "INSERT INTO employees " + "(name, ssn, email, salary) " + "VALUES (?, ?, ?, ?)", + ( + "Jane Smith", + "234-56-7890", + "jane@example.com", + 85000.0, + ), + ) + conn.commit() + conn.close() + return f"sqlite:///{db_path}" + + +@pytest.fixture +def sqlite_db_empty(temp_dir: Path) -> str: + """ + Provide a SQLite database with an empty table + """ + db_path = temp_dir / "empty.db" + conn = sqlite3.connect(str(db_path)) + conn.execute( + "CREATE TABLE logs (" + "id INTEGER PRIMARY KEY, " + "message TEXT)" + ) + conn.commit() + conn.close() + return f"sqlite:///{db_path}" + + +@pytest.fixture +def db_scanner() -> DatabaseScanner: + """ + Provide a default DatabaseScanner instance + """ + config = ScanConfig() + registry = DetectorRegistry() + return DatabaseScanner(config = config, registry = registry) + + +class TestDatabaseScanner: + def test_sqlite_scan_finds_pii( + self, + db_scanner: DatabaseScanner, + sqlite_db_with_pii: str, + ) -> None: + result = db_scanner.scan(sqlite_db_with_pii) + assert result.targets_scanned > 0 + assert len(result.findings) > 0 + + def test_sqlite_scan_finds_ssn( + self, + db_scanner: DatabaseScanner, + sqlite_db_with_pii: str, + ) -> None: + result = db_scanner.scan(sqlite_db_with_pii) + ssn_findings = [ + f for f in result.findings if f.rule_id == "PII_SSN" + ] + assert len(ssn_findings) > 0 + + def test_sqlite_scan_empty_table( + self, + db_scanner: DatabaseScanner, + sqlite_db_empty: str, + ) -> None: + result = 
db_scanner.scan(sqlite_db_empty) + assert result.targets_scanned > 0 + assert len(result.findings) == 0 + + def test_findings_have_database_source( + self, + db_scanner: DatabaseScanner, + sqlite_db_with_pii: str, + ) -> None: + result = db_scanner.scan(sqlite_db_with_pii) + for finding in result.findings: + assert (finding.location.source_type == "database") + + def test_findings_have_table_name( + self, + db_scanner: DatabaseScanner, + sqlite_db_with_pii: str, + ) -> None: + result = db_scanner.scan(sqlite_db_with_pii) + for finding in result.findings: + assert (finding.location.table_name == "employees") + + def test_unsupported_scheme_errors( + self, + db_scanner: DatabaseScanner, + ) -> None: + result = db_scanner.scan("ftp://localhost/db") + assert len(result.errors) > 0 + + def test_completed_at_is_set( + self, + db_scanner: DatabaseScanner, + sqlite_db_with_pii: str, + ) -> None: + result = db_scanner.scan(sqlite_db_with_pii) + assert result.scan_completed_at is not None + + def test_findings_have_remediation( + self, + db_scanner: DatabaseScanner, + sqlite_db_with_pii: str, + ) -> None: + result = db_scanner.scan(sqlite_db_with_pii) + for finding in result.findings: + assert finding.remediation + + def test_table_exclude_filter( + self, + temp_dir: Path, + ) -> None: + db_path = temp_dir / "filter.db" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE users " + "(id INTEGER, ssn TEXT)") + conn.execute("INSERT INTO users " + "VALUES (1, '123-45-6789')") + conn.execute("CREATE TABLE audit_log " + "(id INTEGER, note TEXT)") + conn.execute("INSERT INTO audit_log " + "VALUES (1, '987-65-4321')") + conn.commit() + conn.close() + + config = ScanConfig() + config.database.exclude_tables = ["audit_log"] + registry = DetectorRegistry() + scanner = DatabaseScanner(config = config, registry = registry) + + result = scanner.scan(f"sqlite:///{db_path}") + assert result.targets_scanned == 1 + + def test_table_include_filter( + self, + temp_dir: Path, + ) 
-> None: + db_path = temp_dir / "include.db" + conn = sqlite3.connect(str(db_path)) + conn.execute("CREATE TABLE users " + "(id INTEGER, ssn TEXT)") + conn.execute("INSERT INTO users " + "VALUES (1, '123-45-6789')") + conn.execute("CREATE TABLE logs " + "(id INTEGER, msg TEXT)") + conn.execute("INSERT INTO logs " + "VALUES (1, '987-65-4321')") + conn.commit() + conn.close() + + config = ScanConfig() + config.database.include_tables = ["users"] + registry = DetectorRegistry() + scanner = DatabaseScanner(config = config, registry = registry) + + result = scanner.scan(f"sqlite:///{db_path}") + assert result.targets_scanned == 1 + + +class TestExtractMongoStrings: + def test_simple_strings(self) -> None: + doc: dict[str, + Any] = { + "name": "John", + "email": "john@test.com", + } + parts: list[str] = [] + _extract_mongo_strings(doc, parts) + assert len(parts) == 2 + + def test_nested_doc(self) -> None: + doc: dict[str, + Any] = { + "user": { + "name": "Jane", + "ssn": "123-45-6789", + } + } + parts: list[str] = [] + _extract_mongo_strings(doc, parts) + assert any("user.name" in p for p in parts) + assert any("user.ssn" in p for p in parts) + + def test_skips_id_field(self) -> None: + doc: dict[str, + Any] = { + "_id": "abc123", + "name": "Test", + } + parts: list[str] = [] + _extract_mongo_strings(doc, parts) + assert len(parts) == 1 + assert "name" in parts[0] + + def test_list_values(self) -> None: + doc: dict[str, Any] = {"emails": ["a@b.com", "c@d.com"]} + parts: list[str] = [] + _extract_mongo_strings(doc, parts) + assert len(parts) == 2 + + def test_empty_strings_skipped(self) -> None: + doc: dict[str, + Any] = { + "name": "", + "bio": " ", + } + parts: list[str] = [] + _extract_mongo_strings(doc, parts) + assert len(parts) == 0 + + def test_nested_list_of_dicts(self) -> None: + doc: dict[str, + Any] = { + "records": [ + { + "value": "secret" + }, + { + "value": "data" + }, + ] + } + parts: list[str] = [] + _extract_mongo_strings(doc, parts) + assert len(parts) 
== 2 diff --git a/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/test_file_scanner.py b/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/test_file_scanner.py new file mode 100644 index 0000000..f854322 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/tests/test_scanners/test_file_scanner.py @@ -0,0 +1,210 @@ +""" +©AngelaMos | 2026 +test_file_scanner.py +""" + + +from pathlib import Path + +import pytest + +from dlp_scanner.config import ScanConfig +from dlp_scanner.detectors.registry import DetectorRegistry +from dlp_scanner.scanners.file_scanner import ( + FileScanner, + _build_extension_map, + _get_full_suffix, +) + + +@pytest.fixture +def file_scanner() -> FileScanner: + """ + Provide a default FileScanner instance + """ + config = ScanConfig() + registry = DetectorRegistry() + return FileScanner(config = config, registry = registry) + + +class TestFileScanner: + def test_scan_directory_finds_pii( + self, + file_scanner: FileScanner, + temp_dir_with_pii: Path, + ) -> None: + result = file_scanner.scan(str(temp_dir_with_pii)) + assert result.targets_scanned > 0 + assert len(result.findings) > 0 + + def test_scan_single_file( + self, + file_scanner: FileScanner, + temp_dir_with_pii: Path, + ) -> None: + csv_path = temp_dir_with_pii / "employees.csv" + result = file_scanner.scan(str(csv_path)) + assert result.targets_scanned == 1 + assert len(result.findings) > 0 + + def test_scan_clean_file_no_findings( + self, + file_scanner: FileScanner, + temp_dir_with_pii: Path, + ) -> None: + txt_path = temp_dir_with_pii / "clean.txt" + result = file_scanner.scan(str(txt_path)) + assert result.targets_scanned == 1 + assert len(result.findings) == 0 + + def test_scan_nonexistent_target( + self, + file_scanner: FileScanner, + ) -> None: + result = file_scanner.scan("/nonexistent/path") + assert len(result.errors) > 0 + + def test_scan_empty_directory( + self, + file_scanner: FileScanner, + temp_dir: Path, + ) -> None: + result = 
file_scanner.scan(str(temp_dir)) + assert result.targets_scanned == 0 + assert len(result.findings) == 0 + + def test_scan_respects_exclude_patterns( + self, + temp_dir: Path, + ) -> None: + secret = temp_dir / "secret.log" + secret.write_text("SSN: 123-45-6789") + + config = ScanConfig() + config.file.exclude_patterns = ["*.log"] + registry = DetectorRegistry() + scanner = FileScanner(config = config, registry = registry) + + result = scanner.scan(str(temp_dir)) + assert result.targets_scanned == 0 + + def test_scan_respects_max_file_size( + self, + temp_dir: Path, + ) -> None: + large = temp_dir / "large.txt" + large.write_text("SSN: 123-45-6789\n" * 100) + + config = ScanConfig() + config.file.max_file_size_mb = 0 + registry = DetectorRegistry() + scanner = FileScanner(config = config, registry = registry) + + result = scanner.scan(str(temp_dir)) + assert result.targets_scanned == 0 + + def test_scan_completed_at_is_set( + self, + file_scanner: FileScanner, + temp_dir: Path, + ) -> None: + result = file_scanner.scan(str(temp_dir)) + assert result.scan_completed_at is not None + + def test_findings_have_compliance_frameworks( + self, + file_scanner: FileScanner, + temp_dir_with_pii: Path, + ) -> None: + result = file_scanner.scan(str(temp_dir_with_pii)) + ssn_findings = [ + f for f in result.findings if f.rule_id == "PII_SSN" + ] + for finding in ssn_findings: + assert len(finding.compliance_frameworks) > 0 + + def test_findings_have_redacted_snippets( + self, + file_scanner: FileScanner, + temp_dir_with_pii: Path, + ) -> None: + result = file_scanner.scan(str(temp_dir_with_pii)) + for finding in result.findings: + assert finding.redacted_snippet + + def test_findings_have_severity( + self, + file_scanner: FileScanner, + temp_dir_with_pii: Path, + ) -> None: + result = file_scanner.scan(str(temp_dir_with_pii)) + valid_severities = { + "critical", + "high", + "medium", + "low", + } + for finding in result.findings: + assert finding.severity in valid_severities + + 
def test_scan_json_finds_api_key( + self, + file_scanner: FileScanner, + temp_dir_with_pii: Path, + ) -> None: + result = file_scanner.scan(str(temp_dir_with_pii)) + cred_findings = [ + f for f in result.findings if f.rule_id.startswith("CRED_") + ] + assert len(cred_findings) > 0 + + +class TestExtensionMap: + def test_has_common_text_types(self) -> None: + ext_map = _build_extension_map() + assert ".txt" in ext_map + assert ".csv" in ext_map + assert ".json" in ext_map + assert ".xml" in ext_map + assert ".yaml" in ext_map + + def test_has_office_types(self) -> None: + ext_map = _build_extension_map() + assert ".pdf" in ext_map + assert ".docx" in ext_map + assert ".xlsx" in ext_map + assert ".xls" in ext_map + + def test_has_archive_types(self) -> None: + ext_map = _build_extension_map() + assert ".zip" in ext_map + assert ".tar" in ext_map + assert ".tar.gz" in ext_map + + def test_has_email_types(self) -> None: + ext_map = _build_extension_map() + assert ".eml" in ext_map + assert ".msg" in ext_map + + +class TestGetFullSuffix: + def test_simple_extension(self) -> None: + assert _get_full_suffix(Path("f.txt")) == ".txt" + + def test_tar_gz(self) -> None: + path = Path("archive.tar.gz") + assert _get_full_suffix(path) == ".tar.gz" + + def test_tar_bz2(self) -> None: + path = Path("archive.tar.bz2") + assert _get_full_suffix(path) == ".tar.bz2" + + def test_uppercase_normalized(self) -> None: + assert _get_full_suffix(Path("F.TXT")) == ".txt" + + def test_no_extension(self) -> None: + assert _get_full_suffix(Path("Makefile")) == "" + + def test_dotfile(self) -> None: + result = _get_full_suffix(Path(".gitignore")) + assert result == "" diff --git a/PROJECTS/intermediate/dlp-scanner/uv.lock b/PROJECTS/intermediate/dlp-scanner/uv.lock new file mode 100644 index 0000000..207abb9 --- /dev/null +++ b/PROJECTS/intermediate/dlp-scanner/uv.lock @@ -0,0 +1,1377 @@ +version = 1 +revision = 3 +requires-python = ">=3.12" + +[[package]] +name = "aiomysql" +version = "0.3.2" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pymysql" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/29/e0/302aeffe8d90853556f47f3106b89c16cc2ec2a4d269bdfd82e3f4ae12cc/aiomysql-0.3.2.tar.gz", hash = "sha256:72d15ef5cfc34c03468eb41e1b90adb9fd9347b0b589114bd23ead569a02ac1a", size = 108311, upload-time = "2025-10-22T00:15:21.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/af/aae0153c3e28712adaf462328f6c7a3c196a1c1c27b491de4377dd3e6b52/aiomysql-0.3.2-py3-none-any.whl", hash = "sha256:c82c5ba04137d7afd5c693a258bea8ead2aad77101668044143a991e04632eb2", size = 71834, upload-time = "2025-10-22T00:15:15.905Z" }, +] + +[[package]] +name = "aiosqlite" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/8a/64761f4005f17809769d23e518d915db74e6310474e733e3593cfc854ef1/aiosqlite-0.22.1.tar.gz", hash = "sha256:043e0bd78d32888c0a9ca90fc788b38796843360c855a7262a532813133a0650", size = 14821, upload-time = "2025-12-23T19:25:43.997Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = "sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" }, +] + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = 
"sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "asyncpg" +version = "0.31.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/cc/d18065ce2380d80b1bcce927c24a2642efd38918e33fd724bc4bca904877/asyncpg-0.31.0.tar.gz", hash = "sha256:c989386c83940bfbd787180f2b1519415e2d3d6277a70d9d0f0145ac73500735", size = 993667, upload-time = "2025-11-24T23:27:00.812Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/a6/59d0a146e61d20e18db7396583242e32e0f120693b67a8de43f1557033e2/asyncpg-0.31.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b44c31e1efc1c15188ef183f287c728e2046abb1d26af4d20858215d50d91fad", size = 662042, upload-time = "2025-11-24T23:25:49.578Z" }, + { url = "https://files.pythonhosted.org/packages/36/01/ffaa189dcb63a2471720615e60185c3f6327716fdc0fc04334436fbb7c65/asyncpg-0.31.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0c89ccf741c067614c9b5fc7f1fc6f3b61ab05ae4aaa966e6fd6b93097c7d20d", size = 638504, upload-time = "2025-11-24T23:25:51.501Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/62/3f699ba45d8bd24c5d65392190d19656d74ff0185f42e19d0bbd973bb371/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:12b3b2e39dc5470abd5e98c8d3373e4b1d1234d9fbdedf538798b2c13c64460a", size = 3426241, upload-time = "2025-11-24T23:25:53.278Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d1/a867c2150f9c6e7af6462637f613ba67f78a314b00db220cd26ff559d532/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:aad7a33913fb8bcb5454313377cc330fbb19a0cd5faa7272407d8a0c4257b671", size = 3520321, upload-time = "2025-11-24T23:25:54.982Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1a/cce4c3f246805ecd285a3591222a2611141f1669d002163abef999b60f98/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3df118d94f46d85b2e434fd62c84cb66d5834d5a890725fe625f498e72e4d5ec", size = 3316685, upload-time = "2025-11-24T23:25:57.43Z" }, + { url = "https://files.pythonhosted.org/packages/40/ae/0fc961179e78cc579e138fad6eb580448ecae64908f95b8cb8ee2f241f67/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5b6efff3c17c3202d4b37189969acf8927438a238c6257f66be3c426beba20", size = 3471858, upload-time = "2025-11-24T23:25:59.636Z" }, + { url = "https://files.pythonhosted.org/packages/52/b2/b20e09670be031afa4cbfabd645caece7f85ec62d69c312239de568e058e/asyncpg-0.31.0-cp312-cp312-win32.whl", hash = "sha256:027eaa61361ec735926566f995d959ade4796f6a49d3bde17e5134b9964f9ba8", size = 527852, upload-time = "2025-11-24T23:26:01.084Z" }, + { url = "https://files.pythonhosted.org/packages/b5/f0/f2ed1de154e15b107dc692262395b3c17fc34eafe2a78fc2115931561730/asyncpg-0.31.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d6bdcbc93d608a1158f17932de2321f68b1a967a13e014998db87a72ed3186", size = 597175, upload-time = "2025-11-24T23:26:02.564Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/11/97b5c2af72a5d0b9bc3fa30cd4b9ce22284a9a943a150fdc768763caf035/asyncpg-0.31.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c204fab1b91e08b0f47e90a75d1b3c62174dab21f670ad6c5d0f243a228f015b", size = 661111, upload-time = "2025-11-24T23:26:04.467Z" }, + { url = "https://files.pythonhosted.org/packages/1b/71/157d611c791a5e2d0423f09f027bd499935f0906e0c2a416ce712ba51ef3/asyncpg-0.31.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:54a64f91839ba59008eccf7aad2e93d6e3de688d796f35803235ea1c4898ae1e", size = 636928, upload-time = "2025-11-24T23:26:05.944Z" }, + { url = "https://files.pythonhosted.org/packages/2e/fc/9e3486fb2bbe69d4a867c0b76d68542650a7ff1574ca40e84c3111bb0c6e/asyncpg-0.31.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e0822b1038dc7253b337b0f3f676cadc4ac31b126c5d42691c39691962e403", size = 3424067, upload-time = "2025-11-24T23:26:07.957Z" }, + { url = "https://files.pythonhosted.org/packages/12/c6/8c9d076f73f07f995013c791e018a1cd5f31823c2a3187fc8581706aa00f/asyncpg-0.31.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bef056aa502ee34204c161c72ca1f3c274917596877f825968368b2c33f585f4", size = 3518156, upload-time = "2025-11-24T23:26:09.591Z" }, + { url = "https://files.pythonhosted.org/packages/ae/3b/60683a0baf50fbc546499cfb53132cb6835b92b529a05f6a81471ab60d0c/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0bfbcc5b7ffcd9b75ab1558f00db2ae07db9c80637ad1b2469c43df79d7a5ae2", size = 3319636, upload-time = "2025-11-24T23:26:11.168Z" }, + { url = "https://files.pythonhosted.org/packages/50/dc/8487df0f69bd398a61e1792b3cba0e47477f214eff085ba0efa7eac9ce87/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22bc525ebbdc24d1261ecbf6f504998244d4e3be1721784b5f64664d61fbe602", size = 3472079, upload-time = "2025-11-24T23:26:13.164Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/a1/c5bbeeb8531c05c89135cb8b28575ac2fac618bcb60119ee9696c3faf71c/asyncpg-0.31.0-cp313-cp313-win32.whl", hash = "sha256:f890de5e1e4f7e14023619399a471ce4b71f5418cd67a51853b9910fdfa73696", size = 527606, upload-time = "2025-11-24T23:26:14.78Z" }, + { url = "https://files.pythonhosted.org/packages/91/66/b25ccb84a246b470eb943b0107c07edcae51804912b824054b3413995a10/asyncpg-0.31.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc5f2fa9916f292e5c5c8b2ac2813763bcd7f58e130055b4ad8a0531314201ab", size = 596569, upload-time = "2025-11-24T23:26:16.189Z" }, + { url = "https://files.pythonhosted.org/packages/3c/36/e9450d62e84a13aea6580c83a47a437f26c7ca6fa0f0fd40b6670793ea30/asyncpg-0.31.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6b56b91bb0ffc328c4e3ed113136cddd9deefdf5f79ab448598b9772831df44", size = 660867, upload-time = "2025-11-24T23:26:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/82/4b/1d0a2b33b3102d210439338e1beea616a6122267c0df459ff0265cd5807a/asyncpg-0.31.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:334dec28cf20d7f5bb9e45b39546ddf247f8042a690bff9b9573d00086e69cb5", size = 638349, upload-time = "2025-11-24T23:26:19.689Z" }, + { url = "https://files.pythonhosted.org/packages/41/aa/e7f7ac9a7974f08eff9183e392b2d62516f90412686532d27e196c0f0eeb/asyncpg-0.31.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98cc158c53f46de7bb677fd20c417e264fc02b36d901cc2a43bd6cb0dc6dbfd2", size = 3410428, upload-time = "2025-11-24T23:26:21.275Z" }, + { url = "https://files.pythonhosted.org/packages/6f/de/bf1b60de3dede5c2731e6788617a512bc0ebd9693eac297ee74086f101d7/asyncpg-0.31.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9322b563e2661a52e3cdbc93eed3be7748b289f792e0011cb2720d278b366ce2", size = 3471678, upload-time = "2025-11-24T23:26:23.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/78/fc3ade003e22d8bd53aaf8f75f4be48f0b460fa73738f0391b9c856a9147/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19857a358fc811d82227449b7ca40afb46e75b33eb8897240c3839dd8b744218", size = 3313505, upload-time = "2025-11-24T23:26:25.235Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e9/73eb8a6789e927816f4705291be21f2225687bfa97321e40cd23055e903a/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5f8886e850882ff2c2ace5732300e99193823e8107e2c53ef01c1ebfa1e85d", size = 3434744, upload-time = "2025-11-24T23:26:26.944Z" }, + { url = "https://files.pythonhosted.org/packages/08/4b/f10b880534413c65c5b5862f79b8e81553a8f364e5238832ad4c0af71b7f/asyncpg-0.31.0-cp314-cp314-win32.whl", hash = "sha256:cea3a0b2a14f95834cee29432e4ddc399b95700eb1d51bbc5bfee8f31fa07b2b", size = 532251, upload-time = "2025-11-24T23:26:28.404Z" }, + { url = "https://files.pythonhosted.org/packages/d3/2d/7aa40750b7a19efa5d66e67fc06008ca0f27ba1bd082e457ad82f59aba49/asyncpg-0.31.0-cp314-cp314-win_amd64.whl", hash = "sha256:04d19392716af6b029411a0264d92093b6e5e8285ae97a39957b9a9c14ea72be", size = 604901, upload-time = "2025-11-24T23:26:30.34Z" }, + { url = "https://files.pythonhosted.org/packages/ce/fe/b9dfe349b83b9dee28cc42360d2c86b2cdce4cb551a2c2d27e156bcac84d/asyncpg-0.31.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bdb957706da132e982cc6856bb2f7b740603472b54c3ebc77fe60ea3e57e1bd2", size = 702280, upload-time = "2025-11-24T23:26:32Z" }, + { url = "https://files.pythonhosted.org/packages/6a/81/e6be6e37e560bd91e6c23ea8a6138a04fd057b08cf63d3c5055c98e81c1d/asyncpg-0.31.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6d11b198111a72f47154fa03b85799f9be63701e068b43f84ac25da0bda9cb31", size = 682931, upload-time = "2025-11-24T23:26:33.572Z" }, + { url = 
"https://files.pythonhosted.org/packages/a6/45/6009040da85a1648dd5bc75b3b0a062081c483e75a1a29041ae63a0bf0dc/asyncpg-0.31.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18c83b03bc0d1b23e6230f5bf8d4f217dc9bc08644ce0502a9d91dc9e634a9c7", size = 3581608, upload-time = "2025-11-24T23:26:35.638Z" }, + { url = "https://files.pythonhosted.org/packages/7e/06/2e3d4d7608b0b2b3adbee0d0bd6a2d29ca0fc4d8a78f8277df04e2d1fd7b/asyncpg-0.31.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e009abc333464ff18b8f6fd146addffd9aaf63e79aa3bb40ab7a4c332d0c5e9e", size = 3498738, upload-time = "2025-11-24T23:26:37.275Z" }, + { url = "https://files.pythonhosted.org/packages/7d/aa/7d75ede780033141c51d83577ea23236ba7d3a23593929b32b49db8ed36e/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3b1fbcb0e396a5ca435a8826a87e5c2c2cc0c8c68eb6fadf82168056b0e53a8c", size = 3401026, upload-time = "2025-11-24T23:26:39.423Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7a/15e37d45e7f7c94facc1e9148c0e455e8f33c08f0b8a0b1deb2c5171771b/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8df714dba348efcc162d2adf02d213e5fab1bd9f557e1305633e851a61814a7a", size = 3429426, upload-time = "2025-11-24T23:26:41.032Z" }, + { url = "https://files.pythonhosted.org/packages/13/d5/71437c5f6ae5f307828710efbe62163974e71237d5d46ebd2869ea052d10/asyncpg-0.31.0-cp314-cp314t-win32.whl", hash = "sha256:1b41f1afb1033f2b44f3234993b15096ddc9cd71b21a42dbd87fc6a57b43d65d", size = 614495, upload-time = "2025-11-24T23:26:42.659Z" }, + { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062, upload-time = "2025-11-24T23:26:44.086Z" }, +] + +[[package]] +name = "beautifulsoup4" +version = 
"4.13.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/2e/3e5079847e653b1f6dc647aa24549d68c6addb4c595cc0d902d1b19308ad/beautifulsoup4-4.13.5.tar.gz", hash = "sha256:5e70131382930e7c3de33450a2f54a63d5e4b19386eab43a5b34d594268f3695", size = 622954, upload-time = "2025-08-24T14:06:13.168Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/eb/f4151e0c7377a6e08a38108609ba5cede57986802757848688aeedd1b9e8/beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a", size = 105113, upload-time = "2025-08-24T14:06:14.884Z" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, 
upload-time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, +] + +[[package]] +name = "click" +version = "8.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/75/31212c6bf2503fdf920d87fee5d7a86a2e3bcf444984126f13d8e4016804/click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5", size = 302856, upload-time = "2026-04-03T19:14:45.118Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/20/71885d8b97d4f3dde17b1fdb92dbd4908b00541c5a3379787137285f602e/click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d", size = 108379, upload-time = "2026-04-03T19:14:43.505Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "colorclass" +version = "2.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d7/1a/31ff00a33569a3b59d65bbdc445c73e12f92ad28195b7ace299f68b9af70/colorclass-2.2.2.tar.gz", hash = "sha256:6d4fe287766166a98ca7bc6f6312daf04a0481b1eda43e7173484051c0ab4366", size = 16709, upload-time = "2021-12-09T00:41:35.661Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/b6/daf3e2976932da4ed3579cff7a30a53d22ea9323ee4f0d8e43be60454897/colorclass-2.2.2-py2.py3-none-any.whl", hash = "sha256:6f10c273a0ef7a1150b1120b6095cbdd68e5cf36dfd5d0fc957a2500bbf99a55", size = 18995, upload-time = "2021-12-09T00:41:34.653Z" }, +] + +[[package]] +name = "compressed-rtf" +version = "1.0.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/0c/929a4e8ef9d7143f54d77dadb5f370cc7b98534b1bd6e1124d0abe8efb24/compressed_rtf-1.0.7.tar.gz", hash = "sha256:7c30859334839f3cdc7d10796af5b434bb326b9df7cb5a65e95a8eacb2951b0e", size = 8152, upload-time = "2025-03-24T22:39:32.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/1d/62f5bf92e12335eb63517f42671ed78512d48bbc69e02a942dd7b90f03f0/compressed_rtf-1.0.7-py3-none-any.whl", hash = "sha256:b7904921d78c67a0a4b7fff9fb361a00ae2b447b6edca010ce321cd98fa0fcc0", size = 7968, upload-time = "2025-03-24T23:03:57.433Z" }, +] + +[[package]] +name = "coverage" +version = "7.13.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/e0/70553e3000e345daff267cec284ce4cbf3fc141b6da229ac52775b5428f1/coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179", size = 915967, upload-time = "2026-03-17T10:33:18.341Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/c3/a396306ba7db865bf96fc1fb3b7fd29bcbf3d829df642e77b13555163cd6/coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01", size = 219554, upload-time = 
"2026-03-17T10:30:42.208Z" }, + { url = "https://files.pythonhosted.org/packages/a6/16/a68a19e5384e93f811dccc51034b1fd0b865841c390e3c931dcc4699e035/coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422", size = 219908, upload-time = "2026-03-17T10:30:43.906Z" }, + { url = "https://files.pythonhosted.org/packages/29/72/20b917c6793af3a5ceb7fb9c50033f3ec7865f2911a1416b34a7cfa0813b/coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f", size = 251419, upload-time = "2026-03-17T10:30:45.545Z" }, + { url = "https://files.pythonhosted.org/packages/8c/49/cd14b789536ac6a4778c453c6a2338bc0a2fb60c5a5a41b4008328b9acc1/coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5", size = 254159, upload-time = "2026-03-17T10:30:47.204Z" }, + { url = "https://files.pythonhosted.org/packages/9d/00/7b0edcfe64e2ed4c0340dac14a52ad0f4c9bd0b8b5e531af7d55b703db7c/coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376", size = 255270, upload-time = "2026-03-17T10:30:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/93/89/7ffc4ba0f5d0a55c1e84ea7cee39c9fc06af7b170513d83fbf3bbefce280/coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256", size = 257538, upload-time = "2026-03-17T10:30:50.77Z" }, + { url = "https://files.pythonhosted.org/packages/81/bd/73ddf85f93f7e6fa83e77ccecb6162d9415c79007b4bc124008a4995e4a7/coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c", size = 251821, upload-time = "2026-03-17T10:30:52.5Z" }, + { url = "https://files.pythonhosted.org/packages/a0/81/278aff4e8dec4926a0bcb9486320752811f543a3ce5b602cc7a29978d073/coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5", size = 253191, upload-time = "2026-03-17T10:30:54.543Z" }, + { url = "https://files.pythonhosted.org/packages/70/ee/fe1621488e2e0a58d7e94c4800f0d96f79671553488d401a612bebae324b/coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09", size = 251337, upload-time = "2026-03-17T10:30:56.663Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/f79fb37aa104b562207cc23cb5711ab6793608e246cae1e93f26b2236ed9/coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9", size = 255404, upload-time = "2026-03-17T10:30:58.427Z" }, + { url = "https://files.pythonhosted.org/packages/75/f0/ed15262a58ec81ce457ceb717b7f78752a1713556b19081b76e90896e8d4/coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf", size = 250903, upload-time = "2026-03-17T10:31:00.093Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e9/9129958f20e7e9d4d56d51d42ccf708d15cac355ff4ac6e736e97a9393d2/coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c", size = 252780, upload-time = "2026-03-17T10:31:01.916Z" }, + { url = "https://files.pythonhosted.org/packages/a4/d7/0ad9b15812d81272db94379fe4c6df8fd17781cc7671fdfa30c76ba5ff7b/coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf", size = 222093, upload-time = "2026-03-17T10:31:03.642Z" 
}, + { url = "https://files.pythonhosted.org/packages/29/3d/821a9a5799fac2556bcf0bd37a70d1d11fa9e49784b6d22e92e8b2f85f18/coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810", size = 222900, upload-time = "2026-03-17T10:31:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fa/2238c2ad08e35cf4f020ea721f717e09ec3152aea75d191a7faf3ef009a8/coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de", size = 221515, upload-time = "2026-03-17T10:31:07.293Z" }, + { url = "https://files.pythonhosted.org/packages/74/8c/74fedc9663dcf168b0a059d4ea756ecae4da77a489048f94b5f512a8d0b3/coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1", size = 219576, upload-time = "2026-03-17T10:31:09.045Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c9/44fb661c55062f0818a6ffd2685c67aa30816200d5f2817543717d4b92eb/coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3", size = 219942, upload-time = "2026-03-17T10:31:10.708Z" }, + { url = "https://files.pythonhosted.org/packages/5f/13/93419671cee82b780bab7ea96b67c8ef448f5f295f36bf5031154ec9a790/coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26", size = 250935, upload-time = "2026-03-17T10:31:12.392Z" }, + { url = "https://files.pythonhosted.org/packages/ac/68/1666e3a4462f8202d836920114fa7a5ee9275d1fa45366d336c551a162dd/coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3", size = 253541, upload-time = "2026-03-17T10:31:14.247Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/5e/3ee3b835647be646dcf3c65a7c6c18f87c27326a858f72ab22c12730773d/coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b", size = 254780, upload-time = "2026-03-17T10:31:16.193Z" }, + { url = "https://files.pythonhosted.org/packages/44/b3/cb5bd1a04cfcc49ede6cd8409d80bee17661167686741e041abc7ee1b9a9/coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a", size = 256912, upload-time = "2026-03-17T10:31:17.89Z" }, + { url = "https://files.pythonhosted.org/packages/1b/66/c1dceb7b9714473800b075f5c8a84f4588f887a90eb8645282031676e242/coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969", size = 251165, upload-time = "2026-03-17T10:31:19.605Z" }, + { url = "https://files.pythonhosted.org/packages/b7/62/5502b73b97aa2e53ea22a39cf8649ff44827bef76d90bf638777daa27a9d/coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161", size = 252908, upload-time = "2026-03-17T10:31:21.312Z" }, + { url = "https://files.pythonhosted.org/packages/7d/37/7792c2d69854397ca77a55c4646e5897c467928b0e27f2d235d83b5d08c6/coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15", size = 250873, upload-time = "2026-03-17T10:31:23.565Z" }, + { url = "https://files.pythonhosted.org/packages/a3/23/bc866fb6163be52a8a9e5d708ba0d3b1283c12158cefca0a8bbb6e247a43/coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1", size = 255030, upload-time = "2026-03-17T10:31:25.58Z" }, + { url 
= "https://files.pythonhosted.org/packages/7d/8b/ef67e1c222ef49860701d346b8bbb70881bef283bd5f6cbba68a39a086c7/coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6", size = 250694, upload-time = "2026-03-17T10:31:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/46/0d/866d1f74f0acddbb906db212e096dee77a8e2158ca5e6bb44729f9d93298/coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17", size = 252469, upload-time = "2026-03-17T10:31:29.472Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f5/be742fec31118f02ce42b21c6af187ad6a344fed546b56ca60caacc6a9a0/coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85", size = 222112, upload-time = "2026-03-17T10:31:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/66/40/7732d648ab9d069a46e686043241f01206348e2bbf128daea85be4d6414b/coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b", size = 222923, upload-time = "2026-03-17T10:31:33.633Z" }, + { url = "https://files.pythonhosted.org/packages/48/af/fea819c12a095781f6ccd504890aaddaf88b8fab263c4940e82c7b770124/coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664", size = 221540, upload-time = "2026-03-17T10:31:35.445Z" }, + { url = "https://files.pythonhosted.org/packages/23/d2/17879af479df7fbbd44bd528a31692a48f6b25055d16482fdf5cdb633805/coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d", size = 220262, upload-time = "2026-03-17T10:31:37.184Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/4c/d20e554f988c8f91d6a02c5118f9abbbf73a8768a3048cb4962230d5743f/coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0", size = 220617, upload-time = "2026-03-17T10:31:39.245Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/f9f5277b95184f764b24e7231e166dfdb5780a46d408a2ac665969416d61/coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806", size = 261912, upload-time = "2026-03-17T10:31:41.324Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f6/7f1ab39393eeb50cfe4747ae8ef0e4fc564b989225aa1152e13a180d74f8/coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3", size = 263987, upload-time = "2026-03-17T10:31:43.724Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/62c084fb489ed9c6fbdf57e006752e7c516ea46fd690e5ed8b8617c7d52e/coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9", size = 266416, upload-time = "2026-03-17T10:31:45.769Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f6/df63d8660e1a0bff6125947afda112a0502736f470d62ca68b288ea762d8/coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd", size = 267558, upload-time = "2026-03-17T10:31:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/5b/02/353ca81d36779bd108f6d384425f7139ac3c58c750dcfaafe5d0bee6436b/coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606", size = 261163, upload-time = "2026-03-17T10:31:50.125Z" }, + { url = "https://files.pythonhosted.org/packages/2c/16/2e79106d5749bcaf3aee6d309123548e3276517cd7851faa8da213bc61bf/coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e", size = 263981, upload-time = "2026-03-17T10:31:51.961Z" }, + { url = "https://files.pythonhosted.org/packages/29/c7/c29e0c59ffa6942030ae6f50b88ae49988e7e8da06de7ecdbf49c6d4feae/coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0", size = 261604, upload-time = "2026-03-17T10:31:53.872Z" }, + { url = "https://files.pythonhosted.org/packages/40/48/097cdc3db342f34006a308ab41c3a7c11c3f0d84750d340f45d88a782e00/coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87", size = 265321, upload-time = "2026-03-17T10:31:55.997Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/4994af354689e14fd03a75f8ec85a9a68d94e0188bbdab3fc1516b55e512/coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479", size = 260502, upload-time = "2026-03-17T10:31:58.308Z" }, + { url = "https://files.pythonhosted.org/packages/22/c6/9bb9ef55903e628033560885f5c31aa227e46878118b63ab15dc7ba87797/coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2", size = 262688, upload-time = "2026-03-17T10:32:00.141Z" }, + { url = "https://files.pythonhosted.org/packages/14/4f/f5df9007e50b15e53e01edea486814783a7f019893733d9e4d6caad75557/coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a", size = 222788, upload-time = 
"2026-03-17T10:32:02.246Z" }, + { url = "https://files.pythonhosted.org/packages/e1/98/aa7fccaa97d0f3192bec013c4e6fd6d294a6ed44b640e6bb61f479e00ed5/coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819", size = 223851, upload-time = "2026-03-17T10:32:04.416Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8b/e5c469f7352651e5f013198e9e21f97510b23de957dd06a84071683b4b60/coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911", size = 222104, upload-time = "2026-03-17T10:32:06.65Z" }, + { url = "https://files.pythonhosted.org/packages/8e/77/39703f0d1d4b478bfd30191d3c14f53caf596fac00efb3f8f6ee23646439/coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f", size = 219621, upload-time = "2026-03-17T10:32:08.589Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3e/51dff36d99ae14639a133d9b164d63e628532e2974d8b1edb99dd1ebc733/coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e", size = 219953, upload-time = "2026-03-17T10:32:10.507Z" }, + { url = "https://files.pythonhosted.org/packages/6a/6c/1f1917b01eb647c2f2adc9962bd66c79eb978951cab61bdc1acab3290c07/coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a", size = 250992, upload-time = "2026-03-17T10:32:12.41Z" }, + { url = "https://files.pythonhosted.org/packages/22/e5/06b1f88f42a5a99df42ce61208bdec3bddb3d261412874280a19796fc09c/coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510", size = 253503, upload-time = "2026-03-17T10:32:14.449Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/28/2a148a51e5907e504fa7b85490277734e6771d8844ebcc48764a15e28155/coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247", size = 254852, upload-time = "2026-03-17T10:32:16.56Z" }, + { url = "https://files.pythonhosted.org/packages/61/77/50e8d3d85cc0b7ebe09f30f151d670e302c7ff4a1bf6243f71dd8b0981fa/coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6", size = 257161, upload-time = "2026-03-17T10:32:19.004Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c4/b5fd1d4b7bf8d0e75d997afd3925c59ba629fc8616f1b3aae7605132e256/coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0", size = 251021, upload-time = "2026-03-17T10:32:21.344Z" }, + { url = "https://files.pythonhosted.org/packages/f8/66/6ea21f910e92d69ef0b1c3346ea5922a51bad4446c9126db2ae96ee24c4c/coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882", size = 252858, upload-time = "2026-03-17T10:32:23.506Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ea/879c83cb5d61aa2a35fb80e72715e92672daef8191b84911a643f533840c/coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740", size = 250823, upload-time = "2026-03-17T10:32:25.516Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fb/616d95d3adb88b9803b275580bdeee8bd1b69a886d057652521f83d7322f/coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16", size = 255099, upload-time = "2026-03-17T10:32:27.944Z" }, + { url 
= "https://files.pythonhosted.org/packages/1c/93/25e6917c90ec1c9a56b0b26f6cad6408e5f13bb6b35d484a0d75c9cf000d/coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0", size = 250638, upload-time = "2026-03-17T10:32:29.914Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7b/dc1776b0464145a929deed214aef9fb1493f159b59ff3c7eeeedf91eddd0/coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0", size = 252295, upload-time = "2026-03-17T10:32:31.981Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fb/99cbbc56a26e07762a2740713f3c8f9f3f3106e3a3dd8cc4474954bccd34/coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc", size = 222360, upload-time = "2026-03-17T10:32:34.233Z" }, + { url = "https://files.pythonhosted.org/packages/8d/b7/4758d4f73fb536347cc5e4ad63662f9d60ba9118cb6785e9616b2ce5d7fa/coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633", size = 223174, upload-time = "2026-03-17T10:32:36.369Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f2/24d84e1dfe70f8ac9fdf30d338239860d0d1d5da0bda528959d0ebc9da28/coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8", size = 221739, upload-time = "2026-03-17T10:32:38.736Z" }, + { url = "https://files.pythonhosted.org/packages/60/5b/4a168591057b3668c2428bff25dd3ebc21b629d666d90bcdfa0217940e84/coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b", size = 220351, upload-time = "2026-03-17T10:32:41.196Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/21/1fd5c4dbfe4a58b6b99649125635df46decdfd4a784c3cd6d410d303e370/coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c", size = 220612, upload-time = "2026-03-17T10:32:43.204Z" }, + { url = "https://files.pythonhosted.org/packages/d6/fe/2a924b3055a5e7e4512655a9d4609781b0d62334fa0140c3e742926834e2/coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9", size = 261985, upload-time = "2026-03-17T10:32:45.514Z" }, + { url = "https://files.pythonhosted.org/packages/d7/0d/c8928f2bd518c45990fe1a2ab8db42e914ef9b726c975facc4282578c3eb/coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29", size = 264107, upload-time = "2026-03-17T10:32:47.971Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ae/4ae35bbd9a0af9d820362751f0766582833c211224b38665c0f8de3d487f/coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607", size = 266513, upload-time = "2026-03-17T10:32:50.1Z" }, + { url = "https://files.pythonhosted.org/packages/9c/20/d326174c55af36f74eac6ae781612d9492f060ce8244b570bb9d50d9d609/coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90", size = 267650, upload-time = "2026-03-17T10:32:52.391Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/31484d62cbd0eabd3412e30d74386ece4a0837d4f6c3040a653878bfc019/coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3", size = 261089, upload-time = "2026-03-17T10:32:54.544Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d8/49a72d6de146eebb0b7e48cc0f4bc2c0dd858e3d4790ab2b39a2872b62bd/coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab", size = 263982, upload-time = "2026-03-17T10:32:56.803Z" }, + { url = "https://files.pythonhosted.org/packages/06/3b/0351f1bd566e6e4dd39e978efe7958bde1d32f879e85589de147654f57bb/coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562", size = 261579, upload-time = "2026-03-17T10:32:59.466Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ce/796a2a2f4017f554d7810f5c573449b35b1e46788424a548d4d19201b222/coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2", size = 265316, upload-time = "2026-03-17T10:33:01.847Z" }, + { url = "https://files.pythonhosted.org/packages/3d/16/d5ae91455541d1a78bc90abf495be600588aff8f6db5c8b0dae739fa39c9/coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea", size = 260427, upload-time = "2026-03-17T10:33:03.945Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/07f413dba62db21fb3fad5d0de013a50e073cc4e2dc4306e770360f6dfc8/coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a", size = 262745, upload-time = "2026-03-17T10:33:06.285Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/d792371332eb4663115becf4bad47e047d16234b1aff687b1b18c58d60ae/coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215", size = 223146, upload-time = 
"2026-03-17T10:33:08.756Z" }, + { url = "https://files.pythonhosted.org/packages/db/51/37221f59a111dca5e85be7dbf09696323b5b9f13ff65e0641d535ed06ea8/coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43", size = 224254, upload-time = "2026-03-17T10:33:11.174Z" }, + { url = "https://files.pythonhosted.org/packages/54/83/6acacc889de8987441aa7d5adfbdbf33d288dad28704a67e574f1df9bcbb/coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45", size = 222276, upload-time = "2026-03-17T10:33:13.466Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ee/a4cf96b8ce1e566ed238f0659ac2d3f007ed1d14b181bcb684e19561a69a/coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61", size = 211346, upload-time = "2026-03-17T10:33:15.691Z" }, +] + +[[package]] +name = "cryptography" +version = "46.0.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/47/93/ac8f3d5ff04d54bc814e961a43ae5b0b146154c89c61b47bb07557679b18/cryptography-46.0.7.tar.gz", hash = "sha256:e4cfd68c5f3e0bfdad0d38e023239b96a2fe84146481852dffbcca442c245aa5", size = 750652, upload-time = "2026-04-08T01:57:54.692Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/5d/4a8f770695d73be252331e60e526291e3df0c9b27556a90a6b47bccca4c2/cryptography-46.0.7-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:ea42cbe97209df307fdc3b155f1b6fa2577c0defa8f1f7d3be7d31d189108ad4", size = 7179869, upload-time = "2026-04-08T01:56:17.157Z" }, + { url = "https://files.pythonhosted.org/packages/5f/45/6d80dc379b0bbc1f9d1e429f42e4cb9e1d319c7a8201beffd967c516ea01/cryptography-46.0.7-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:b36a4695e29fe69215d75960b22577197aca3f7a25b9cf9d165dcfe9d80bc325", size = 4275492, upload-time = "2026-04-08T01:56:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/4a/9a/1765afe9f572e239c3469f2cb429f3ba7b31878c893b246b4b2994ffe2fe/cryptography-46.0.7-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ad9ef796328c5e3c4ceed237a183f5d41d21150f972455a9d926593a1dcb308", size = 4426670, upload-time = "2026-04-08T01:56:21.415Z" }, + { url = "https://files.pythonhosted.org/packages/8f/3e/af9246aaf23cd4ee060699adab1e47ced3f5f7e7a8ffdd339f817b446462/cryptography-46.0.7-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:73510b83623e080a2c35c62c15298096e2a5dc8d51c3b4e1740211839d0dea77", size = 4280275, upload-time = "2026-04-08T01:56:23.539Z" }, + { url = "https://files.pythonhosted.org/packages/0f/54/6bbbfc5efe86f9d71041827b793c24811a017c6ac0fd12883e4caa86b8ed/cryptography-46.0.7-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cbd5fb06b62bd0721e1170273d3f4d5a277044c47ca27ee257025146c34cbdd1", size = 4928402, upload-time = "2026-04-08T01:56:25.624Z" }, + { url = "https://files.pythonhosted.org/packages/2d/cf/054b9d8220f81509939599c8bdbc0c408dbd2bdd41688616a20731371fe0/cryptography-46.0.7-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:420b1e4109cc95f0e5700eed79908cef9268265c773d3a66f7af1eef53d409ef", size = 4459985, upload-time = "2026-04-08T01:56:27.309Z" }, + { url = "https://files.pythonhosted.org/packages/f9/46/4e4e9c6040fb01c7467d47217d2f882daddeb8828f7df800cb806d8a2288/cryptography-46.0.7-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:24402210aa54baae71d99441d15bb5a1919c195398a87b563df84468160a65de", size = 3990652, upload-time = "2026-04-08T01:56:29.095Z" }, + { url = "https://files.pythonhosted.org/packages/36/5f/313586c3be5a2fbe87e4c9a254207b860155a8e1f3cca99f9910008e7d08/cryptography-46.0.7-cp311-abi3-manylinux_2_34_aarch64.whl", hash = 
"sha256:8a469028a86f12eb7d2fe97162d0634026d92a21f3ae0ac87ed1c4a447886c83", size = 4279805, upload-time = "2026-04-08T01:56:30.928Z" }, + { url = "https://files.pythonhosted.org/packages/69/33/60dfc4595f334a2082749673386a4d05e4f0cf4df8248e63b2c3437585f2/cryptography-46.0.7-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9694078c5d44c157ef3162e3bf3946510b857df5a3955458381d1c7cfc143ddb", size = 4892883, upload-time = "2026-04-08T01:56:32.614Z" }, + { url = "https://files.pythonhosted.org/packages/c7/0b/333ddab4270c4f5b972f980adef4faa66951a4aaf646ca067af597f15563/cryptography-46.0.7-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:42a1e5f98abb6391717978baf9f90dc28a743b7d9be7f0751a6f56a75d14065b", size = 4459756, upload-time = "2026-04-08T01:56:34.306Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/633913398b43b75f1234834170947957c6b623d1701ffc7a9600da907e89/cryptography-46.0.7-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91bbcb08347344f810cbe49065914fe048949648f6bd5c2519f34619142bbe85", size = 4410244, upload-time = "2026-04-08T01:56:35.977Z" }, + { url = "https://files.pythonhosted.org/packages/10/f2/19ceb3b3dc14009373432af0c13f46aa08e3ce334ec6eff13492e1812ccd/cryptography-46.0.7-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5d1c02a14ceb9148cc7816249f64f623fbfee39e8c03b3650d842ad3f34d637e", size = 4674868, upload-time = "2026-04-08T01:56:38.034Z" }, + { url = "https://files.pythonhosted.org/packages/1a/bb/a5c213c19ee94b15dfccc48f363738633a493812687f5567addbcbba9f6f/cryptography-46.0.7-cp311-abi3-win32.whl", hash = "sha256:d23c8ca48e44ee015cd0a54aeccdf9f09004eba9fc96f38c911011d9ff1bd457", size = 3026504, upload-time = "2026-04-08T01:56:39.666Z" }, + { url = "https://files.pythonhosted.org/packages/2b/02/7788f9fefa1d060ca68717c3901ae7fffa21ee087a90b7f23c7a603c32ae/cryptography-46.0.7-cp311-abi3-win_amd64.whl", hash = "sha256:397655da831414d165029da9bc483bed2fe0e75dde6a1523ec2fe63f3c46046b", size = 3488363, upload-time = 
"2026-04-08T01:56:41.893Z" }, + { url = "https://files.pythonhosted.org/packages/7b/56/15619b210e689c5403bb0540e4cb7dbf11a6bf42e483b7644e471a2812b3/cryptography-46.0.7-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:d151173275e1728cf7839aaa80c34fe550c04ddb27b34f48c232193df8db5842", size = 7119671, upload-time = "2026-04-08T01:56:44Z" }, + { url = "https://files.pythonhosted.org/packages/74/66/e3ce040721b0b5599e175ba91ab08884c75928fbeb74597dd10ef13505d2/cryptography-46.0.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:db0f493b9181c7820c8134437eb8b0b4792085d37dbb24da050476ccb664e59c", size = 4268551, upload-time = "2026-04-08T01:56:46.071Z" }, + { url = "https://files.pythonhosted.org/packages/03/11/5e395f961d6868269835dee1bafec6a1ac176505a167f68b7d8818431068/cryptography-46.0.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ebd6daf519b9f189f85c479427bbd6e9c9037862cf8fe89ee35503bd209ed902", size = 4408887, upload-time = "2026-04-08T01:56:47.718Z" }, + { url = "https://files.pythonhosted.org/packages/40/53/8ed1cf4c3b9c8e611e7122fb56f1c32d09e1fff0f1d77e78d9ff7c82653e/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:b7b412817be92117ec5ed95f880defe9cf18a832e8cafacf0a22337dc1981b4d", size = 4271354, upload-time = "2026-04-08T01:56:49.312Z" }, + { url = "https://files.pythonhosted.org/packages/50/46/cf71e26025c2e767c5609162c866a78e8a2915bbcfa408b7ca495c6140c4/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:fbfd0e5f273877695cb93baf14b185f4878128b250cc9f8e617ea0c025dfb022", size = 4905845, upload-time = "2026-04-08T01:56:50.916Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ea/01276740375bac6249d0a971ebdf6b4dc9ead0ee0a34ef3b5a88c1a9b0d4/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ffca7aa1d00cf7d6469b988c581598f2259e46215e0140af408966a24cf086ce", size = 4444641, upload-time = "2026-04-08T01:56:52.882Z" }, 
+ { url = "https://files.pythonhosted.org/packages/3d/4c/7d258f169ae71230f25d9f3d06caabcff8c3baf0978e2b7d65e0acac3827/cryptography-46.0.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:60627cf07e0d9274338521205899337c5d18249db56865f943cbe753aa96f40f", size = 3967749, upload-time = "2026-04-08T01:56:54.597Z" }, + { url = "https://files.pythonhosted.org/packages/b5/2a/2ea0767cad19e71b3530e4cad9605d0b5e338b6a1e72c37c9c1ceb86c333/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:80406c3065e2c55d7f49a9550fe0c49b3f12e5bfff5dedb727e319e1afb9bf99", size = 4270942, upload-time = "2026-04-08T01:56:56.416Z" }, + { url = "https://files.pythonhosted.org/packages/41/3d/fe14df95a83319af25717677e956567a105bb6ab25641acaa093db79975d/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:c5b1ccd1239f48b7151a65bc6dd54bcfcc15e028c8ac126d3fada09db0e07ef1", size = 4871079, upload-time = "2026-04-08T01:56:58.31Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/4a479e0f36f8f378d397f4eab4c850b4ffb79a2f0d58704b8fa0703ddc11/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d5f7520159cd9c2154eb61eb67548ca05c5774d39e9c2c4339fd793fe7d097b2", size = 4443999, upload-time = "2026-04-08T01:57:00.508Z" }, + { url = "https://files.pythonhosted.org/packages/28/17/b59a741645822ec6d04732b43c5d35e4ef58be7bfa84a81e5ae6f05a1d33/cryptography-46.0.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fcd8eac50d9138c1d7fc53a653ba60a2bee81a505f9f8850b6b2888555a45d0e", size = 4399191, upload-time = "2026-04-08T01:57:02.654Z" }, + { url = "https://files.pythonhosted.org/packages/59/6a/bb2e166d6d0e0955f1e9ff70f10ec4b2824c9cfcdb4da772c7dd69cc7d80/cryptography-46.0.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:65814c60f8cc400c63131584e3e1fad01235edba2614b61fbfbfa954082db0ee", size = 4655782, upload-time = "2026-04-08T01:57:04.592Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/b6/3da51d48415bcb63b00dc17c2eff3a651b7c4fed484308d0f19b30e8cb2c/cryptography-46.0.7-cp314-cp314t-win32.whl", hash = "sha256:fdd1736fed309b4300346f88f74cd120c27c56852c3838cab416e7a166f67298", size = 3002227, upload-time = "2026-04-08T01:57:06.91Z" }, + { url = "https://files.pythonhosted.org/packages/32/a8/9f0e4ed57ec9cebe506e58db11ae472972ecb0c659e4d52bbaee80ca340a/cryptography-46.0.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e06acf3c99be55aa3b516397fe42f5855597f430add9c17fa46bf2e0fb34c9bb", size = 3475332, upload-time = "2026-04-08T01:57:08.807Z" }, + { url = "https://files.pythonhosted.org/packages/a7/7f/cd42fc3614386bc0c12f0cb3c4ae1fc2bbca5c9662dfed031514911d513d/cryptography-46.0.7-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:462ad5cb1c148a22b2e3bcc5ad52504dff325d17daf5df8d88c17dda1f75f2a4", size = 7165618, upload-time = "2026-04-08T01:57:10.645Z" }, + { url = "https://files.pythonhosted.org/packages/a5/d0/36a49f0262d2319139d2829f773f1b97ef8aef7f97e6e5bd21455e5a8fb5/cryptography-46.0.7-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:84d4cced91f0f159a7ddacad249cc077e63195c36aac40b4150e7a57e84fffe7", size = 4270628, upload-time = "2026-04-08T01:57:12.885Z" }, + { url = "https://files.pythonhosted.org/packages/8a/6c/1a42450f464dda6ffbe578a911f773e54dd48c10f9895a23a7e88b3e7db5/cryptography-46.0.7-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:128c5edfe5e5938b86b03941e94fac9ee793a94452ad1365c9fc3f4f62216832", size = 4415405, upload-time = "2026-04-08T01:57:14.923Z" }, + { url = "https://files.pythonhosted.org/packages/9a/92/4ed714dbe93a066dc1f4b4581a464d2d7dbec9046f7c8b7016f5286329e2/cryptography-46.0.7-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5e51be372b26ef4ba3de3c167cd3d1022934bc838ae9eaad7e644986d2a3d163", size = 4272715, upload-time = "2026-04-08T01:57:16.638Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/e6/a26b84096eddd51494bba19111f8fffe976f6a09f132706f8f1bf03f51f7/cryptography-46.0.7-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cdf1a610ef82abb396451862739e3fc93b071c844399e15b90726ef7470eeaf2", size = 4918400, upload-time = "2026-04-08T01:57:19.021Z" }, + { url = "https://files.pythonhosted.org/packages/c7/08/ffd537b605568a148543ac3c2b239708ae0bd635064bab41359252ef88ed/cryptography-46.0.7-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1d25aee46d0c6f1a501adcddb2d2fee4b979381346a78558ed13e50aa8a59067", size = 4450634, upload-time = "2026-04-08T01:57:21.185Z" }, + { url = "https://files.pythonhosted.org/packages/16/01/0cd51dd86ab5b9befe0d031e276510491976c3a80e9f6e31810cce46c4ad/cryptography-46.0.7-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:cdfbe22376065ffcf8be74dc9a909f032df19bc58a699456a21712d6e5eabfd0", size = 3985233, upload-time = "2026-04-08T01:57:22.862Z" }, + { url = "https://files.pythonhosted.org/packages/92/49/819d6ed3a7d9349c2939f81b500a738cb733ab62fbecdbc1e38e83d45e12/cryptography-46.0.7-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:abad9dac36cbf55de6eb49badd4016806b3165d396f64925bf2999bcb67837ba", size = 4271955, upload-time = "2026-04-08T01:57:24.814Z" }, + { url = "https://files.pythonhosted.org/packages/80/07/ad9b3c56ebb95ed2473d46df0847357e01583f4c52a85754d1a55e29e4d0/cryptography-46.0.7-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:935ce7e3cfdb53e3536119a542b839bb94ec1ad081013e9ab9b7cfd478b05006", size = 4879888, upload-time = "2026-04-08T01:57:26.88Z" }, + { url = "https://files.pythonhosted.org/packages/b8/c7/201d3d58f30c4c2bdbe9b03844c291feb77c20511cc3586daf7edc12a47b/cryptography-46.0.7-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:35719dc79d4730d30f1c2b6474bd6acda36ae2dfae1e3c16f2051f215df33ce0", size = 4449961, upload-time = "2026-04-08T01:57:29.068Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/ef/649750cbf96f3033c3c976e112265c33906f8e462291a33d77f90356548c/cryptography-46.0.7-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:7bbc6ccf49d05ac8f7d7b5e2e2c33830d4fe2061def88210a126d130d7f71a85", size = 4401696, upload-time = "2026-04-08T01:57:31.029Z" }, + { url = "https://files.pythonhosted.org/packages/41/52/a8908dcb1a389a459a29008c29966c1d552588d4ae6d43f3a1a4512e0ebe/cryptography-46.0.7-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a1529d614f44b863a7b480c6d000fe93b59acee9c82ffa027cfadc77521a9f5e", size = 4664256, upload-time = "2026-04-08T01:57:33.144Z" }, + { url = "https://files.pythonhosted.org/packages/4b/fa/f0ab06238e899cc3fb332623f337a7364f36f4bb3f2534c2bb95a35b132c/cryptography-46.0.7-cp38-abi3-win32.whl", hash = "sha256:f247c8c1a1fb45e12586afbb436ef21ff1e80670b2861a90353d9b025583d246", size = 3013001, upload-time = "2026-04-08T01:57:34.933Z" }, + { url = "https://files.pythonhosted.org/packages/d2/f1/00ce3bde3ca542d1acd8f8cfa38e446840945aa6363f9b74746394b14127/cryptography-46.0.7-cp38-abi3-win_amd64.whl", hash = "sha256:506c4ff91eff4f82bdac7633318a526b1d1309fc07ca76a3ad182cb5b686d6d3", size = 3472985, upload-time = "2026-04-08T01:57:36.714Z" }, +] + +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, +] + +[[package]] +name = "dlp-scanner" +version = "0.1.0" 
+source = { editable = "." } +dependencies = [ + { name = "aiomysql" }, + { name = "aiosqlite" }, + { name = "asyncpg" }, + { name = "defusedxml" }, + { name = "dpkt" }, + { name = "extract-msg" }, + { name = "fastavro" }, + { name = "lxml" }, + { name = "openpyxl" }, + { name = "orjson" }, + { name = "pyarrow" }, + { name = "pydantic" }, + { name = "pymongo" }, + { name = "pymupdf" }, + { name = "python-docx" }, + { name = "rich" }, + { name = "ruamel-yaml" }, + { name = "structlog" }, + { name = "typer" }, + { name = "xlrd" }, +] + +[package.dev-dependencies] +dev = [ + { name = "hypothesis" }, + { name = "mypy" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "ruff" }, + { name = "yapf" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiomysql", specifier = ">=0.2.0" }, + { name = "aiosqlite", specifier = ">=0.21.0" }, + { name = "asyncpg", specifier = ">=0.30.0" }, + { name = "defusedxml", specifier = ">=0.7.0" }, + { name = "dpkt", specifier = ">=1.9.0" }, + { name = "extract-msg", specifier = ">=0.50.0" }, + { name = "fastavro", specifier = ">=1.9.0" }, + { name = "lxml", specifier = ">=5.0.0" }, + { name = "openpyxl", specifier = ">=3.1.0" }, + { name = "orjson", specifier = ">=3.10.0" }, + { name = "pyarrow", specifier = ">=16.0.0" }, + { name = "pydantic", specifier = ">=2.10.0" }, + { name = "pymongo", specifier = ">=4.10.0" }, + { name = "pymupdf", specifier = ">=1.25.0" }, + { name = "python-docx", specifier = ">=1.1.0" }, + { name = "rich", specifier = ">=14.0.0" }, + { name = "ruamel-yaml", specifier = ">=0.18.0" }, + { name = "structlog", specifier = ">=25.0.0" }, + { name = "typer", specifier = ">=0.15.0" }, + { name = "xlrd", specifier = ">=2.0.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "hypothesis", specifier = ">=6.130.0" }, + { name = "mypy", specifier = ">=1.15.0" }, + { name = "pytest", specifier = ">=8.3.0" }, + { name = "pytest-asyncio", specifier = ">=0.25.0" }, + 
{ name = "pytest-cov", specifier = ">=6.0.0" }, + { name = "ruff", specifier = ">=0.11.0" }, + { name = "yapf", specifier = ">=0.43.0" }, +] + +[[package]] +name = "dnspython" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, +] + +[[package]] +name = "dpkt" +version = "1.9.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/7d/52f17a794db52a66e46ebb0c7549bf2f035ed61d5a920ba4aaa127dd038e/dpkt-1.9.8.tar.gz", hash = "sha256:43f8686e455da5052835fd1eda2689d51de3670aac9799b1b00cfd203927ee45", size = 180073, upload-time = "2022-08-18T05:54:13.582Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/79/479e2194c9096b92aecdf33634ae948d2be306c6011673e98ee1917f32c2/dpkt-1.9.8-py3-none-any.whl", hash = "sha256:4da4d111d7bf67575b571f5c678c71bddd2d8a01a3d57d489faf0a92c748fbfd", size = 194973, upload-time = "2022-08-18T05:54:10.793Z" }, +] + +[[package]] +name = "easygui" +version = "0.98.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/ad/e35f7a30272d322be09dc98592d2f55d27cc933a7fde8baccbbeb2bd9409/easygui-0.98.3.tar.gz", hash = "sha256:d653ff79ee1f42f63b5a090f2f98ce02335d86ad8963b3ce2661805cafe99a04", size = 85583, upload-time = "2022-04-01T13:15:50.752Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/8e/a7/b276ff776533b423710a285c8168b52551cb2ab0855443131fdc7fd8c16f/easygui-0.98.3-py2.py3-none-any.whl", hash = "sha256:33498710c68b5376b459cd3fc48d1d1f33822139eb3ed01defbc0528326da3ba", size = 92655, upload-time = "2022-04-01T13:15:49.568Z" }, +] + +[[package]] +name = "ebcdic" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/2f/633031205333bee5f9f93761af8268746aa75f38754823aabb8570eb245b/ebcdic-1.1.1-py2.py3-none-any.whl", hash = "sha256:33b4cb729bc2d0bf46cc1847b0e5946897cb8d3f53520c5b9aa5fa98d7e735f1", size = 128537, upload-time = "2019-08-09T00:54:35.544Z" }, +] + +[[package]] +name = "et-xmlfile" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, +] + +[[package]] +name = "extract-msg" +version = "0.55.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "compressed-rtf" }, + { name = "ebcdic" }, + { name = "olefile" }, + { name = "red-black-tree-mod" }, + { name = "rtfde" }, + { name = "tzlocal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/65/c70afb3b119a44b3ee36b029485dc15326cf3a7c50da19a1ecbbf949c5d1/extract_msg-0.55.0.tar.gz", hash = "sha256:cf08283498c3dfcc7f894dad1579f52e3ced9fb76b865c2355cbe757af8a54e1", size = 331170, upload-time = 
"2025-08-12T16:07:56.537Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/81/87d5241036046ea17c5c8db228f4c9e04e07e53b627015d4496a99449aaf/extract_msg-0.55.0-py3-none-any.whl", hash = "sha256:baf0cdee9a8d267b70c366bc57ceb03dbfa1e7ab2dca6824169a7fe623f0917c", size = 336033, upload-time = "2025-08-12T16:07:54.886Z" }, +] + +[[package]] +name = "fastavro" +version = "1.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/8b/fa2d3287fd2267be6261d0177c6809a7fa12c5600ddb33490c8dc29e77b2/fastavro-1.12.1.tar.gz", hash = "sha256:2f285be49e45bc047ab2f6bed040bb349da85db3f3c87880e4b92595ea093b2b", size = 1025661, upload-time = "2025-10-10T15:40:55.41Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/f0/10bd1a3d08667fa0739e2b451fe90e06df575ec8b8ba5d3135c70555c9bd/fastavro-1.12.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:509818cb24b98a804fc80be9c5fed90f660310ae3d59382fc811bfa187122167", size = 1009057, upload-time = "2025-10-10T15:41:24.556Z" }, + { url = "https://files.pythonhosted.org/packages/78/ad/0d985bc99e1fa9e74c636658000ba38a5cd7f5ab2708e9c62eaf736ecf1a/fastavro-1.12.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:089e155c0c76e0d418d7e79144ce000524dd345eab3bc1e9c5ae69d500f71b14", size = 3391866, upload-time = "2025-10-10T15:41:26.882Z" }, + { url = "https://files.pythonhosted.org/packages/0d/9e/b4951dc84ebc34aac69afcbfbb22ea4a91080422ec2bfd2c06076ff1d419/fastavro-1.12.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44cbff7518901c91a82aab476fcab13d102e4999499df219d481b9e15f61af34", size = 3458005, upload-time = "2025-10-10T15:41:29.017Z" }, + { url = "https://files.pythonhosted.org/packages/af/f8/5a8df450a9f55ca8441f22ea0351d8c77809fc121498b6970daaaf667a21/fastavro-1.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:a275e48df0b1701bb764b18a8a21900b24cf882263cb03d35ecdba636bbc830b", size = 3295258, upload-time = "2025-10-10T15:41:31.564Z" }, + { url = "https://files.pythonhosted.org/packages/99/b2/40f25299111d737e58b85696e91138a66c25b7334f5357e7ac2b0e8966f8/fastavro-1.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2de72d786eb38be6b16d556b27232b1bf1b2797ea09599507938cdb7a9fe3e7c", size = 3430328, upload-time = "2025-10-10T15:41:33.689Z" }, + { url = "https://files.pythonhosted.org/packages/e0/07/85157a7c57c5f8b95507d7829b5946561e5ee656ff80e9dd9a757f53ddaf/fastavro-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:9090f0dee63fe022ee9cc5147483366cc4171c821644c22da020d6b48f576b4f", size = 444140, upload-time = "2025-10-10T15:41:34.902Z" }, + { url = "https://files.pythonhosted.org/packages/bb/57/26d5efef9182392d5ac9f253953c856ccb66e4c549fd3176a1e94efb05c9/fastavro-1.12.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:78df838351e4dff9edd10a1c41d1324131ffecbadefb9c297d612ef5363c049a", size = 1000599, upload-time = "2025-10-10T15:41:36.554Z" }, + { url = "https://files.pythonhosted.org/packages/33/cb/8ab55b21d018178eb126007a56bde14fd01c0afc11d20b5f2624fe01e698/fastavro-1.12.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:780476c23175d2ae457c52f45b9ffa9d504593499a36cd3c1929662bf5b7b14b", size = 3335933, upload-time = "2025-10-10T15:41:39.07Z" }, + { url = "https://files.pythonhosted.org/packages/fe/03/9c94ec9bf873eb1ffb0aa694f4e71940154e6e9728ddfdc46046d7e8ced4/fastavro-1.12.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0714b285160fcd515eb0455540f40dd6dac93bdeacdb03f24e8eac3d8aa51f8d", size = 3402066, upload-time = "2025-10-10T15:41:41.608Z" }, + { url = "https://files.pythonhosted.org/packages/75/c8/cb472347c5a584ccb8777a649ebb28278fccea39d005fc7df19996f41df8/fastavro-1.12.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:a8bc2dcec5843d499f2489bfe0747999108f78c5b29295d877379f1972a3d41a", size = 3240038, upload-time = "2025-10-10T15:41:43.743Z" }, + { url = "https://files.pythonhosted.org/packages/e1/77/569ce9474c40304b3a09e109494e020462b83e405545b78069ddba5f614e/fastavro-1.12.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3b1921ac35f3d89090a5816b626cf46e67dbecf3f054131f84d56b4e70496f45", size = 3369398, upload-time = "2025-10-10T15:41:45.719Z" }, + { url = "https://files.pythonhosted.org/packages/4a/1f/9589e35e9ea68035385db7bdbf500d36b8891db474063fb1ccc8215ee37c/fastavro-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:5aa777b8ee595b50aa084104cd70670bf25a7bbb9fd8bb5d07524b0785ee1699", size = 444220, upload-time = "2025-10-10T15:41:47.39Z" }, + { url = "https://files.pythonhosted.org/packages/6c/d2/78435fe737df94bd8db2234b2100f5453737cffd29adee2504a2b013de84/fastavro-1.12.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c3d67c47f177e486640404a56f2f50b165fe892cc343ac3a34673b80cc7f1dd6", size = 1086611, upload-time = "2025-10-10T15:41:48.818Z" }, + { url = "https://files.pythonhosted.org/packages/b6/be/428f99b10157230ddac77ec8cc167005b29e2bd5cbe228345192bb645f30/fastavro-1.12.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5217f773492bac43dae15ff2931432bce2d7a80be7039685a78d3fab7df910bd", size = 3541001, upload-time = "2025-10-10T15:41:50.871Z" }, + { url = "https://files.pythonhosted.org/packages/16/08/a2eea4f20b85897740efe44887e1ac08f30dfa4bfc3de8962bdcbb21a5a1/fastavro-1.12.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:469fecb25cba07f2e1bfa4c8d008477cd6b5b34a59d48715e1b1a73f6160097d", size = 3432217, upload-time = "2025-10-10T15:41:53.149Z" }, + { url = "https://files.pythonhosted.org/packages/87/bb/b4c620b9eb6e9838c7f7e4b7be0762834443adf9daeb252a214e9ad3178c/fastavro-1.12.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:d71c8aa841ef65cfab709a22bb887955f42934bced3ddb571e98fdbdade4c609", size = 3366742, upload-time = "2025-10-10T15:41:55.237Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d1/e69534ccdd5368350646fea7d93be39e5f77c614cca825c990bd9ca58f67/fastavro-1.12.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:b81fc04e85dfccf7c028e0580c606e33aa8472370b767ef058aae2c674a90746", size = 3383743, upload-time = "2025-10-10T15:41:57.68Z" }, + { url = "https://files.pythonhosted.org/packages/58/54/b7b4a0c3fb5fcba38128542da1b26c4e6d69933c923f493548bdfd63ab6a/fastavro-1.12.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9445da127751ba65975d8e4bdabf36bfcfdad70fc35b2d988e3950cce0ec0e7c", size = 1001377, upload-time = "2025-10-10T15:41:59.241Z" }, + { url = "https://files.pythonhosted.org/packages/1e/4f/0e589089c7df0d8f57d7e5293fdc34efec9a3b758a0d4d0c99a7937e2492/fastavro-1.12.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6", size = 3320401, upload-time = "2025-10-10T15:42:01.682Z" }, + { url = "https://files.pythonhosted.org/packages/f9/19/260110d56194ae29d7e423a336fccea8bcd103196d00f0b364b732bdb84e/fastavro-1.12.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3616e2f0e1c9265e92954fa099db79c6e7817356d3ff34f4bcc92699ae99697c", size = 3350894, upload-time = "2025-10-10T15:42:04.073Z" }, + { url = "https://files.pythonhosted.org/packages/d0/96/58b0411e8be9694d5972bee3167d6c1fd1fdfdf7ce253c1a19a327208f4f/fastavro-1.12.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cb0337b42fd3c047fcf0e9b7597bd6ad25868de719f29da81eabb6343f08d399", size = 3229644, upload-time = "2025-10-10T15:42:06.221Z" }, + { url = "https://files.pythonhosted.org/packages/5b/db/38660660eac82c30471d9101f45b3acfdcbadfe42d8f7cdb129459a45050/fastavro-1.12.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:64961ab15b74b7c168717bbece5660e0f3d457837c3cc9d9145181d011199fa7", size = 3329704, upload-time = "2025-10-10T15:42:08.384Z" }, + { url = "https://files.pythonhosted.org/packages/9d/a9/1672910f458ecb30b596c9e59e41b7c00309b602a0494341451e92e62747/fastavro-1.12.1-cp314-cp314-win_amd64.whl", hash = "sha256:792356d320f6e757e89f7ac9c22f481e546c886454a6709247f43c0dd7058004", size = 452911, upload-time = "2025-10-10T15:42:09.795Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8d/2e15d0938ded1891b33eff252e8500605508b799c2e57188a933f0bd744c/fastavro-1.12.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:120aaf82ac19d60a1016afe410935fe94728752d9c2d684e267e5b7f0e70f6d9", size = 3541999, upload-time = "2025-10-10T15:42:11.794Z" }, + { url = "https://files.pythonhosted.org/packages/a7/1c/6dfd082a205be4510543221b734b1191299e6a1810c452b6bc76dfa6968e/fastavro-1.12.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6a3462934b20a74f9ece1daa49c2e4e749bd9a35fa2657b53bf62898fba80f5", size = 3433972, upload-time = "2025-10-10T15:42:14.485Z" }, + { url = "https://files.pythonhosted.org/packages/24/90/9de694625a1a4b727b1ad0958d220cab25a9b6cf7f16a5c7faa9ea7b2261/fastavro-1.12.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1f81011d54dd47b12437b51dd93a70a9aa17b61307abf26542fc3c13efbc6c51", size = 3368752, upload-time = "2025-10-10T15:42:16.618Z" }, + { url = "https://files.pythonhosted.org/packages/fa/93/b44f67589e4d439913dab6720f7e3507b0fa8b8e56d06f6fc875ced26afb/fastavro-1.12.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:43ded16b3f4a9f1a42f5970c2aa618acb23ea59c4fcaa06680bdf470b255e5a8", size = 3386636, upload-time = "2025-10-10T15:42:18.974Z" }, +] + +[[package]] +name = "hypothesis" +version = "6.151.11" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sortedcontainers" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/a9/58/41af0d539b3c95644d1e4e353cbd6ac9473e892ea21802546a8886b79078/hypothesis-6.151.11.tar.gz", hash = "sha256:f33dcb68b62c7b07c9ac49664989be898fa8ce57583f0dc080259a197c6c7ff1", size = 463779, upload-time = "2026-04-05T17:35:55.935Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/06/f49393eca84b87b17a67aaebf9f6251190ba1e9fe9f2236504049fc43fee/hypothesis-6.151.11-py3-none-any.whl", hash = "sha256:7ac05173206746cec8312f95164a30a4eb4916815413a278922e63ff1e404648", size = 529572, upload-time = "2026-04-05T17:35:53.438Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "lark" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732, upload-time = "2025-10-27T18:25:56.653Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" }, +] + +[[package]] 
+name = "librt" +version = "0.8.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/9c/b4b0c54d84da4a94b37bd44151e46d5e583c9534c7e02250b961b1b6d8a8/librt-0.8.1.tar.gz", hash = "sha256:be46a14693955b3bd96014ccbdb8339ee8c9346fbe11c1b78901b55125f14c73", size = 177471, upload-time = "2026-02-17T16:13:06.101Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/21/d39b0a87ac52fc98f621fb6f8060efb017a767ebbbac2f99fbcbc9ddc0d7/librt-0.8.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a28f2612ab566b17f3698b0da021ff9960610301607c9a5e8eaca62f5e1c350a", size = 66516, upload-time = "2026-02-17T16:11:41.604Z" }, + { url = "https://files.pythonhosted.org/packages/69/f1/46375e71441c43e8ae335905e069f1c54febee63a146278bcee8782c84fd/librt-0.8.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:60a78b694c9aee2a0f1aaeaa7d101cf713e92e8423a941d2897f4fa37908dab9", size = 68634, upload-time = "2026-02-17T16:11:43.268Z" }, + { url = "https://files.pythonhosted.org/packages/0a/33/c510de7f93bf1fa19e13423a606d8189a02624a800710f6e6a0a0f0784b3/librt-0.8.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:758509ea3f1eba2a57558e7e98f4659d0ea7670bff49673b0dde18a3c7e6c0eb", size = 198941, upload-time = "2026-02-17T16:11:44.28Z" }, + { url = "https://files.pythonhosted.org/packages/dd/36/e725903416409a533d92398e88ce665476f275081d0d7d42f9c4951999e5/librt-0.8.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:039b9f2c506bd0ab0f8725aa5ba339c6f0cd19d3b514b50d134789809c24285d", size = 209991, upload-time = "2026-02-17T16:11:45.462Z" }, + { url = "https://files.pythonhosted.org/packages/30/7a/8d908a152e1875c9f8eac96c97a480df425e657cdb47854b9efaa4998889/librt-0.8.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bb54f1205a3a6ab41a6fd71dfcdcbd278670d3a90ca502a30d9da583105b6f7", size = 
224476, upload-time = "2026-02-17T16:11:46.542Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b8/a22c34f2c485b8903a06f3fe3315341fe6876ef3599792344669db98fcff/librt-0.8.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:05bd41cdee35b0c59c259f870f6da532a2c5ca57db95b5f23689fcb5c9e42440", size = 217518, upload-time = "2026-02-17T16:11:47.746Z" }, + { url = "https://files.pythonhosted.org/packages/79/6f/5c6fea00357e4f82ba44f81dbfb027921f1ab10e320d4a64e1c408d035d9/librt-0.8.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adfab487facf03f0d0857b8710cf82d0704a309d8ffc33b03d9302b4c64e91a9", size = 225116, upload-time = "2026-02-17T16:11:49.298Z" }, + { url = "https://files.pythonhosted.org/packages/f2/a0/95ced4e7b1267fe1e2720a111685bcddf0e781f7e9e0ce59d751c44dcfe5/librt-0.8.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:153188fe98a72f206042be10a2c6026139852805215ed9539186312d50a8e972", size = 217751, upload-time = "2026-02-17T16:11:50.49Z" }, + { url = "https://files.pythonhosted.org/packages/93/c2/0517281cb4d4101c27ab59472924e67f55e375bc46bedae94ac6dc6e1902/librt-0.8.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:dd3c41254ee98604b08bd5b3af5bf0a89740d4ee0711de95b65166bf44091921", size = 218378, upload-time = "2026-02-17T16:11:51.783Z" }, + { url = "https://files.pythonhosted.org/packages/43/e8/37b3ac108e8976888e559a7b227d0ceac03c384cfd3e7a1c2ee248dbae79/librt-0.8.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e0d138c7ae532908cbb342162b2611dbd4d90c941cd25ab82084aaf71d2c0bd0", size = 241199, upload-time = "2026-02-17T16:11:53.561Z" }, + { url = "https://files.pythonhosted.org/packages/4b/5b/35812d041c53967fedf551a39399271bbe4257e681236a2cf1a69c8e7fa1/librt-0.8.1-cp312-cp312-win32.whl", hash = "sha256:43353b943613c5d9c49a25aaffdba46f888ec354e71e3529a00cca3f04d66a7a", size = 54917, upload-time = "2026-02-17T16:11:54.758Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/d1/fa5d5331b862b9775aaf2a100f5ef86854e5d4407f71bddf102f4421e034/librt-0.8.1-cp312-cp312-win_amd64.whl", hash = "sha256:ff8baf1f8d3f4b6b7257fcb75a501f2a5499d0dda57645baa09d4d0d34b19444", size = 62017, upload-time = "2026-02-17T16:11:55.748Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7c/c614252f9acda59b01a66e2ddfd243ed1c7e1deab0293332dfbccf862808/librt-0.8.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f2ae3725904f7377e11cc37722d5d401e8b3d5851fb9273d7f4fe04f6b3d37d", size = 52441, upload-time = "2026-02-17T16:11:56.801Z" }, + { url = "https://files.pythonhosted.org/packages/c5/3c/f614c8e4eaac7cbf2bbdf9528790b21d89e277ee20d57dc6e559c626105f/librt-0.8.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7e6bad1cd94f6764e1e21950542f818a09316645337fd5ab9a7acc45d99a8f35", size = 66529, upload-time = "2026-02-17T16:11:57.809Z" }, + { url = "https://files.pythonhosted.org/packages/ab/96/5836544a45100ae411eda07d29e3d99448e5258b6e9c8059deb92945f5c2/librt-0.8.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cf450f498c30af55551ba4f66b9123b7185362ec8b625a773b3d39aa1a717583", size = 68669, upload-time = "2026-02-17T16:11:58.843Z" }, + { url = "https://files.pythonhosted.org/packages/06/53/f0b992b57af6d5531bf4677d75c44f095f2366a1741fb695ee462ae04b05/librt-0.8.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:eca45e982fa074090057132e30585a7e8674e9e885d402eae85633e9f449ce6c", size = 199279, upload-time = "2026-02-17T16:11:59.862Z" }, + { url = "https://files.pythonhosted.org/packages/f3/ad/4848cc16e268d14280d8168aee4f31cea92bbd2b79ce33d3e166f2b4e4fc/librt-0.8.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c3811485fccfda840861905b8c70bba5ec094e02825598bb9d4ca3936857a04", size = 210288, upload-time = "2026-02-17T16:12:00.954Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/05/27fdc2e95de26273d83b96742d8d3b7345f2ea2bdbd2405cc504644f2096/librt-0.8.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e4af413908f77294605e28cfd98063f54b2c790561383971d2f52d113d9c363", size = 224809, upload-time = "2026-02-17T16:12:02.108Z" }, + { url = "https://files.pythonhosted.org/packages/7a/d0/78200a45ba3240cb042bc597d6f2accba9193a2c57d0356268cbbe2d0925/librt-0.8.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5212a5bd7fae98dae95710032902edcd2ec4dc994e883294f75c857b83f9aba0", size = 218075, upload-time = "2026-02-17T16:12:03.631Z" }, + { url = "https://files.pythonhosted.org/packages/af/72/a210839fa74c90474897124c064ffca07f8d4b347b6574d309686aae7ca6/librt-0.8.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e692aa2d1d604e6ca12d35e51fdc36f4cda6345e28e36374579f7ef3611b3012", size = 225486, upload-time = "2026-02-17T16:12:04.725Z" }, + { url = "https://files.pythonhosted.org/packages/a3/c1/a03cc63722339ddbf087485f253493e2b013039f5b707e8e6016141130fa/librt-0.8.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4be2a5c926b9770c9e08e717f05737a269b9d0ebc5d2f0060f0fe3fe9ce47acb", size = 218219, upload-time = "2026-02-17T16:12:05.828Z" }, + { url = "https://files.pythonhosted.org/packages/58/f5/fff6108af0acf941c6f274a946aea0e484bd10cd2dc37610287ce49388c5/librt-0.8.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fd1a720332ea335ceb544cf0a03f81df92abd4bb887679fd1e460976b0e6214b", size = 218750, upload-time = "2026-02-17T16:12:07.09Z" }, + { url = "https://files.pythonhosted.org/packages/71/67/5a387bfef30ec1e4b4f30562c8586566faf87e47d696768c19feb49e3646/librt-0.8.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2af9e01e0ef80d95ae3c720be101227edae5f2fe7e3dc63d8857fadfc5a1d", size = 241624, upload-time = "2026-02-17T16:12:08.43Z" }, + { url = 
"https://files.pythonhosted.org/packages/d4/be/24f8502db11d405232ac1162eb98069ca49c3306c1d75c6ccc61d9af8789/librt-0.8.1-cp313-cp313-win32.whl", hash = "sha256:086a32dbb71336627e78cc1d6ee305a68d038ef7d4c39aaff41ae8c9aa46e91a", size = 54969, upload-time = "2026-02-17T16:12:09.633Z" }, + { url = "https://files.pythonhosted.org/packages/5c/73/c9fdf6cb2a529c1a092ce769a12d88c8cca991194dfe641b6af12fa964d2/librt-0.8.1-cp313-cp313-win_amd64.whl", hash = "sha256:e11769a1dbda4da7b00a76cfffa67aa47cfa66921d2724539eee4b9ede780b79", size = 62000, upload-time = "2026-02-17T16:12:10.632Z" }, + { url = "https://files.pythonhosted.org/packages/d3/97/68f80ca3ac4924f250cdfa6e20142a803e5e50fca96ef5148c52ee8c10ea/librt-0.8.1-cp313-cp313-win_arm64.whl", hash = "sha256:924817ab3141aca17893386ee13261f1d100d1ef410d70afe4389f2359fea4f0", size = 52495, upload-time = "2026-02-17T16:12:11.633Z" }, + { url = "https://files.pythonhosted.org/packages/c9/6a/907ef6800f7bca71b525a05f1839b21f708c09043b1c6aa77b6b827b3996/librt-0.8.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6cfa7fe54fd4d1f47130017351a959fe5804bda7a0bc7e07a2cdbc3fdd28d34f", size = 66081, upload-time = "2026-02-17T16:12:12.766Z" }, + { url = "https://files.pythonhosted.org/packages/1b/18/25e991cd5640c9fb0f8d91b18797b29066b792f17bf8493da183bf5caabe/librt-0.8.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:228c2409c079f8c11fb2e5d7b277077f694cb93443eb760e00b3b83cb8b3176c", size = 68309, upload-time = "2026-02-17T16:12:13.756Z" }, + { url = "https://files.pythonhosted.org/packages/a4/36/46820d03f058cfb5a9de5940640ba03165ed8aded69e0733c417bb04df34/librt-0.8.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7aae78ab5e3206181780e56912d1b9bb9f90a7249ce12f0e8bf531d0462dd0fc", size = 196804, upload-time = "2026-02-17T16:12:14.818Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/18/5dd0d3b87b8ff9c061849fbdb347758d1f724b9a82241aa908e0ec54ccd0/librt-0.8.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:172d57ec04346b047ca6af181e1ea4858086c80bdf455f61994c4aa6fc3f866c", size = 206907, upload-time = "2026-02-17T16:12:16.513Z" }, + { url = "https://files.pythonhosted.org/packages/d1/96/ef04902aad1424fd7299b62d1890e803e6ab4018c3044dca5922319c4b97/librt-0.8.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6b1977c4ea97ce5eb7755a78fae68d87e4102e4aaf54985e8b56806849cc06a3", size = 221217, upload-time = "2026-02-17T16:12:17.906Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ff/7e01f2dda84a8f5d280637a2e5827210a8acca9a567a54507ef1c75b342d/librt-0.8.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:10c42e1f6fd06733ef65ae7bebce2872bcafd8d6e6b0a08fe0a05a23b044fb14", size = 214622, upload-time = "2026-02-17T16:12:19.108Z" }, + { url = "https://files.pythonhosted.org/packages/1e/8c/5b093d08a13946034fed57619742f790faf77058558b14ca36a6e331161e/librt-0.8.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4c8dfa264b9193c4ee19113c985c95f876fae5e51f731494fc4e0cf594990ba7", size = 221987, upload-time = "2026-02-17T16:12:20.331Z" }, + { url = "https://files.pythonhosted.org/packages/d3/cc/86b0b3b151d40920ad45a94ce0171dec1aebba8a9d72bb3fa00c73ab25dd/librt-0.8.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:01170b6729a438f0dedc4a26ed342e3dc4f02d1000b4b19f980e1877f0c297e6", size = 215132, upload-time = "2026-02-17T16:12:21.54Z" }, + { url = "https://files.pythonhosted.org/packages/fc/be/8588164a46edf1e69858d952654e216a9a91174688eeefb9efbb38a9c799/librt-0.8.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:7b02679a0d783bdae30d443025b94465d8c3dc512f32f5b5031f93f57ac32071", size = 215195, upload-time = "2026-02-17T16:12:23.073Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/f2/0b9279bea735c734d69344ecfe056c1ba211694a72df10f568745c899c76/librt-0.8.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:190b109bb69592a3401fe1ffdea41a2e73370ace2ffdc4a0e8e2b39cdea81b78", size = 237946, upload-time = "2026-02-17T16:12:24.275Z" }, + { url = "https://files.pythonhosted.org/packages/e9/cc/5f2a34fbc8aeb35314a3641f9956fa9051a947424652fad9882be7a97949/librt-0.8.1-cp314-cp314-win32.whl", hash = "sha256:e70a57ecf89a0f64c24e37f38d3fe217a58169d2fe6ed6d70554964042474023", size = 50689, upload-time = "2026-02-17T16:12:25.766Z" }, + { url = "https://files.pythonhosted.org/packages/a0/76/cd4d010ab2147339ca2b93e959c3686e964edc6de66ddacc935c325883d7/librt-0.8.1-cp314-cp314-win_amd64.whl", hash = "sha256:7e2f3edca35664499fbb36e4770650c4bd4a08abc1f4458eab9df4ec56389730", size = 57875, upload-time = "2026-02-17T16:12:27.465Z" }, + { url = "https://files.pythonhosted.org/packages/84/0f/2143cb3c3ca48bd3379dcd11817163ca50781927c4537345d608b5045998/librt-0.8.1-cp314-cp314-win_arm64.whl", hash = "sha256:0d2f82168e55ddefd27c01c654ce52379c0750ddc31ee86b4b266bcf4d65f2a3", size = 48058, upload-time = "2026-02-17T16:12:28.556Z" }, + { url = "https://files.pythonhosted.org/packages/d2/0e/9b23a87e37baf00311c3efe6b48d6b6c168c29902dfc3f04c338372fd7db/librt-0.8.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c74a2da57a094bd48d03fa5d196da83d2815678385d2978657499063709abe1", size = 68313, upload-time = "2026-02-17T16:12:29.659Z" }, + { url = "https://files.pythonhosted.org/packages/db/9a/859c41e5a4f1c84200a7d2b92f586aa27133c8243b6cac9926f6e54d01b9/librt-0.8.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a355d99c4c0d8e5b770313b8b247411ed40949ca44e33e46a4789b9293a907ee", size = 70994, upload-time = "2026-02-17T16:12:31.516Z" }, + { url = 
"https://files.pythonhosted.org/packages/4c/28/10605366ee599ed34223ac2bf66404c6fb59399f47108215d16d5ad751a8/librt-0.8.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:2eb345e8b33fb748227409c9f1233d4df354d6e54091f0e8fc53acdb2ffedeb7", size = 220770, upload-time = "2026-02-17T16:12:33.294Z" }, + { url = "https://files.pythonhosted.org/packages/af/8d/16ed8fd452dafae9c48d17a6bc1ee3e818fd40ef718d149a8eff2c9f4ea2/librt-0.8.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9be2f15e53ce4e83cc08adc29b26fb5978db62ef2a366fbdf716c8a6c8901040", size = 235409, upload-time = "2026-02-17T16:12:35.443Z" }, + { url = "https://files.pythonhosted.org/packages/89/1b/7bdf3e49349c134b25db816e4a3db6b94a47ac69d7d46b1e682c2c4949be/librt-0.8.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:785ae29c1f5c6e7c2cde2c7c0e148147f4503da3abc5d44d482068da5322fd9e", size = 246473, upload-time = "2026-02-17T16:12:36.656Z" }, + { url = "https://files.pythonhosted.org/packages/4e/8a/91fab8e4fd2a24930a17188c7af5380eb27b203d72101c9cc000dbdfd95a/librt-0.8.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1d3a7da44baf692f0c6aeb5b2a09c5e6fc7a703bca9ffa337ddd2e2da53f7732", size = 238866, upload-time = "2026-02-17T16:12:37.849Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e0/c45a098843fc7c07e18a7f8a24ca8496aecbf7bdcd54980c6ca1aaa79a8e/librt-0.8.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5fc48998000cbc39ec0d5311312dda93ecf92b39aaf184c5e817d5d440b29624", size = 250248, upload-time = "2026-02-17T16:12:39.445Z" }, + { url = "https://files.pythonhosted.org/packages/82/30/07627de23036640c952cce0c1fe78972e77d7d2f8fd54fa5ef4554ff4a56/librt-0.8.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:e96baa6820280077a78244b2e06e416480ed859bbd8e5d641cf5742919d8beb4", size = 240629, upload-time = 
"2026-02-17T16:12:40.889Z" }, + { url = "https://files.pythonhosted.org/packages/fb/c1/55bfe1ee3542eba055616f9098eaf6eddb966efb0ca0f44eaa4aba327307/librt-0.8.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:31362dbfe297b23590530007062c32c6f6176f6099646bb2c95ab1b00a57c382", size = 239615, upload-time = "2026-02-17T16:12:42.446Z" }, + { url = "https://files.pythonhosted.org/packages/2b/39/191d3d28abc26c9099b19852e6c99f7f6d400b82fa5a4e80291bd3803e19/librt-0.8.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cc3656283d11540ab0ea01978378e73e10002145117055e03722417aeab30994", size = 263001, upload-time = "2026-02-17T16:12:43.627Z" }, + { url = "https://files.pythonhosted.org/packages/b9/eb/7697f60fbe7042ab4e88f4ee6af496b7f222fffb0a4e3593ef1f29f81652/librt-0.8.1-cp314-cp314t-win32.whl", hash = "sha256:738f08021b3142c2918c03692608baed43bc51144c29e35807682f8070ee2a3a", size = 51328, upload-time = "2026-02-17T16:12:45.148Z" }, + { url = "https://files.pythonhosted.org/packages/7c/72/34bf2eb7a15414a23e5e70ecb9440c1d3179f393d9349338a91e2781c0fb/librt-0.8.1-cp314-cp314t-win_amd64.whl", hash = "sha256:89815a22daf9c51884fb5dbe4f1ef65ee6a146e0b6a8df05f753e2e4a9359bf4", size = 58722, upload-time = "2026-02-17T16:12:46.85Z" }, + { url = "https://files.pythonhosted.org/packages/b2/c8/d148e041732d631fc76036f8b30fae4e77b027a1e95b7a84bb522481a940/librt-0.8.1-cp314-cp314t-win_arm64.whl", hash = "sha256:bf512a71a23504ed08103a13c941f763db13fb11177beb3d9244c98c29fb4a61", size = 48755, upload-time = "2026-02-17T16:12:47.943Z" }, +] + +[[package]] +name = "lxml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2bfc46ad28232c9e9cbde697bd94132aeb80364675cb/lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", size = 4073426, upload-time = "2025-09-22T04:04:59.287Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f3/c8/8ff2bc6b920c84355146cd1ab7d181bc543b89241cfb1ebee824a7c81457/lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456", size = 8661887, upload-time = "2025-09-22T04:01:17.265Z" }, + { url = "https://files.pythonhosted.org/packages/37/6f/9aae1008083bb501ef63284220ce81638332f9ccbfa53765b2b7502203cf/lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924", size = 4667818, upload-time = "2025-09-22T04:01:19.688Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ca/31fb37f99f37f1536c133476674c10b577e409c0a624384147653e38baf2/lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f", size = 4950807, upload-time = "2025-09-22T04:01:21.487Z" }, + { url = "https://files.pythonhosted.org/packages/da/87/f6cb9442e4bada8aab5ae7e1046264f62fdbeaa6e3f6211b93f4c0dd97f1/lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534", size = 5109179, upload-time = "2025-09-22T04:01:23.32Z" }, + { url = "https://files.pythonhosted.org/packages/c8/20/a7760713e65888db79bbae4f6146a6ae5c04e4a204a3c48896c408cd6ed2/lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564", size = 5023044, upload-time = "2025-09-22T04:01:25.118Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b0/7e64e0460fcb36471899f75831509098f3fd7cd02a3833ac517433cb4f8f/lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f", size = 5359685, upload-time = "2025-09-22T04:01:27.398Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/e1/e5df362e9ca4e2f48ed6411bd4b3a0ae737cc842e96877f5bf9428055ab4/lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0", size = 5654127, upload-time = "2025-09-22T04:01:29.629Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d1/232b3309a02d60f11e71857778bfcd4acbdb86c07db8260caf7d008b08f8/lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192", size = 5253958, upload-time = "2025-09-22T04:01:31.535Z" }, + { url = "https://files.pythonhosted.org/packages/35/35/d955a070994725c4f7d80583a96cab9c107c57a125b20bb5f708fe941011/lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0", size = 4711541, upload-time = "2025-09-22T04:01:33.801Z" }, + { url = "https://files.pythonhosted.org/packages/1e/be/667d17363b38a78c4bd63cfd4b4632029fd68d2c2dc81f25ce9eb5224dd5/lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092", size = 5267426, upload-time = "2025-09-22T04:01:35.639Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/62c70aa4a1c26569bc958c9ca86af2bb4e1f614e8c04fb2989833874f7ae/lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f", size = 5064917, upload-time = "2025-09-22T04:01:37.448Z" }, + { url = "https://files.pythonhosted.org/packages/bd/55/6ceddaca353ebd0f1908ef712c597f8570cc9c58130dbb89903198e441fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8", size = 4788795, upload-time = "2025-09-22T04:01:39.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/e8/fd63e15da5e3fd4c2146f8bbb3c14e94ab850589beab88e547b2dbce22e1/lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f", size = 5676759, upload-time = "2025-09-22T04:01:41.506Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/b3ec58dc5c374697f5ba37412cd2728f427d056315d124dd4b61da381877/lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6", size = 5255666, upload-time = "2025-09-22T04:01:43.363Z" }, + { url = "https://files.pythonhosted.org/packages/19/93/03ba725df4c3d72afd9596eef4a37a837ce8e4806010569bedfcd2cb68fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322", size = 5277989, upload-time = "2025-09-22T04:01:45.215Z" }, + { url = "https://files.pythonhosted.org/packages/c6/80/c06de80bfce881d0ad738576f243911fccf992687ae09fd80b734712b39c/lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849", size = 3611456, upload-time = "2025-09-22T04:01:48.243Z" }, + { url = "https://files.pythonhosted.org/packages/f7/d7/0cdfb6c3e30893463fb3d1e52bc5f5f99684a03c29a0b6b605cfae879cd5/lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f", size = 4011793, upload-time = "2025-09-22T04:01:50.042Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/93c73c67db235931527301ed3785f849c78991e2e34f3fd9a6663ffda4c5/lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6", size = 3672836, upload-time = "2025-09-22T04:01:52.145Z" }, + { url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" }, + { url = "https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" }, + { url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size = 5019000, upload-time = "2025-09-22T04:02:02.671Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" }, + { url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" }, + { url = "https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" }, + { url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = "2025-09-22T04:02:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" }, + { url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 
5660655, upload-time = "2025-09-22T04:02:18.815Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" }, + { url = "https://files.pythonhosted.org/packages/76/04/5c5e2b8577bc936e219becb2e98cdb1aca14a4921a12995b9d0c523502ae/lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2", size = 3610700, upload-time = "2025-09-22T04:02:24.465Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0a/4643ccc6bb8b143e9f9640aa54e38255f9d3b45feb2cbe7ae2ca47e8782e/lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7", size = 4010347, upload-time = "2025-09-22T04:02:26.286Z" }, + { url = "https://files.pythonhosted.org/packages/31/ef/dcf1d29c3f530577f61e5fe2f1bd72929acf779953668a8a47a479ae6f26/lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf", size = 3671248, upload-time = "2025-09-22T04:02:27.918Z" }, + { url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" }, + { url = "https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" }, + { url = "https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" }, + { url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" }, + { url = 
"https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" }, + { url = "https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" }, + { url = "https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" }, + { url = "https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" }, + { url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" }, + { url = "https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" }, + { url = "https://files.pythonhosted.org/packages/82/57/4eca3e31e54dc89e2c3507e1cd411074a17565fa5ffc437c4ae0a00d439e/lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b", size = 3670072, upload-time = "2025-09-22T04:03:38.05Z" }, + { url = "https://files.pythonhosted.org/packages/e3/e0/c96cf13eccd20c9421ba910304dae0f619724dcf1702864fd59dd386404d/lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed", size = 4080617, upload-time = "2025-09-22T04:03:39.835Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5d/b3f03e22b3d38d6f188ef044900a9b29b2fe0aebb94625ce9fe244011d34/lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8", size = 3754930, upload-time = "2025-09-22T04:03:41.565Z" }, + { url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" }, + { url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = 
"sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" }, + { url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" }, + { url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" }, + { url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" }, + { url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" }, + { url = "https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" }, + { url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, 
upload-time = "2025-09-22T04:03:27.62Z" }, + { url = "https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" }, + { url = "https://files.pythonhosted.org/packages/0f/85/86766dfebfa87bea0ab78e9ff7a4b4b45225df4b4d3b8cc3c03c5cd68464/lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312", size = 3911420, upload-time = "2025-09-22T04:03:32.198Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1a/b248b355834c8e32614650b8008c69ffeb0ceb149c793961dd8c0b991bb3/lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", size = 4406837, upload-time = "2025-09-22T04:03:34.027Z" }, + { url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = 
"2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "msoffcrypto-tool" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "olefile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/34/6250bdddaeaae24098e45449ea362fb3555a65fba30cad0ad5630ea48d1a/msoffcrypto_tool-6.0.0.tar.gz", hash = "sha256:9a5ebc4c0096b42e5d7ebc2350afdc92dc511061e935ca188468094fdd032bbe", size = 40593, upload-time = "2026-01-12T08:59:56.73Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/85/9e359fa9279e1d6861faaf9b6f037a3226374deb20a054c3937be6992013/msoffcrypto_tool-6.0.0-py3-none-any.whl", hash = "sha256:46c394ed5d9641e802fc79bf3fb0666a53748b23fa8c4aa634ae9d30d46fe397", size = 48791, upload-time = "2026-01-12T08:59:55.394Z" }, +] + +[[package]] +name = "mypy" +version = "1.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "librt", marker = "platform_python_implementation != 'PyPy'" }, + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/5c/b0089fe7fef0a994ae5ee07029ced0526082c6cfaaa4c10d40a10e33b097/mypy-1.20.0.tar.gz", hash = 
"sha256:eb96c84efcc33f0b5e0e04beacf00129dd963b67226b01c00b9dfc8affb464c3", size = 3815028, upload-time = "2026-03-31T16:55:14.959Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/dd/3afa29b58c2e57c79116ed55d700721c3c3b15955e2b6251dd165d377c0e/mypy-1.20.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:002b613ae19f4ac7d18b7e168ffe1cb9013b37c57f7411984abbd3b817b0a214", size = 14509525, upload-time = "2026-03-31T16:55:01.824Z" }, + { url = "https://files.pythonhosted.org/packages/54/eb/227b516ab8cad9f2a13c5e7a98d28cd6aa75e9c83e82776ae6c1c4c046c7/mypy-1.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9336b5e6712f4adaf5afc3203a99a40b379049104349d747eb3e5a3aa23ac2e", size = 13326469, upload-time = "2026-03-31T16:51:41.23Z" }, + { url = "https://files.pythonhosted.org/packages/57/d4/1ddb799860c1b5ac6117ec307b965f65deeb47044395ff01ab793248a591/mypy-1.20.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f13b3e41bce9d257eded794c0f12878af3129d80aacd8a3ee0dee51f3a978651", size = 13705953, upload-time = "2026-03-31T16:48:55.69Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b7/54a720f565a87b893182a2a393370289ae7149e4715859e10e1c05e49154/mypy-1.20.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9804c3ad27f78e54e58b32e7cb532d128b43dbfb9f3f9f06262b821a0f6bd3f5", size = 14710363, upload-time = "2026-03-31T16:53:26.948Z" }, + { url = "https://files.pythonhosted.org/packages/b2/2a/74810274848d061f8a8ea4ac23aaad43bd3d8c1882457999c2e568341c57/mypy-1.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:697f102c5c1d526bdd761a69f17c6070f9892eebcb94b1a5963d679288c09e78", size = 14947005, upload-time = "2026-03-31T16:50:17.591Z" }, + { url = "https://files.pythonhosted.org/packages/77/91/21b8ba75f958bcda75690951ce6fa6b7138b03471618959529d74b8544e2/mypy-1.20.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:0ecd63f75fdd30327e4ad8b5704bd6d91fc6c1b2e029f8ee14705e1207212489", size = 10880616, upload-time = "2026-03-31T16:52:19.986Z" }, + { url = "https://files.pythonhosted.org/packages/8a/15/3d8198ef97c1ca03aea010cce4f1d4f3bc5d9849e8c0140111ca2ead9fdd/mypy-1.20.0-cp312-cp312-win_arm64.whl", hash = "sha256:f194db59657c58593a3c47c6dfd7bad4ef4ac12dbc94d01b3a95521f78177e33", size = 9813091, upload-time = "2026-03-31T16:53:44.385Z" }, + { url = "https://files.pythonhosted.org/packages/d6/a7/f64ea7bd592fa431cb597418b6dec4a47f7d0c36325fec7ac67bc8402b94/mypy-1.20.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b20c8b0fd5877abdf402e79a3af987053de07e6fb208c18df6659f708b535134", size = 14485344, upload-time = "2026-03-31T16:49:16.78Z" }, + { url = "https://files.pythonhosted.org/packages/bb/72/8927d84cfc90c6abea6e96663576e2e417589347eb538749a464c4c218a0/mypy-1.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:367e5c993ba34d5054d11937d0485ad6dfc60ba760fa326c01090fc256adf15c", size = 13327400, upload-time = "2026-03-31T16:53:08.02Z" }, + { url = "https://files.pythonhosted.org/packages/ab/4a/11ab99f9afa41aa350178d24a7d2da17043228ea10f6456523f64b5a6cf6/mypy-1.20.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f799d9db89fc00446f03281f84a221e50018fc40113a3ba9864b132895619ebe", size = 13706384, upload-time = "2026-03-31T16:52:28.577Z" }, + { url = "https://files.pythonhosted.org/packages/42/79/694ca73979cfb3535ebfe78733844cd5aff2e63304f59bf90585110d975a/mypy-1.20.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:555658c611099455b2da507582ea20d2043dfdfe7f5ad0add472b1c6238b433f", size = 14700378, upload-time = "2026-03-31T16:48:45.527Z" }, + { url = "https://files.pythonhosted.org/packages/84/24/a022ccab3a46e3d2cdf2e0e260648633640eb396c7e75d5a42818a8d3971/mypy-1.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:efe8d70949c3023698c3fca1e94527e7e790a361ab8116f90d11221421cd8726", size = 14932170, upload-time = "2026-03-31T16:49:36.038Z" }, + { url = "https://files.pythonhosted.org/packages/d8/9b/549228d88f574d04117e736f55958bd4908f980f9f5700a07aeb85df005b/mypy-1.20.0-cp313-cp313-win_amd64.whl", hash = "sha256:f49590891d2c2f8a9de15614e32e459a794bcba84693c2394291a2038bbaaa69", size = 10888526, upload-time = "2026-03-31T16:50:59.827Z" }, + { url = "https://files.pythonhosted.org/packages/91/17/15095c0e54a8bc04d22d4ff06b2139d5f142c2e87520b4e39010c4862771/mypy-1.20.0-cp313-cp313-win_arm64.whl", hash = "sha256:76a70bf840495729be47510856b978f1b0ec7d08f257ca38c9d932720bf6b43e", size = 9816456, upload-time = "2026-03-31T16:49:59.537Z" }, + { url = "https://files.pythonhosted.org/packages/4e/0e/6ca4a84cbed9e62384bc0b2974c90395ece5ed672393e553996501625fc5/mypy-1.20.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:0f42dfaab7ec1baff3b383ad7af562ab0de573c5f6edb44b2dab016082b89948", size = 14483331, upload-time = "2026-03-31T16:52:57.999Z" }, + { url = "https://files.pythonhosted.org/packages/7d/c5/5fe9d8a729dd9605064691816243ae6c49fde0bd28f6e5e17f6a24203c43/mypy-1.20.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:31b5dbb55293c1bd27c0fc813a0d2bb5ceef9d65ac5afa2e58f829dab7921fd5", size = 13342047, upload-time = "2026-03-31T16:54:21.555Z" }, + { url = "https://files.pythonhosted.org/packages/4c/33/e18bcfa338ca4e6b2771c85d4c5203e627d0c69d9de5c1a2cf2ba13320ba/mypy-1.20.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49d11c6f573a5a08f77fad13faff2139f6d0730ebed2cfa9b3d2702671dd7188", size = 13719585, upload-time = "2026-03-31T16:51:53.89Z" }, + { url = "https://files.pythonhosted.org/packages/6b/8d/93491ff7b79419edc7eabf95cb3b3f7490e2e574b2855c7c7e7394ff933f/mypy-1.20.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:7d3243c406773185144527f83be0e0aefc7bf4601b0b2b956665608bf7c98a83", size = 14685075, upload-time = "2026-03-31T16:54:04.464Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9d/d924b38a4923f8d164bf2b4ec98bf13beaf6e10a5348b4b137eadae40a6e/mypy-1.20.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a79c1eba7ac4209f2d850f0edd0a2f8bba88cbfdfefe6fb76a19e9d4fe5e71a2", size = 14919141, upload-time = "2026-03-31T16:54:51.785Z" }, + { url = "https://files.pythonhosted.org/packages/59/98/1da9977016678c0b99d43afe52ed00bb3c1a0c4c995d3e6acca1a6ebb9b4/mypy-1.20.0-cp314-cp314-win_amd64.whl", hash = "sha256:00e047c74d3ec6e71a2eb88e9ea551a2edb90c21f993aefa9e0d2a898e0bb732", size = 11050925, upload-time = "2026-03-31T16:51:30.758Z" }, + { url = "https://files.pythonhosted.org/packages/5e/e3/ba0b7a3143e49a9c4f5967dde6ea4bf8e0b10ecbbcca69af84027160ee89/mypy-1.20.0-cp314-cp314-win_arm64.whl", hash = "sha256:931a7630bba591593dcf6e97224a21ff80fb357e7982628d25e3c618e7f598ef", size = 10001089, upload-time = "2026-03-31T16:49:43.632Z" }, + { url = "https://files.pythonhosted.org/packages/12/28/e617e67b3be9d213cda7277913269c874eb26472489f95d09d89765ce2d8/mypy-1.20.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:26c8b52627b6552f47ff11adb4e1509605f094e29815323e487fc0053ebe93d1", size = 15534710, upload-time = "2026-03-31T16:52:12.506Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0c/3b5f2d3e45dc7169b811adce8451679d9430399d03b168f9b0489f43adaa/mypy-1.20.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:39362cdb4ba5f916e7976fccecaab1ba3a83e35f60fa68b64e9a70e221bb2436", size = 14393013, upload-time = "2026-03-31T16:54:41.186Z" }, + { url = "https://files.pythonhosted.org/packages/a3/49/edc8b0aa145cc09c1c74f7ce2858eead9329931dcbbb26e2ad40906daa4e/mypy-1.20.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34506397dbf40c15dc567635d18a21d33827e9ab29014fb83d292a8f4f8953b6", size = 15047240, upload-time 
= "2026-03-31T16:54:31.955Z" }, + { url = "https://files.pythonhosted.org/packages/42/37/a946bb416e37a57fa752b3100fd5ede0e28df94f92366d1716555d47c454/mypy-1.20.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:555493c44a4f5a1b58d611a43333e71a9981c6dbe26270377b6f8174126a0526", size = 15858565, upload-time = "2026-03-31T16:53:36.997Z" }, + { url = "https://files.pythonhosted.org/packages/2f/99/7690b5b5b552db1bd4ff362e4c0eb3107b98d680835e65823fbe888c8b78/mypy-1.20.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2721f0ce49cb74a38f00c50da67cb7d36317b5eda38877a49614dc018e91c787", size = 16087874, upload-time = "2026-03-31T16:52:48.313Z" }, + { url = "https://files.pythonhosted.org/packages/aa/76/53e893a498138066acd28192b77495c9357e5a58cc4be753182846b43315/mypy-1.20.0-cp314-cp314t-win_amd64.whl", hash = "sha256:47781555a7aa5fedcc2d16bcd72e0dc83eb272c10dd657f9fb3f9cc08e2e6abb", size = 12572380, upload-time = "2026-03-31T16:49:52.454Z" }, + { url = "https://files.pythonhosted.org/packages/76/9c/6dbdae21f01b7aacddc2c0bbf3c5557aa547827fdf271770fe1e521e7093/mypy-1.20.0-cp314-cp314t-win_arm64.whl", hash = "sha256:c70380fe5d64010f79fb863b9081c7004dd65225d2277333c219d93a10dad4dd", size = 10381174, upload-time = "2026-03-31T16:51:20.179Z" }, + { url = "https://files.pythonhosted.org/packages/21/66/4d734961ce167f0fd8380769b3b7c06dbdd6ff54c2190f3f2ecd22528158/mypy-1.20.0-py3-none-any.whl", hash = "sha256:a6e0641147cbfa7e4e94efdb95c2dab1aff8cfc159ded13e07f308ddccc8c48e", size = 2636365, upload-time = "2026-03-31T16:51:44.911Z" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = 
"2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + +[[package]] +name = "olefile" +version = "0.47" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c", size = 112240, upload-time = "2023-12-01T16:22:53.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f", size = 114565, upload-time = "2023-12-01T16:22:51.518Z" }, +] + +[[package]] +name = "oletools" +version = "0.60.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorclass" }, + { name = "easygui" }, + { name = "msoffcrypto-tool", marker = "(platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation != 'PyPy' and sys_platform == 'win32') or (sys_platform != 'darwin' and sys_platform != 'win32')" }, + { name = "olefile" }, + { name = "pcodedmp" }, + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5c/2f/037f40e44706d542b94a2312ccc33ee2701ebfc9a83b46b55263d49ce55a/oletools-0.60.2.zip", hash = "sha256:ad452099f4695ffd8855113f453348200d195ee9fa341a09e197d66ee7e0b2c3", size = 3433750, upload-time = "2024-07-02T14:50:38.242Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/ff/05257b7183279b80ecec6333744de23f48f0faeeba46c93e6d13ce835515/oletools-0.60.2-py2.py3-none-any.whl", 
hash = "sha256:72ad8bd748fd0c4e7b5b4733af770d11543ebb2bf2697455f99f975fcd50cc96", size = 989449, upload-time = "2024-07-02T14:50:29.122Z" }, +] + +[[package]] +name = "openpyxl" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "et-xmlfile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, +] + +[[package]] +name = "orjson" +version = "3.11.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/1b/2024d06792d0779f9dbc51531b61c24f76c75b9f4ce05e6f3377a1814cea/orjson-3.11.8.tar.gz", hash = "sha256:96163d9cdc5a202703e9ad1b9ae757d5f0ca62f4fa0cc93d1f27b0e180cc404e", size = 5603832, upload-time = "2026-03-31T16:16:27.878Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/f6/8d58b32ab32d9215973a1688aebd098252ee8af1766c0e4e36e7831f0295/orjson-3.11.8-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1cd0b77e77c95758f8e1100139844e99f3ccc87e71e6fc8e1c027e55807c549f", size = 229233, upload-time = "2026-03-31T16:15:12.762Z" }, + { url = "https://files.pythonhosted.org/packages/a9/8b/2ffe35e71f6b92622e8ea4607bf33ecf7dfb51b3619dcfabfd36cbe2d0a5/orjson-3.11.8-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:6a3d159d5ffa0e3961f353c4b036540996bf8b9697ccc38261c0eac1fd3347a6", size = 128772, upload-time = "2026-03-31T16:15:14.237Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/d2/1f8682ae50d5c6897a563cb96bc106da8c9cb5b7b6e81a52e4cc086679b9/orjson-3.11.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76070a76e9c5ae661e2d9848f216980d8d533e0f8143e6ed462807b242e3c5e8", size = 131946, upload-time = "2026-03-31T16:15:15.607Z" }, + { url = "https://files.pythonhosted.org/packages/52/4b/5500f76f0eece84226e0689cb48dcde081104c2fa6e2483d17ca13685ffb/orjson-3.11.8-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:54153d21520a71a4c82a0dbb4523e468941d549d221dc173de0f019678cf3813", size = 130368, upload-time = "2026-03-31T16:15:17.066Z" }, + { url = "https://files.pythonhosted.org/packages/da/4e/58b927e08fbe9840e6c920d9e299b051ea667463b1f39a56e668669f8508/orjson-3.11.8-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:469ac2125611b7c5741a0b3798cd9e5786cbad6345f9f400c77212be89563bec", size = 135540, upload-time = "2026-03-31T16:15:18.404Z" }, + { url = "https://files.pythonhosted.org/packages/56/7c/ba7cb871cba1bcd5cd02ee34f98d894c6cea96353ad87466e5aef2429c60/orjson-3.11.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:14778ffd0f6896aa613951a7fbf4690229aa7a543cb2bfbe9f358e08aafa9546", size = 146877, upload-time = "2026-03-31T16:15:19.833Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/eb9c25fc1386696c6a342cd361c306452c75e0b55e86ad602dd4827a7fd7/orjson-3.11.8-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea56a955056a6d6c550cf18b3348656a9d9a4f02e2d0c02cabf3c73f1055d506", size = 132837, upload-time = "2026-03-31T16:15:21.282Z" }, + { url = "https://files.pythonhosted.org/packages/37/87/5ddeb7fc1fbd9004aeccab08426f34c81a5b4c25c7061281862b015fce2b/orjson-3.11.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53a0f57e59a530d18a142f4d4ba6dfc708dc5fdedce45e98ff06b44930a2a48f", size = 133624, upload-time = "2026-03-31T16:15:22.641Z" }, + { url 
= "https://files.pythonhosted.org/packages/22/09/90048793db94ee4b2fcec4ac8e5ddb077367637d6650be896b3494b79bb7/orjson-3.11.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9b48e274f8824567d74e2158199e269597edf00823a1b12b63d48462bbf5123e", size = 141904, upload-time = "2026-03-31T16:15:24.435Z" }, + { url = "https://files.pythonhosted.org/packages/c0/cf/eb284847487821a5d415e54149a6449ba9bfc5872ce63ab7be41b8ec401c/orjson-3.11.8-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:3f262401086a3960586af06c054609365e98407151f5ea24a62893a40d80dbbb", size = 423742, upload-time = "2026-03-31T16:15:26.155Z" }, + { url = "https://files.pythonhosted.org/packages/44/09/e12423d327071c851c13e76936f144a96adacfc037394dec35ac3fc8d1e8/orjson-3.11.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8e8c6218b614badf8e229b697865df4301afa74b791b6c9ade01d19a9953a942", size = 147806, upload-time = "2026-03-31T16:15:27.909Z" }, + { url = "https://files.pythonhosted.org/packages/b3/6d/37c2589ba864e582ffe7611643314785c6afb1f83c701654ef05daa8fcc7/orjson-3.11.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:093d489fa039ddade2db541097dbb484999fcc65fc2b0ff9819141e2ab364f25", size = 136485, upload-time = "2026-03-31T16:15:29.749Z" }, + { url = "https://files.pythonhosted.org/packages/be/c9/135194a02ab76b04ed9a10f68624b7ebd238bbe55548878b11ff15a0f352/orjson-3.11.8-cp312-cp312-win32.whl", hash = "sha256:e0950ed1bcb9893f4293fd5c5a7ee10934fbf82c4101c70be360db23ce24b7d2", size = 131966, upload-time = "2026-03-31T16:15:31.687Z" }, + { url = "https://files.pythonhosted.org/packages/ed/9a/9796f8fbe3cf30ce9cb696748dbb535e5c87be4bf4fe2e9ca498ef1fa8cf/orjson-3.11.8-cp312-cp312-win_amd64.whl", hash = "sha256:3cf17c141617b88ced4536b2135c552490f07799f6ad565948ea07bef0dcb9a6", size = 127441, upload-time = "2026-03-31T16:15:33.333Z" }, + { url = "https://files.pythonhosted.org/packages/cc/47/5aaf54524a7a4a0dd09dd778f3fa65dd2108290615b652e23d944152bc8e/orjson-3.11.8-cp312-cp312-win_arm64.whl", 
hash = "sha256:48854463b0572cc87dac7d981aa72ed8bf6deedc0511853dc76b8bbd5482d36d", size = 127364, upload-time = "2026-03-31T16:15:34.748Z" }, + { url = "https://files.pythonhosted.org/packages/66/7f/95fba509bb2305fab0073558f1e8c3a2ec4b2afe58ed9fcb7d3b8beafe94/orjson-3.11.8-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:3f23426851d98478c8970da5991f84784a76682213cd50eb73a1da56b95239dc", size = 229180, upload-time = "2026-03-31T16:15:36.426Z" }, + { url = "https://files.pythonhosted.org/packages/f6/9d/b237215c743ca073697d759b5503abd2cb8a0d7b9c9e21f524bcf176ab66/orjson-3.11.8-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:ebaed4cef74a045b83e23537b52ef19a367c7e3f536751e355a2a394f8648559", size = 128754, upload-time = "2026-03-31T16:15:38.049Z" }, + { url = "https://files.pythonhosted.org/packages/42/3d/27d65b6d11e63f133781425f132807aef793ed25075fec686fc8e46dd528/orjson-3.11.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97c8f5d3b62380b70c36ffacb2a356b7c6becec86099b177f73851ba095ef623", size = 131877, upload-time = "2026-03-31T16:15:39.484Z" }, + { url = "https://files.pythonhosted.org/packages/dd/cc/faee30cd8f00421999e40ef0eba7332e3a625ce91a58200a2f52c7fef235/orjson-3.11.8-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:436c4922968a619fb7fef1ccd4b8b3a76c13b67d607073914d675026e911a65c", size = 130361, upload-time = "2026-03-31T16:15:41.274Z" }, + { url = "https://files.pythonhosted.org/packages/5c/bb/a6c55896197f97b6d4b4e7c7fd77e7235517c34f5d6ad5aadd43c54c6d7c/orjson-3.11.8-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ab359aff0436d80bfe8a23b46b5fea69f1e18aaf1760a709b4787f1318b317f", size = 135521, upload-time = "2026-03-31T16:15:42.758Z" }, + { url = "https://files.pythonhosted.org/packages/9c/7c/ca3a3525aa32ff636ebb1778e77e3587b016ab2edb1b618b36ba96f8f2c0/orjson-3.11.8-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:f89b6d0b3a8d81e1929d3ab3d92bbc225688bd80a770c49432543928fe09ac55", size = 146862, upload-time = "2026-03-31T16:15:44.341Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0c/18a9d7f18b5edd37344d1fd5be17e94dc652c67826ab749c6e5948a78112/orjson-3.11.8-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29c009e7a2ca9ad0ed1376ce20dd692146a5d9fe4310848904b6b4fee5c5c137", size = 132847, upload-time = "2026-03-31T16:15:46.368Z" }, + { url = "https://files.pythonhosted.org/packages/23/91/7e722f352ad67ca573cee44de2a58fb810d0f4eb4e33276c6a557979fd8a/orjson-3.11.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:705b895b781b3e395c067129d8551655642dfe9437273211d5404e87ac752b53", size = 133637, upload-time = "2026-03-31T16:15:48.123Z" }, + { url = "https://files.pythonhosted.org/packages/af/04/32845ce13ac5bd1046ddb02ac9432ba856cc35f6d74dde95864fe0ad5523/orjson-3.11.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:88006eda83858a9fdf73985ce3804e885c2befb2f506c9a3723cdeb5a2880e3e", size = 141906, upload-time = "2026-03-31T16:15:49.626Z" }, + { url = "https://files.pythonhosted.org/packages/02/5e/c551387ddf2d7106d9039369862245c85738b828844d13b99ccb8d61fd06/orjson-3.11.8-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:55120759e61309af7fcf9e961c6f6af3dde5921cdb3ee863ef63fd9db126cae6", size = 423722, upload-time = "2026-03-31T16:15:51.176Z" }, + { url = "https://files.pythonhosted.org/packages/00/a3/ecfe62434096f8a794d4976728cb59bcfc4a643977f21c2040545d37eb4c/orjson-3.11.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:98bdc6cb889d19bed01de46e67574a2eab61f5cc6b768ed50e8ac68e9d6ffab6", size = 147801, upload-time = "2026-03-31T16:15:52.939Z" }, + { url = "https://files.pythonhosted.org/packages/18/6d/0dce10b9f6643fdc59d99333871a38fa5a769d8e2fc34a18e5d2bfdee900/orjson-3.11.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:708c95f925a43ab9f34625e45dcdadf09ec8a6e7b664a938f2f8d5650f6c090b", size 
= 136460, upload-time = "2026-03-31T16:15:54.431Z" }, + { url = "https://files.pythonhosted.org/packages/01/d6/6dde4f31842d87099238f1f07b459d24edc1a774d20687187443ab044191/orjson-3.11.8-cp313-cp313-win32.whl", hash = "sha256:01c4e5a6695dc09098f2e6468a251bc4671c50922d4d745aff1a0a33a0cf5b8d", size = 131956, upload-time = "2026-03-31T16:15:56.081Z" }, + { url = "https://files.pythonhosted.org/packages/c1/f9/4e494a56e013db957fb77186b818b916d4695b8fa2aa612364974160e91b/orjson-3.11.8-cp313-cp313-win_amd64.whl", hash = "sha256:c154a35dd1330707450bb4d4e7dd1f17fa6f42267a40c1e8a1daa5e13719b4b8", size = 127410, upload-time = "2026-03-31T16:15:57.54Z" }, + { url = "https://files.pythonhosted.org/packages/57/7f/803203d00d6edb6e9e7eef421d4e1adbb5ea973e40b3533f3cfd9aeb374e/orjson-3.11.8-cp313-cp313-win_arm64.whl", hash = "sha256:4861bde57f4d253ab041e374f44023460e60e71efaa121f3c5f0ed457c3a701e", size = 127338, upload-time = "2026-03-31T16:15:59.106Z" }, + { url = "https://files.pythonhosted.org/packages/6d/35/b01910c3d6b85dc882442afe5060cbf719c7d1fc85749294beda23d17873/orjson-3.11.8-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ec795530a73c269a55130498842aaa762e4a939f6ce481a7e986eeaa790e9da4", size = 229171, upload-time = "2026-03-31T16:16:00.651Z" }, + { url = "https://files.pythonhosted.org/packages/c2/56/c9ec97bd11240abef39b9e5d99a15462809c45f677420fd148a6c5e6295e/orjson-3.11.8-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:c492a0e011c0f9066e9ceaa896fbc5b068c54d365fea5f3444b697ee01bc8625", size = 128746, upload-time = "2026-03-31T16:16:02.673Z" }, + { url = "https://files.pythonhosted.org/packages/3b/e4/66d4f30a90de45e2f0cbd9623588e8ae71eef7679dbe2ae954ed6d66a41f/orjson-3.11.8-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:883206d55b1bd5f5679ad5e6ddd3d1a5e3cac5190482927fdb8c78fb699193b5", size = 131867, upload-time = "2026-03-31T16:16:04.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/30/2a645fc9286b928675e43fa2a3a16fb7b6764aa78cc719dc82141e00f30b/orjson-3.11.8-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5774c1fdcc98b2259800b683b19599c133baeb11d60033e2095fd9d4667b82db", size = 124664, upload-time = "2026-03-31T16:16:05.837Z" }, + { url = "https://files.pythonhosted.org/packages/db/44/77b9a86d84a28d52ba3316d77737f6514e17118119ade3f91b639e859029/orjson-3.11.8-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ac7381c83dd3d4a6347e6635950aa448f54e7b8406a27c7ecb4a37e9f1ae08b", size = 129701, upload-time = "2026-03-31T16:16:07.407Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ea/eff3d9bfe47e9bc6969c9181c58d9f71237f923f9c86a2d2f490cd898c82/orjson-3.11.8-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:14439063aebcb92401c11afc68ee4e407258d2752e62d748b6942dad20d2a70d", size = 141202, upload-time = "2026-03-31T16:16:09.48Z" }, + { url = "https://files.pythonhosted.org/packages/52/c8/90d4b4c60c84d62068d0cf9e4d8f0a4e05e76971d133ac0c60d818d4db20/orjson-3.11.8-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa72e71977bff96567b0f500fc5bfd2fdf915f34052c782a4c6ebbdaa97aa858", size = 127194, upload-time = "2026-03-31T16:16:11.02Z" }, + { url = "https://files.pythonhosted.org/packages/8d/c7/ea9e08d1f0ba981adffb629811148b44774d935171e7b3d780ae43c4c254/orjson-3.11.8-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7679bc2f01bb0d219758f1a5f87bb7c8a81c0a186824a393b366876b4948e14f", size = 133639, upload-time = "2026-03-31T16:16:13.434Z" }, + { url = "https://files.pythonhosted.org/packages/6c/8c/ddbbfd6ba59453c8fc7fe1d0e5983895864e264c37481b2a791db635f046/orjson-3.11.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:14f7b8fcb35ef403b42fa5ecfa4ed032332a91f3dc7368fbce4184d59e1eae0d", size = 141914, upload-time = "2026-03-31T16:16:14.955Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/31/dbfbefec9df060d34ef4962cd0afcb6fa7a9ec65884cb78f04a7859526c3/orjson-3.11.8-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:c2bdf7b2facc80b5e34f48a2d557727d5c5c57a8a450de122ae81fa26a81c1bc", size = 423800, upload-time = "2026-03-31T16:16:16.594Z" }, + { url = "https://files.pythonhosted.org/packages/87/cf/f74e9ae9803d4ab46b163494adba636c6d7ea955af5cc23b8aaa94cfd528/orjson-3.11.8-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ccd7ba1b0605813a0715171d39ec4c314cb97a9c85893c2c5c0c3a3729df38bf", size = 147837, upload-time = "2026-03-31T16:16:18.585Z" }, + { url = "https://files.pythonhosted.org/packages/64/e6/9214f017b5db85e84e68602792f742e5dc5249e963503d1b356bee611e01/orjson-3.11.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cdbc8c9c02463fef4d3c53a9ba3336d05496ec8e1f1c53326a1e4acc11f5c600", size = 136441, upload-time = "2026-03-31T16:16:20.151Z" }, + { url = "https://files.pythonhosted.org/packages/24/dd/3590348818f58f837a75fb969b04cdf187ae197e14d60b5e5a794a38b79d/orjson-3.11.8-cp314-cp314-win32.whl", hash = "sha256:0b57f67710a8cd459e4e54eb96d5f77f3624eba0c661ba19a525807e42eccade", size = 131983, upload-time = "2026-03-31T16:16:21.823Z" }, + { url = "https://files.pythonhosted.org/packages/3f/0f/b6cb692116e05d058f31ceee819c70f097fa9167c82f67fabe7516289abc/orjson-3.11.8-cp314-cp314-win_amd64.whl", hash = "sha256:735e2262363dcbe05c35e3a8869898022af78f89dde9e256924dc02e99fe69ca", size = 127396, upload-time = "2026-03-31T16:16:23.685Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d1/facb5b5051fabb0ef9d26c6544d87ef19a939a9a001198655d0d891062dd/orjson-3.11.8-cp314-cp314-win_arm64.whl", hash = "sha256:6ccdea2c213cf9f3d9490cbd5d427693c870753df41e6cb375bd79bcbafc8817", size = 127330, upload-time = "2026-03-31T16:16:25.496Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pathspec" +version = "1.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, +] + +[[package]] +name = "pcodedmp" +version = "1.2.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oletools" }, + { name = "win-unicode-console", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/20/6d461e29135f474408d0d7f95b2456a9ba245560768ee51b788af10f7429/pcodedmp-1.2.6.tar.gz", hash = "sha256:025f8c809a126f45a082ffa820893e6a8d990d9d7ddb68694b5a9f0a6dbcd955", size = 35549, upload-time = "2019-07-30T18:05:42.516Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ba/72/b380fb5c89d89c3afafac8cf02a71a45f4f4a4f35531ca949a34683962d1/pcodedmp-1.2.6-py2.py3-none-any.whl", hash = "sha256:4441f7c0ab4cbda27bd4668db3b14f36261d86e5059ce06c0828602cbe1c4278", size = 30939, upload-time = "2019-07-30T18:05:40.483Z" }, +] + +[[package]] +name = "platformdirs" +version = "4.9.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/56/8d4c30c8a1d07013911a8fdbd8f89440ef9f08d07a1b50ab8ca8be5a20f9/platformdirs-4.9.4.tar.gz", hash = "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934", size = 28737, upload-time = "2026-03-05T18:34:13.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pyarrow" +version = "23.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = 
"sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/4b/4166bb5abbfe6f750fc60ad337c43ecf61340fa52ab386da6e8dbf9e63c4/pyarrow-23.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f", size = 34214575, upload-time = "2026-02-16T10:09:56.225Z" }, + { url = "https://files.pythonhosted.org/packages/e1/da/3f941e3734ac8088ea588b53e860baeddac8323ea40ce22e3d0baa865cc9/pyarrow-23.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:7707d2b6673f7de054e2e83d59f9e805939038eebe1763fe811ee8fa5c0cd1a7", size = 35832540, upload-time = "2026-02-16T10:10:03.428Z" }, + { url = "https://files.pythonhosted.org/packages/88/7c/3d841c366620e906d54430817531b877ba646310296df42ef697308c2705/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86ff03fb9f1a320266e0de855dee4b17da6794c595d207f89bba40d16b5c78b9", size = 44470940, upload-time = "2026-02-16T10:10:10.704Z" }, + { url = "https://files.pythonhosted.org/packages/2c/a5/da83046273d990f256cb79796a190bbf7ec999269705ddc609403f8c6b06/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:813d99f31275919c383aab17f0f455a04f5a429c261cc411b1e9a8f5e4aaaa05", size = 47586063, upload-time = "2026-02-16T10:10:17.95Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/b7d2ebcff47a514f47f9da1e74b7949138c58cfeb108cdd4ee62f43f0cf3/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bf5842f960cddd2ef757d486041d57c96483efc295a8c4a0e20e704cbbf39c67", size = 48173045, upload-time = "2026-02-16T10:10:25.363Z" }, + { url = "https://files.pythonhosted.org/packages/43/b2/b40961262213beaba6acfc88698eb773dfce32ecdf34d19291db94c2bd73/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564baf97c858ecc03ec01a41062e8f4698abc3e6e2acd79c01c2e97880a19730", size = 50621741, 
upload-time = "2026-02-16T10:10:33.477Z" }, + { url = "https://files.pythonhosted.org/packages/f6/70/1fdda42d65b28b078e93d75d371b2185a61da89dda4def8ba6ba41ebdeb4/pyarrow-23.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:07deae7783782ac7250989a7b2ecde9b3c343a643f82e8a4df03d93b633006f0", size = 27620678, upload-time = "2026-02-16T10:10:39.31Z" }, + { url = "https://files.pythonhosted.org/packages/47/10/2cbe4c6f0fb83d2de37249567373d64327a5e4d8db72f486db42875b08f6/pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8", size = 34210066, upload-time = "2026-02-16T10:10:45.487Z" }, + { url = "https://files.pythonhosted.org/packages/cb/4f/679fa7e84dadbaca7a65f7cdba8d6c83febbd93ca12fa4adf40ba3b6362b/pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f", size = 35825526, upload-time = "2026-02-16T10:10:52.266Z" }, + { url = "https://files.pythonhosted.org/packages/f9/63/d2747d930882c9d661e9398eefc54f15696547b8983aaaf11d4a2e8b5426/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677", size = 44473279, upload-time = "2026-02-16T10:11:01.557Z" }, + { url = "https://files.pythonhosted.org/packages/b3/93/10a48b5e238de6d562a411af6467e71e7aedbc9b87f8d3a35f1560ae30fb/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2", size = 47585798, upload-time = "2026-02-16T10:11:09.401Z" }, + { url = "https://files.pythonhosted.org/packages/5c/20/476943001c54ef078dbf9542280e22741219a184a0632862bca4feccd666/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37", size = 48179446, upload-time = "2026-02-16T10:11:17.781Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/b6/5dd0c47b335fcd8edba9bfab78ad961bd0fd55ebe53468cc393f45e0be60/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2", size = 50623972, upload-time = "2026-02-16T10:11:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/d5/09/a532297c9591a727d67760e2e756b83905dd89adb365a7f6e9c72578bcc1/pyarrow-23.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:cecfb12ef629cf6be0b1887f9f86463b0dd3dc3195ae6224e74006be4736035a", size = 27540749, upload-time = "2026-02-16T10:12:23.297Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8e/38749c4b1303e6ae76b3c80618f84861ae0c55dd3c2273842ea6f8258233/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1", size = 34471544, upload-time = "2026-02-16T10:11:32.535Z" }, + { url = "https://files.pythonhosted.org/packages/a3/73/f237b2bc8c669212f842bcfd842b04fc8d936bfc9d471630569132dc920d/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500", size = 35949911, upload-time = "2026-02-16T10:11:39.813Z" }, + { url = "https://files.pythonhosted.org/packages/0c/86/b912195eee0903b5611bf596833def7d146ab2d301afeb4b722c57ffc966/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41", size = 44520337, upload-time = "2026-02-16T10:11:47.764Z" }, + { url = "https://files.pythonhosted.org/packages/69/c2/f2a717fb824f62d0be952ea724b4f6f9372a17eed6f704b5c9526f12f2f1/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07", size = 47548944, upload-time = "2026-02-16T10:11:56.607Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/a7/90007d476b9f0dc308e3bc57b832d004f848fd6c0da601375d20d92d1519/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83", size = 48236269, upload-time = "2026-02-16T10:12:04.47Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3f/b16fab3e77709856eb6ac328ce35f57a6d4a18462c7ca5186ef31b45e0e0/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125", size = 50604794, upload-time = "2026-02-16T10:12:11.797Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a1/22df0620a9fac31d68397a75465c344e83c3dfe521f7612aea33e27ab6c0/pyarrow-23.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a35581e856a2fafa12f3f54fce4331862b1cfb0bef5758347a858a4aa9d6bae8", size = 27660642, upload-time = "2026-02-16T10:12:17.746Z" }, + { url = "https://files.pythonhosted.org/packages/8d/1b/6da9a89583ce7b23ac611f183ae4843cd3a6cf54f079549b0e8c14031e73/pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca", size = 34238755, upload-time = "2026-02-16T10:12:32.819Z" }, + { url = "https://files.pythonhosted.org/packages/ae/b5/d58a241fbe324dbaeb8df07be6af8752c846192d78d2272e551098f74e88/pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1", size = 35847826, upload-time = "2026-02-16T10:12:38.949Z" }, + { url = "https://files.pythonhosted.org/packages/54/a5/8cbc83f04aba433ca7b331b38f39e000efd9f0c7ce47128670e737542996/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb", size = 44536859, upload-time = "2026-02-16T10:12:45.467Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/2e/c0f017c405fcdc252dbccafbe05e36b0d0eb1ea9a958f081e01c6972927f/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1", size = 47614443, upload-time = "2026-02-16T10:12:55.525Z" }, + { url = "https://files.pythonhosted.org/packages/af/6b/2314a78057912f5627afa13ba43809d9d653e6630859618b0fd81a4e0759/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886", size = 48232991, upload-time = "2026-02-16T10:13:04.729Z" }, + { url = "https://files.pythonhosted.org/packages/40/f2/1bcb1d3be3460832ef3370d621142216e15a2c7c62602a4ea19ec240dd64/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f", size = 50645077, upload-time = "2026-02-16T10:13:14.147Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3f/b1da7b61cd66566a4d4c8383d376c606d1c34a906c3f1cb35c479f59d1aa/pyarrow-23.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:3a4c85ef66c134161987c17b147d6bffdca4566f9a4c1d81a0a01cdf08414ea5", size = 28234271, upload-time = "2026-02-16T10:14:09.397Z" }, + { url = "https://files.pythonhosted.org/packages/b5/78/07f67434e910a0f7323269be7bfbf58699bd0c1d080b18a1ab49ba943fe8/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d", size = 34488692, upload-time = "2026-02-16T10:13:21.541Z" }, + { url = "https://files.pythonhosted.org/packages/50/76/34cf7ae93ece1f740a04910d9f7e80ba166b9b4ab9596a953e9e62b90fe1/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f", size = 35964383, upload-time = "2026-02-16T10:13:28.63Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/90/459b827238936d4244214be7c684e1b366a63f8c78c380807ae25ed92199/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814", size = 44538119, upload-time = "2026-02-16T10:13:35.506Z" }, + { url = "https://files.pythonhosted.org/packages/28/a1/93a71ae5881e99d1f9de1d4554a87be37da11cd6b152239fb5bd924fdc64/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d", size = 47571199, upload-time = "2026-02-16T10:13:42.504Z" }, + { url = "https://files.pythonhosted.org/packages/88/a3/d2c462d4ef313521eaf2eff04d204ac60775263f1fb08c374b543f79f610/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7", size = 48259435, upload-time = "2026-02-16T10:13:49.226Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f1/11a544b8c3d38a759eb3fbb022039117fd633e9a7b19e4841cc3da091915/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690", size = 50629149, upload-time = "2026-02-16T10:13:57.238Z" }, + { url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807, upload-time = "2026-02-16T10:14:03.892Z" }, +] + +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = 
"2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = 
"2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = 
"2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = 
"sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pymongo" +version = "4.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/65/9c/a4895c4b785fc9865a84a56e14b5bd21ca75aadc3dab79c14187cdca189b/pymongo-4.16.0.tar.gz", hash = "sha256:8ba8405065f6e258a6f872fe62d797a28f383a12178c7153c01ed04e845c600c", size = 2495323, upload-time = "2026-01-07T18:05:48.107Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/03/6dd7c53cbde98de469a3e6fb893af896dca644c476beb0f0c6342bcc368b/pymongo-4.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bd4911c40a43a821dfd93038ac824b756b6e703e26e951718522d29f6eb166a8", size = 917619, upload-time = "2026-01-07T18:04:19.173Z" }, + { url = "https://files.pythonhosted.org/packages/73/e1/328915f2734ea1f355dc9b0e98505ff670f5fab8be5e951d6ed70971c6aa/pymongo-4.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25a6b03a68f9907ea6ec8bc7cf4c58a1b51a18e23394f962a6402f8e46d41211", size = 917364, upload-time = "2026-01-07T18:04:20.861Z" }, + { url = "https://files.pythonhosted.org/packages/41/fe/4769874dd9812a1bc2880a9785e61eba5340da966af888dd430392790ae0/pymongo-4.16.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:91ac0cb0fe2bf17616c2039dac88d7c9a5088f5cb5829b27c9d250e053664d31", size = 1686901, upload-time = "2026-01-07T18:04:22.219Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/8d/15707b9669fdc517bbc552ac60da7124dafe7ac1552819b51e97ed4038b4/pymongo-4.16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf0ec79e8ca7077f455d14d915d629385153b6a11abc0b93283ed73a8013e376", size = 1723034, upload-time = "2026-01-07T18:04:24.055Z" }, + { url = "https://files.pythonhosted.org/packages/5b/af/3d5d16ff11d447d40c1472da1b366a31c7380d7ea2922a449c7f7f495567/pymongo-4.16.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2d0082631a7510318befc2b4fdab140481eb4b9dd62d9245e042157085da2a70", size = 1797161, upload-time = "2026-01-07T18:04:25.964Z" }, + { url = "https://files.pythonhosted.org/packages/fb/04/725ab8664eeec73ec125b5a873448d80f5d8cf2750aaaf804cbc538a50a5/pymongo-4.16.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85dc2f3444c346ea019a371e321ac868a4fab513b7a55fe368f0cc78de8177cc", size = 1780938, upload-time = "2026-01-07T18:04:28.745Z" }, + { url = "https://files.pythonhosted.org/packages/22/50/dd7e9095e1ca35f93c3c844c92eb6eb0bc491caeb2c9bff3b32fe3c9b18f/pymongo-4.16.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dabbf3c14de75a20cc3c30bf0c6527157224a93dfb605838eabb1a2ee3be008d", size = 1714342, upload-time = "2026-01-07T18:04:30.331Z" }, + { url = "https://files.pythonhosted.org/packages/03/c9/542776987d5c31ae8e93e92680ea2b6e5a2295f398b25756234cabf38a39/pymongo-4.16.0-cp312-cp312-win32.whl", hash = "sha256:60307bb91e0ab44e560fe3a211087748b2b5f3e31f403baf41f5b7b0a70bd104", size = 887868, upload-time = "2026-01-07T18:04:32.124Z" }, + { url = "https://files.pythonhosted.org/packages/2e/d4/b4045a7ccc5680fb496d01edf749c7a9367cc8762fbdf7516cf807ef679b/pymongo-4.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:f513b2c6c0d5c491f478422f6b5b5c27ac1af06a54c93ef8631806f7231bd92e", size = 907554, upload-time = 
"2026-01-07T18:04:33.685Z" }, + { url = "https://files.pythonhosted.org/packages/60/4c/33f75713d50d5247f2258405142c0318ff32c6f8976171c4fcae87a9dbdf/pymongo-4.16.0-cp312-cp312-win_arm64.whl", hash = "sha256:dfc320f08ea9a7ec5b2403dc4e8150636f0d6150f4b9792faaae539c88e7db3b", size = 892971, upload-time = "2026-01-07T18:04:35.594Z" }, + { url = "https://files.pythonhosted.org/packages/47/84/148d8b5da8260f4679d6665196ae04ab14ffdf06f5fe670b0ab11942951f/pymongo-4.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d15f060bc6d0964a8bb70aba8f0cb6d11ae99715438f640cff11bbcf172eb0e8", size = 972009, upload-time = "2026-01-07T18:04:38.303Z" }, + { url = "https://files.pythonhosted.org/packages/1e/5e/9f3a8daf583d0adaaa033a3e3e58194d2282737dc164014ff33c7a081103/pymongo-4.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a19ea46a0fe71248965305a020bc076a163311aefbaa1d83e47d06fa30ac747", size = 971784, upload-time = "2026-01-07T18:04:39.669Z" }, + { url = "https://files.pythonhosted.org/packages/ad/f2/b6c24361fcde24946198573c0176406bfd5f7b8538335f3d939487055322/pymongo-4.16.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:311d4549d6bf1f8c61d025965aebb5ba29d1481dc6471693ab91610aaffbc0eb", size = 1947174, upload-time = "2026-01-07T18:04:41.368Z" }, + { url = "https://files.pythonhosted.org/packages/47/1a/8634192f98cf740b3d174e1018dd0350018607d5bd8ac35a666dc49c732b/pymongo-4.16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46ffb728d92dd5b09fc034ed91acf5595657c7ca17d4cf3751322cd554153c17", size = 1991727, upload-time = "2026-01-07T18:04:42.965Z" }, + { url = "https://files.pythonhosted.org/packages/5a/2f/0c47ac84572b28e23028a23a3798a1f725e1c23b0cf1c1424678d16aff42/pymongo-4.16.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:acda193f440dd88c2023cb00aa8bd7b93a9df59978306d14d87a8b12fe426b05", size = 2082497, upload-time = 
"2026-01-07T18:04:44.652Z" }, + { url = "https://files.pythonhosted.org/packages/ba/57/9f46ef9c862b2f0cf5ce798f3541c201c574128d31ded407ba4b3918d7b6/pymongo-4.16.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d9fdb386cf958e6ef6ff537d6149be7edb76c3268cd6833e6c36aa447e4443f", size = 2064947, upload-time = "2026-01-07T18:04:46.228Z" }, + { url = "https://files.pythonhosted.org/packages/b8/56/5421c0998f38e32288100a07f6cb2f5f9f352522157c901910cb2927e211/pymongo-4.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91899dd7fb9a8c50f09c3c1cf0cb73bfbe2737f511f641f19b9650deb61c00ca", size = 1980478, upload-time = "2026-01-07T18:04:48.017Z" }, + { url = "https://files.pythonhosted.org/packages/92/93/bfc448d025e12313a937d6e1e0101b50cc9751636b4b170e600fe3203063/pymongo-4.16.0-cp313-cp313-win32.whl", hash = "sha256:2cd60cd1e05de7f01927f8e25ca26b3ea2c09de8723241e5d3bcfdc70eaff76b", size = 934672, upload-time = "2026-01-07T18:04:49.538Z" }, + { url = "https://files.pythonhosted.org/packages/96/10/12710a5e01218d50c3dd165fd72c5ed2699285f77348a3b1a119a191d826/pymongo-4.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3ead8a0050c53eaa55935895d6919d393d0328ec24b2b9115bdbe881aa222673", size = 959237, upload-time = "2026-01-07T18:04:51.382Z" }, + { url = "https://files.pythonhosted.org/packages/0c/56/d288bcd1d05bc17ec69df1d0b1d67bc710c7c5dbef86033a5a4d2e2b08e6/pymongo-4.16.0-cp313-cp313-win_arm64.whl", hash = "sha256:dbbc5b254c36c37d10abb50e899bc3939bbb7ab1e7c659614409af99bd3e7675", size = 940909, upload-time = "2026-01-07T18:04:52.904Z" }, + { url = "https://files.pythonhosted.org/packages/30/9e/4d343f8d0512002fce17915a89477b9f916bda1205729e042d8f23acf194/pymongo-4.16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8a254d49a9ffe9d7f888e3c677eed3729b14ce85abb08cd74732cead6ccc3c66", size = 1026634, upload-time = "2026-01-07T18:04:54.359Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/e3/341f88c5535df40c0450fda915f582757bb7d988cdfc92990a5e27c4c324/pymongo-4.16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a1bf44e13cf2d44d2ea2e928a8140d5d667304abe1a61c4d55b4906f389fbe64", size = 1026252, upload-time = "2026-01-07T18:04:56.642Z" }, + { url = "https://files.pythonhosted.org/packages/af/64/9471b22eb98f0a2ca0b8e09393de048502111b2b5b14ab1bd9e39708aab5/pymongo-4.16.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f1c5f1f818b669875d191323a48912d3fcd2e4906410e8297bb09ac50c4d5ccc", size = 2207399, upload-time = "2026-01-07T18:04:58.255Z" }, + { url = "https://files.pythonhosted.org/packages/87/ac/47c4d50b25a02f21764f140295a2efaa583ee7f17992a5e5fa542b3a690f/pymongo-4.16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77cfd37a43a53b02b7bd930457c7994c924ad8bbe8dff91817904bcbf291b371", size = 2260595, upload-time = "2026-01-07T18:04:59.788Z" }, + { url = "https://files.pythonhosted.org/packages/ee/1b/0ce1ce9dd036417646b2fe6f63b58127acff3cf96eeb630c34ec9cd675ff/pymongo-4.16.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:36ef2fee50eee669587d742fb456e349634b4fcf8926208766078b089054b24b", size = 2366958, upload-time = "2026-01-07T18:05:01.942Z" }, + { url = "https://files.pythonhosted.org/packages/3e/3c/a5a17c0d413aa9d6c17bc35c2b472e9e79cda8068ba8e93433b5f43028e9/pymongo-4.16.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55f8d5a6fe2fa0b823674db2293f92d74cd5f970bc0360f409a1fc21003862d3", size = 2346081, upload-time = "2026-01-07T18:05:03.576Z" }, + { url = "https://files.pythonhosted.org/packages/65/19/f815533d1a88fb8a3b6c6e895bb085ffdae68ccb1e6ed7102202a307f8e2/pymongo-4.16.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:9caacac0dd105e2555521002e2d17afc08665187017b466b5753e84c016628e6", size = 2246053, upload-time = "2026-01-07T18:05:05.459Z" }, + { url = "https://files.pythonhosted.org/packages/c6/88/4be3ec78828dc64b212c123114bd6ae8db5b7676085a7b43cc75d0131bd2/pymongo-4.16.0-cp314-cp314-win32.whl", hash = "sha256:c789236366525c3ee3cd6e4e450a9ff629a7d1f4d88b8e18a0aea0615fd7ecf8", size = 989461, upload-time = "2026-01-07T18:05:07.018Z" }, + { url = "https://files.pythonhosted.org/packages/af/5a/ab8d5af76421b34db483c9c8ebc3a2199fb80ae63dc7e18f4cf1df46306a/pymongo-4.16.0-cp314-cp314-win_amd64.whl", hash = "sha256:2b0714d7764efb29bf9d3c51c964aed7c4c7237b341f9346f15ceaf8321fdb35", size = 1017803, upload-time = "2026-01-07T18:05:08.499Z" }, + { url = "https://files.pythonhosted.org/packages/f6/f4/98d68020728ac6423cf02d17cfd8226bf6cce5690b163d30d3f705e8297e/pymongo-4.16.0-cp314-cp314-win_arm64.whl", hash = "sha256:12762e7cc0f8374a8cae3b9f9ed8dabb5d438c7b33329232dd9b7de783454033", size = 997184, upload-time = "2026-01-07T18:05:09.944Z" }, + { url = "https://files.pythonhosted.org/packages/50/00/dc3a271daf06401825b9c1f4f76f018182c7738281ea54b9762aea0560c1/pymongo-4.16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1c01e8a7cd0ea66baf64a118005535ab5bf9f9eb63a1b50ac3935dccf9a54abe", size = 1083303, upload-time = "2026-01-07T18:05:11.702Z" }, + { url = "https://files.pythonhosted.org/packages/b8/4b/b5375ee21d12eababe46215011ebc63801c0d2c5ffdf203849d0d79f9852/pymongo-4.16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4c4872299ebe315a79f7f922051061634a64fda95b6b17677ba57ef00b2ba2a4", size = 1083233, upload-time = "2026-01-07T18:05:13.182Z" }, + { url = "https://files.pythonhosted.org/packages/ee/e3/52efa3ca900622c7dcb56c5e70f15c906816d98905c22d2ee1f84d9a7b60/pymongo-4.16.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78037d02389745e247fe5ab0bcad5d1ab30726eaac3ad79219c7d6bbb07eec53", size = 2527438, upload-time = 
"2026-01-07T18:05:14.981Z" }, + { url = "https://files.pythonhosted.org/packages/cb/96/43b1be151c734e7766c725444bcbfa1de6b60cc66bfb406203746839dd25/pymongo-4.16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c126fb72be2518395cc0465d4bae03125119136462e1945aea19840e45d89cfc", size = 2600399, upload-time = "2026-01-07T18:05:16.794Z" }, + { url = "https://files.pythonhosted.org/packages/e7/62/fa64a5045dfe3a1cd9217232c848256e7bc0136cffb7da4735c5e0d30e40/pymongo-4.16.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f3867dc225d9423c245a51eaac2cfcd53dde8e0a8d8090bb6aed6e31bd6c2d4f", size = 2720960, upload-time = "2026-01-07T18:05:18.498Z" }, + { url = "https://files.pythonhosted.org/packages/54/7b/01577eb97e605502821273a5bc16ce0fb0be5c978fe03acdbff471471202/pymongo-4.16.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f25001a955073b80510c0c3db0e043dbbc36904fd69e511c74e3d8640b8a5111", size = 2699344, upload-time = "2026-01-07T18:05:20.073Z" }, + { url = "https://files.pythonhosted.org/packages/55/68/6ef6372d516f703479c3b6cbbc45a5afd307173b1cbaccd724e23919bb1a/pymongo-4.16.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d9885aad05f82fd7ea0c9ca505d60939746b39263fa273d0125170da8f59098", size = 2577133, upload-time = "2026-01-07T18:05:22.052Z" }, + { url = "https://files.pythonhosted.org/packages/15/c7/b5337093bb01da852f945802328665f85f8109dbe91d81ea2afe5ff059b9/pymongo-4.16.0-cp314-cp314t-win32.whl", hash = "sha256:948152b30eddeae8355495f9943a3bf66b708295c0b9b6f467de1c620f215487", size = 1040560, upload-time = "2026-01-07T18:05:23.888Z" }, + { url = "https://files.pythonhosted.org/packages/96/8c/5b448cd1b103f3889d5713dda37304c81020ff88e38a826e8a75ddff4610/pymongo-4.16.0-cp314-cp314t-win_amd64.whl", hash = 
"sha256:f6e42c1bc985d9beee884780ae6048790eb4cd565c46251932906bdb1630034a", size = 1075081, upload-time = "2026-01-07T18:05:26.874Z" }, + { url = "https://files.pythonhosted.org/packages/32/cd/ddc794cdc8500f6f28c119c624252fb6dfb19481c6d7ed150f13cf468a6d/pymongo-4.16.0-cp314-cp314t-win_arm64.whl", hash = "sha256:6b2a20edb5452ac8daa395890eeb076c570790dfce6b7a44d788af74c2f8cf96", size = 1047725, upload-time = "2026-01-07T18:05:28.47Z" }, +] + +[[package]] +name = "pymupdf" +version = "1.27.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/32/f6b645c51d79a188a4844140c5dabca7b487ad56c4be69c4bc782d0d11a9/pymupdf-1.27.2.2.tar.gz", hash = "sha256:ea8fdc3ab6671ca98f629d5ec3032d662c8cf1796b146996b7ad306ac7ed3335", size = 85354380, upload-time = "2026-03-20T09:47:58.386Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/88/d01992a50165e22dec057a1129826846c547feb4ba07f42720ac030ce438/pymupdf-1.27.2.2-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:800f43e60a6f01f644343c2213b8613db02eaf4f4ba235b417b3351fa99e01c0", size = 23987563, upload-time = "2026-03-19T12:35:42.989Z" }, + { url = "https://files.pythonhosted.org/packages/6d/0e/9f526bc1d49d8082eff0d1547a69d541a0c5a052e71da625559efaba46a6/pymupdf-1.27.2.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:8e2e4299ef1ac0c9dff9be096cbd22783699673abecfa7c3f73173ae06421d73", size = 23263089, upload-time = "2026-03-20T09:44:16.982Z" }, + { url = "https://files.pythonhosted.org/packages/42/be/984f0d6343935b5dd30afaed6be04fc753146bf55709e63ef28bf9ef7497/pymupdf-1.27.2.2-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5e3d54922db1c7da844f1208ac1db05704770988752311f81dd36694ae0a07b", size = 24318817, upload-time = "2026-03-20T09:44:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/22/8e/85e9d9f11dbf34036eb1df283805ef6b885f2005a56d6533bb58ab0b8a11/pymupdf-1.27.2.2-cp310-abi3-manylinux_2_28_x86_64.whl", hash = 
"sha256:892698c9768457eb0991c102c96a856c0a7062539371df5e6bee0816f3ef498e", size = 24948135, upload-time = "2026-03-20T09:44:51.012Z" }, + { url = "https://files.pythonhosted.org/packages/db/e6/386edb017e5b93f1ab0bf6653ae32f3dd8dfc834ed770212e10ca62f4af9/pymupdf-1.27.2.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b4bbfa6ef347fade678771a93f6364971c51a2cdc44cd2400dc4eeed1ddb4e6", size = 25169585, upload-time = "2026-03-20T09:45:05.393Z" }, + { url = "https://files.pythonhosted.org/packages/ba/fd/f1ebe24fcd31aaea8b85b3a7ac4c3fc96e20388be5466ace27c9a3c546d9/pymupdf-1.27.2.2-cp310-abi3-win32.whl", hash = "sha256:0b8e924433b7e0bd46be820899300259235997d5a747638471fb2762baa8ee30", size = 18008861, upload-time = "2026-03-20T09:45:21.353Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b6/2a9a8556000199bbf80a5915dcd15d550d1e5288894316445c54726aaf53/pymupdf-1.27.2.2-cp310-abi3-win_amd64.whl", hash = "sha256:09bb53f9486ccb5297030cbc2dbdae845ba1c3c5126e96eb2d16c4f118de0b5b", size = 19238032, upload-time = "2026-03-20T09:45:37.941Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c6/e3e11c42f09b9c34ec332c0f37b817671b59ef4001895b854f0494092105/pymupdf-1.27.2.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6cebfbbdfd219ebdebf4d8e3914624b2e3d3a844c43f4f76935822dd9b13cc12", size = 24985299, upload-time = "2026-03-20T09:45:53.26Z" }, +] + +[[package]] +name = "pymysql" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/ae/1fe3fcd9f959efa0ebe200b8de88b5a5ce3e767e38c7ac32fb179f16a388/pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03", size = 48258, upload-time = "2025-08-24T12:55:55.146Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/4c/ad33b92b9864cbde84f259d5df035a6447f91891f5be77788e2a3892bce3/pymysql-1.1.2-py3-none-any.whl", hash = 
"sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9", size = 45300, upload-time = "2025-08-24T12:55:53.394Z" }, +] + +[[package]] +name = "pyparsing" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, +] + +[[package]] +name = "pytest-cov" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/51/a849f96e117386044471c8ec2bd6cfebacda285da9525c9106aeb28da671/pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2", size = 55592, upload-time = "2026-03-21T20:11:16.284Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, +] + +[[package]] +name = "python-docx" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" }, +] + +[[package]] +name = "red-black-tree-mod" +version = "1.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/48/75/bfa342a2ebfc9623b701f1c6995b9906fd6dd2cedf6bce777d09e23303ac/red-black-tree-mod-1.22.tar.gz", hash = "sha256:38e3652903a2bf96379c27c2082ca0b7b905158662dd7ef0c97f4fd93a9aa908", size = 34173, upload-time = "2023-12-26T14:00:22.056Z" } + +[[package]] +name = "rich" +version = "14.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, +] + +[[package]] +name = "rtfde" +version = "0.1.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lark" }, + { name = "oletools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/5c/116a016b38af589e8141160bc9b034b73dde2e50c22a921751f4d982a7ca/rtfde-0.1.2.2.tar.gz", hash = "sha256:2f0cd6ecd644071e39452e6fc4f4a1435453af0ec7c90ea86fb4fc96010c7f1b", size = 33408, upload-time = "2025-12-09T17:10:31.805Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/14/24/5a653278259be44c1845ddd56dd30cfa7265281ba149b9342b79f9d4f788/rtfde-0.1.2.2-py3-none-any.whl", hash = "sha256:d43868c74f21ae9ea5acbfd4176d5de1f2cfae0ff7f267698471c606287c04ec", size = 36713, upload-time = "2025-12-09T17:10:30.893Z" }, +] + +[[package]] +name = "ruamel-yaml" +version = "0.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/3b/ebda527b56beb90cb7652cb1c7e4f91f48649fbcd8d2eb2fb6e77cd3329b/ruamel_yaml-0.19.1.tar.gz", hash = "sha256:53eb66cd27849eff968ebf8f0bf61f46cdac2da1d1f3576dd4ccee9b25c31993", size = 142709, upload-time = "2026-01-02T16:50:31.84Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/0c/51f6841f1d84f404f92463fc2b1ba0da357ca1e3db6b7fbda26956c3b82a/ruamel_yaml-0.19.1-py3-none-any.whl", hash = "sha256:27592957fedf6e0b62f281e96effd28043345e0e66001f97683aa9a40c667c93", size = 118102, upload-time = "2026-01-02T16:50:29.201Z" }, +] + +[[package]] +name = "ruff" +version = "0.15.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/97/e9f1ca355108ef7194e38c812ef40ba98c7208f47b13ad78d023caa583da/ruff-0.15.9.tar.gz", hash = "sha256:29cbb1255a9797903f6dde5ba0188c707907ff44a9006eb273b5a17bfa0739a2", size = 4617361, upload-time = "2026-04-02T18:17:20.829Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/1f/9cdfd0ac4b9d1e5a6cf09bedabdf0b56306ab5e333c85c87281273e7b041/ruff-0.15.9-py3-none-linux_armv6l.whl", hash = "sha256:6efbe303983441c51975c243e26dff328aca11f94b70992f35b093c2e71801e1", size = 10511206, upload-time = "2026-04-02T18:16:41.574Z" }, + { url = "https://files.pythonhosted.org/packages/3d/f6/32bfe3e9c136b35f02e489778d94384118bb80fd92c6d92e7ccd97db12ce/ruff-0.15.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4965bac6ac9ea86772f4e23587746f0b7a395eccabb823eb8bfacc3fa06069f7", size = 10923307, upload-time = 
"2026-04-02T18:17:08.645Z" }, + { url = "https://files.pythonhosted.org/packages/ca/25/de55f52ab5535d12e7aaba1de37a84be6179fb20bddcbe71ec091b4a3243/ruff-0.15.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:eaf05aad70ca5b5a0a4b0e080df3a6b699803916d88f006efd1f5b46302daab8", size = 10316722, upload-time = "2026-04-02T18:16:44.206Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/690d75f3fd6278fe55fff7c9eb429c92d207e14b25d1cae4064a32677029/ruff-0.15.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9439a342adb8725f32f92732e2bafb6d5246bd7a5021101166b223d312e8fc59", size = 10623674, upload-time = "2026-04-02T18:16:50.951Z" }, + { url = "https://files.pythonhosted.org/packages/bd/ec/176f6987be248fc5404199255522f57af1b4a5a1b57727e942479fec98ad/ruff-0.15.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9c5e6faf9d97c8edc43877c3f406f47446fc48c40e1442d58cfcdaba2acea745", size = 10351516, upload-time = "2026-04-02T18:16:57.206Z" }, + { url = "https://files.pythonhosted.org/packages/b2/fc/51cffbd2b3f240accc380171d51446a32aa2ea43a40d4a45ada67368fbd2/ruff-0.15.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b34a9766aeec27a222373d0b055722900fbc0582b24f39661aa96f3fe6ad901", size = 11150202, upload-time = "2026-04-02T18:17:06.452Z" }, + { url = "https://files.pythonhosted.org/packages/d6/d4/25292a6dfc125f6b6528fe6af31f5e996e19bf73ca8e3ce6eb7fa5b95885/ruff-0.15.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89dd695bc72ae76ff484ae54b7e8b0f6b50f49046e198355e44ea656e521fef9", size = 11988891, upload-time = "2026-04-02T18:17:18.575Z" }, + { url = "https://files.pythonhosted.org/packages/13/e1/1eebcb885c10e19f969dcb93d8413dfee8172578709d7ee933640f5e7147/ruff-0.15.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce187224ef1de1bd225bc9a152ac7102a6171107f026e81f317e4257052916d5", size = 11480576, upload-time = "2026-04-02T18:16:52.986Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/6b/a1548ac378a78332a4c3dcf4a134c2475a36d2a22ddfa272acd574140b50/ruff-0.15.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b0c7c341f68adb01c488c3b7d4b49aa8ea97409eae6462d860a79cf55f431b6", size = 11254525, upload-time = "2026-04-02T18:17:02.041Z" }, + { url = "https://files.pythonhosted.org/packages/42/aa/4bb3af8e61acd9b1281db2ab77e8b2c3c5e5599bf2a29d4a942f1c62b8d6/ruff-0.15.9-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:55cc15eee27dc0eebdfcb0d185a6153420efbedc15eb1d38fe5e685657b0f840", size = 11204072, upload-time = "2026-04-02T18:17:13.581Z" }, + { url = "https://files.pythonhosted.org/packages/69/48/d550dc2aa6e423ea0bcc1d0ff0699325ffe8a811e2dba156bd80750b86dc/ruff-0.15.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a6537f6eed5cda688c81073d46ffdfb962a5f29ecb6f7e770b2dc920598997ed", size = 10594998, upload-time = "2026-04-02T18:16:46.369Z" }, + { url = "https://files.pythonhosted.org/packages/63/47/321167e17f5344ed5ec6b0aa2cff64efef5f9e985af8f5622cfa6536043f/ruff-0.15.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6d3fcbca7388b066139c523bda744c822258ebdcfbba7d24410c3f454cc9af71", size = 10359769, upload-time = "2026-04-02T18:17:10.994Z" }, + { url = "https://files.pythonhosted.org/packages/67/5e/074f00b9785d1d2c6f8c22a21e023d0c2c1817838cfca4c8243200a1fa87/ruff-0.15.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:058d8e99e1bfe79d8a0def0b481c56059ee6716214f7e425d8e737e412d69677", size = 10850236, upload-time = "2026-04-02T18:16:48.749Z" }, + { url = "https://files.pythonhosted.org/packages/76/37/804c4135a2a2caf042925d30d5f68181bdbd4461fd0d7739da28305df593/ruff-0.15.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8e1ddb11dbd61d5983fa2d7d6370ef3eb210951e443cace19594c01c72abab4c", size = 11358343, upload-time = "2026-04-02T18:16:55.068Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/3d/1364fcde8656962782aa9ea93c92d98682b1ecec2f184e625a965ad3b4a6/ruff-0.15.9-py3-none-win32.whl", hash = "sha256:bde6ff36eaf72b700f32b7196088970bf8fdb2b917b7accd8c371bfc0fd573ec", size = 10583382, upload-time = "2026-04-02T18:17:04.261Z" }, + { url = "https://files.pythonhosted.org/packages/4c/56/5c7084299bd2cacaa07ae63a91c6f4ba66edc08bf28f356b24f6b717c799/ruff-0.15.9-py3-none-win_amd64.whl", hash = "sha256:45a70921b80e1c10cf0b734ef09421f71b5aa11d27404edc89d7e8a69505e43d", size = 11744969, upload-time = "2026-04-02T18:16:59.611Z" }, + { url = "https://files.pythonhosted.org/packages/03/36/76704c4f312257d6dbaae3c959add2a622f63fcca9d864659ce6d8d97d3d/ruff-0.15.9-py3-none-win_arm64.whl", hash = "sha256:0694e601c028fd97dc5c6ee244675bc241aeefced7ef80cd9c6935a871078f53", size = 11005870, upload-time = "2026-04-02T18:17:15.773Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" 
} +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + +[[package]] +name = "soupsieve" +version = "2.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, +] + +[[package]] +name = "structlog" +version = "25.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" }, +] + +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = 
"sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "tzdata" +version = "2026.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/f5/cd531b2d15a671a40c0f66cf06bc3570a12cd56eef98960068ebbad1bf5a/tzdata-2026.1.tar.gz", hash = "sha256:67658a1903c75917309e753fdc349ac0efd8c27db7a0cb406a25be4840f87f98", size = 197639, upload-time = "2026-04-03T11:25:22.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/70/d460bd685a170790ec89317e9bd33047988e4bce507b831f5db771e142de/tzdata-2026.1-py2.py3-none-any.whl", hash = "sha256:4b1d2be7ac37ceafd7327b961aa3a54e467efbdb563a23655fbfe0d39cfc42a9", size = 348952, upload-time = "2026-04-03T11:25:20.313Z" }, +] + +[[package]] +name = "tzlocal" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761, upload-time = "2025-03-05T21:17:41.549Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload-time = "2025-03-05T21:17:39.857Z" }, +] + +[[package]] +name = "win-unicode-console" +version = "0.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/89/8d/7aad74930380c8972ab282304a2ff45f3d4927108bb6693cabcc9fc6a099/win_unicode_console-0.5.zip", hash = "sha256:d4142d4d56d46f449d6f00536a73625a871cba040f0bc1a2e305a04578f07d1e", size = 31420, upload-time = 
"2016-06-25T19:48:54.05Z" } + +[[package]] +name = "xlrd" +version = "2.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/07/5a/377161c2d3538d1990d7af382c79f3b2372e880b65de21b01b1a2b78691e/xlrd-2.0.2.tar.gz", hash = "sha256:08b5e25de58f21ce71dc7db3b3b8106c1fa776f3024c54e45b45b374e89234c9", size = 100167, upload-time = "2025-06-14T08:46:39.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/62/c8d562e7766786ba6587d09c5a8ba9f718ed3fa8af7f4553e8f91c36f302/xlrd-2.0.2-py2.py3-none-any.whl", hash = "sha256:ea762c3d29f4cca48d82df517b6d89fbce4db3107f9d78713e48cd321d5c9aa9", size = 96555, upload-time = "2025-06-14T08:46:37.766Z" }, +] + +[[package]] +name = "yapf" +version = "0.43.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/23/97/b6f296d1e9cc1ec25c7604178b48532fa5901f721bcf1b8d8148b13e5588/yapf-0.43.0.tar.gz", hash = "sha256:00d3aa24bfedff9420b2e0d5d9f5ab6d9d4268e72afbf59bb3fa542781d5218e", size = 254907, upload-time = "2024-11-14T00:11:41.584Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/81/6acd6601f61e31cfb8729d3da6d5df966f80f374b78eff83760714487338/yapf-0.43.0-py3-none-any.whl", hash = "sha256:224faffbc39c428cb095818cf6ef5511fdab6f7430a10783fdfb292ccf2852ca", size = 256158, upload-time = "2024-11-14T00:11:39.37Z" }, +]