Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions .github/workflows/pytest-sanity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,19 @@ on:
merge_group:
types: [checks_requested]

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
pytest_sanity:
runs-on: ${{ matrix.os }}
permissions:
contents: read
strategy:
max-parallel: 4
max-parallel: 2
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.10", "3.11", "3.12", "3.13"]
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python-version }}
cancel-in-progress: true
steps:
- uses: actions/checkout@v4
with:
Expand Down
53 changes: 53 additions & 0 deletions VULNERABILITIES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Vulnerability Update Guide

Use this file when updating dependency floors in [pyproject.toml](pyproject.toml) to address vulnerability findings.

## Rules

1. Keep the dependency line and its vulnerability note together.
Example:
```toml
"requests>=2.33.0",
# ⬆️ Required to fix vulnerability CVE-2026-25645
```
The up-arrow comment goes immediately below the package line, not above it.

2. Mention only the advisory that necessitates the current minimum version.
If `authlib>=1.6.11` is required because `1.6.11` is the first release that fixes `GHSA-jj8c-mmj3-mmgv`, mention only that advisory.
Do not list older advisories that are also fixed incidentally by the same newer version.

3. Do not keep stale version-history commentary.
Keep only the final package floor and the advisory tied to that floor.

4. Treat major-version upgrades as compatibility risks.
Before keeping a major-version bump of a package such as `transformers` or `pytest`, add or run tests that exercise the affected public SDK behavior.

5. Prefer the smallest safe version bump that clears the finding.
If a vulnerability is fixed in `46.0.6`, do not cite it as justification for `46.0.7`; raise the floor to `46.0.7` only when another active finding requires it, and cite that finding instead.

## Workflow

1. Update the minimum version in `pyproject.toml`.
2. Place the vulnerability comment immediately below the changed dependency line.
3. Recompile the dependency set from `pyproject.toml`.
Example:
```bash
UV_CACHE_DIR=/tmp/uv-cache uv pip compile pyproject.toml --group dev --group docs --group deploy --extra pandas --extra polars --extra transformers -o /tmp/hirundo-all-requirements.txt
```
4. Audit the compiled output.
Example:
```bash
UV_CACHE_DIR=/tmp/uv-cache uv run pip-audit --no-deps --disable-pip -r /tmp/hirundo-all-requirements.txt -f json
```
5. Run validation for compatibility-sensitive changes.
Commands:
```bash
.venv/bin/pytest
.venv/bin/basedpyright
```

## Repo-specific notes

- This repository has many integration-heavy tests that require credentials such as `AWS_ACCESS_KEY`, `GCP_CREDENTIALS`, and `HUGGINGFACE_ACCESS_TOKEN`.
- If the full `pytest` suite cannot be run because credentials are missing, report that explicitly and run any safe, targeted tests that cover the changed behavior.
- For `transformers` changes, keep coverage on the Hugging Face pipeline path in [tests/unlearning-llm/llm_pipeline_transformers_test.py](tests/unlearning-llm/llm_pipeline_transformers_test.py).
6 changes: 3 additions & 3 deletions hirundo/_iter_sse_retrying.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from collections.abc import AsyncGenerator, Generator

import httpx
import urllib3
from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
from stamina import retry
from urllib3.exceptions import ReadTimeoutError

from hirundo._http import requests
from hirundo._timeouts import READ_TIMEOUT
Expand Down Expand Up @@ -41,7 +41,7 @@ def iter_sse_retrying(
on=(
httpx.ReadError,
httpx.RemoteProtocolError,
urllib3.exceptions.ReadTimeoutError,
ReadTimeoutError,
Comment thread
benglewis marked this conversation as resolved.
),
attempts=MAX_RETRIES,
)
Expand Down Expand Up @@ -106,7 +106,7 @@ async def aiter_sse_retrying(
on=(
httpx.ReadError,
httpx.RemoteProtocolError,
urllib3.exceptions.ReadTimeoutError,
ReadTimeoutError,
),
attempts=MAX_RETRIES,
)
Expand Down
28 changes: 5 additions & 23 deletions hirundo/unlearning_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from enum import Enum
from typing import TYPE_CHECKING, Literal, overload

from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, Field
from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

Expand Down Expand Up @@ -198,15 +198,6 @@ class UnlearningLlmAdvancedOptions(BaseModel):
max_tokens_for_model: dict[DatasetType, int] | int | None = None


class UtilityType(str, Enum):
DEFAULT = "DEFAULT"
CUSTOM = "CUSTOM"


class DefaultUtility(BaseModel):
utility_type: Literal[UtilityType.DEFAULT] = UtilityType.DEFAULT


class HirundoCSVDataset(BaseModel):
type: Literal["HirundoCSV"] = "HirundoCSV"
csv_url: str
Expand All @@ -221,7 +212,6 @@ class HuggingFaceDataset(BaseModel):


class CustomUtility(BaseModel):
utility_type: Literal[UtilityType.CUSTOM] = UtilityType.CUSTOM
dataset: CustomDataset


Expand Down Expand Up @@ -256,44 +246,36 @@ class CustomBehavior(BaseModel):
BiasBehavior | HallucinationBehavior | SecurityBehavior | CustomBehavior
)

TargetUtility = DefaultUtility | CustomUtility


class LlmRunInfo(BaseModel):
model_config = ConfigDict(protected_namespaces=("model_validate", "model_dump"))

organization_id: int | None = None
name: str | None = None
target_behaviors: list[TargetBehavior]
target_utilities: list[TargetUtility]
target_utilities: list[CustomUtility] = Field(default_factory=list)
advanced_options: UnlearningLlmAdvancedOptions | None = None


class BiasRunInfo(BaseModel):
bias_type: BBQBiasType
organization_id: int | None = None
name: str | None = None
target_utilities: list[TargetUtility] | None = None
target_utilities: list[CustomUtility] = Field(default_factory=list)
advanced_options: UnlearningLlmAdvancedOptions | None = None

def to_run_info(self) -> LlmRunInfo:
default_utilities: list[TargetUtility] = (
[DefaultUtility()]
if self.target_utilities is None
else list(self.target_utilities)
)
return LlmRunInfo(
organization_id=self.organization_id,
name=self.name,
target_behaviors=[BiasBehavior(bias_type=self.bias_type)],
target_utilities=default_utilities,
target_utilities=self.target_utilities,
advanced_options=self.advanced_options,
)


OutputLlm = dict[str, object]
BehaviorOptions = TargetBehavior
UtilityOptions = TargetUtility
CeleryTaskState = str


Expand All @@ -305,7 +287,7 @@ class OutputUnlearningLlmRun(BaseModel):
model_id: int
model: OutputLlm
target_behaviors: list[BehaviorOptions]
target_utilities: list[UtilityOptions]
target_utilities: list[CustomUtility]
Comment thread
benglewis marked this conversation as resolved.
advanced_options: UnlearningLlmAdvancedOptions | None
run_id: str
mlflow_run_id: str | None
Expand Down
33 changes: 21 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,20 @@ requires-python = ">= 3.10"
dependencies = [
"pyyaml>=6.0.1",
"types-PyYAML>=6.0.12",
"pydantic>=2.7.1",
"python-dotenv>=1.0.1",
"pydantic>=2.13.0",
# ⬆️ Required for Python 3.14 support
"python-dotenv>=1.2.2",
# ⬆️ Required to fix vulnerability CVE-2026-28684
"types-requests>=2.31.0",
"typer>=0.12.3",
"httpx>=0.27.0",
"stamina>=24.2.0",
"httpx-sse>=0.4.0",
"tqdm>=4.66.5",
"h11>=0.16.0",
"requests>=2.33.0",
# ⬆️ Required to fix vulnerability GHSA-vqfr-h8mv-ghfj
"requests>=2.32.4",
# ⬆️ Required to fix vulnerability GHSA-9hjg-9r4m-mvj7
# ⬆️ Required to fix vulnerability CVE-2026-25645
"urllib3>=2.6.3",
# ⬆️ Required to fix vulnerability CVE-2026-21441
"setuptools>=78.1.1",
Expand All @@ -51,28 +53,34 @@ Homepage = "https://github.com/Hirundo-io/hirundo-python-sdk"
[project.optional-dependencies]
pandas = ["pandas>=2.2.3"]
polars = ["polars>=1.0.0"]
transformers = ["transformers>=4.57.3", "peft>=0.18.1", "accelerate>=1.12.0"]
transformers = [
"transformers>=5.0.0rc3",
# ⬆️ Required to fix vulnerability CVE-2026-1839
"peft>=0.18.1",
"accelerate>=1.12.0",
]

[dependency-groups]
dev = [
"hirundo[pandas,polars,transformers]",
"numpy>=2.1.3; sys_platform == 'darwin'",
# ⬆️ Ensure macOS runners resolve a NumPy version with prebuilt wheels in `pytest_sanity`.
"types-setuptools>=69.5.0",
"pytest>=8.2.0",
"pytest>=9.0.3",
# ⬆️ Required to fix vulnerability CVE-2025-71176
"pytest-asyncio>=0.23.6",
"uv>=0.9.29",
"uv>=0.11.6",
# ⬆️ Required to fix vulnerability GHSA-pjjw-68hj-v9mw
"pre-commit>=3.7.1",
"basedpyright==1.37.1",
"virtualenv>=20.36.1",
# ⬆️ Needed for `pre-commit` version fix for vulnerability GHSA-rqc4-2hc7-8c8v
"authlib>=1.6.6",
# ⬆️ Required to fix vulnerability CVE-2025-68158
"authlib>=1.6.11",
# ⬆️ Required to fix vulnerability GHSA-jj8c-mmj3-mmgv
"ruff>=0.12.0",
"bumpver>=2025.1131",
"platformdirs>=4.3.6",
"cryptography>=46.0.5",
# ⬆️ Required to fix vulnerability CVE-2026-26007
"cryptography>=46.0.7",
# ⬆️ Required to fix vulnerability CVE-2026-39892
"jinja2>=3.1.6",
# ⬆️ Required to fix vulnerabilities GHSA-cpwx-vrp4-4pq7 , GHSA-gmj6-6f8f-6699 & GHSA-q2x7-8rv6-6q7h
"filelock>=3.20.3",
Expand Down Expand Up @@ -204,4 +212,5 @@ exclude = [
"private",
"notebooks",
".venv",
"build",
]
Loading
Loading