Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/daily-update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,6 @@ jobs:
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add cloud_providers_v2.json README.md
git add cloud_providers_v3.json README.md
git commit -m "chore: daily signature update $(date -u +%Y-%m-%d)"
git push origin HEAD:stable
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cloudcheck"
version = "9.3.0"
version = "10.0.0"
edition = "2024"
description = "CloudCheck is a simple Rust tool to check whether an IP address or hostname belongs to a cloud provider."
license = "GPL-3.0"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ CloudCheck is a simple Rust tool to check whether an IP address or hostname belo

## Cloud Provider Signatures

The latest cloud provider signatures are available in [`cloud_providers_v2.json`](https://github.com/blacklanternsecurity/cloudcheck/blob/master/cloud_providers_v2.json), which is updated daily via [CI/CD](.github/workflows/daily-update.yml). Domains associated with each cloud provider are fetched dynamically from the [v2fly community repository](https://github.com/v2fly/domain-list-community), and CIDRs are fetched from [ASNDB](https://asndb.api.bbot.io/).
The latest cloud provider signatures are available in [`cloud_providers_v3.json`](https://github.com/blacklanternsecurity/cloudcheck/blob/master/cloud_providers_v3.json), which is updated daily via [CI/CD](.github/workflows/daily-update.yml). Domains associated with each cloud provider are fetched dynamically from the [v2fly community repository](https://github.com/v2fly/domain-list-community), and CIDRs are fetched from [ASNDB](https://asndb.api.bbot.io/).

Used by [BBOT](https://github.com/blacklanternsecurity/bbot) and [BBOT Server](https://github.com/blacklanternsecurity/bbot-server).

Expand Down
17 changes: 15 additions & 2 deletions cloudcheck/helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import ipaddress
import os
import sys
import httpx
from pathlib import Path
from typing import List, Set, Union

_warned_missing_api_key = False


def defrag_cidrs(
cidrs: List[Union[ipaddress.IPv4Network, ipaddress.IPv6Network]],
Expand Down Expand Up @@ -202,12 +205,22 @@ def strings_to_cidrs(


def request(url, include_api_key=False, browser_headers=False, timeout=60, **kwargs):
global _warned_missing_api_key
headers = kwargs.get("headers", {})
if browser_headers:
headers.update(browser_base_headers)
bbot_io_api_key = os.getenv("BBOT_IO_API_KEY")
if include_api_key and bbot_io_api_key:
headers["Authorization"] = f"Bearer {bbot_io_api_key}"
if include_api_key:
if bbot_io_api_key:
headers["Authorization"] = f"Bearer {bbot_io_api_key}"
elif not _warned_missing_api_key:
_warned_missing_api_key = True
print(
"WARNING: BBOT_IO_API_KEY env var is not set; asndb requests will be "
"unauthenticated and may be rate-limited. Export BBOT_IO_API_KEY before "
"running the update.",
file=sys.stderr,
)
kwargs["headers"] = headers
kwargs["timeout"] = timeout
kwargs.setdefault("follow_redirects", True)
Expand Down
15 changes: 13 additions & 2 deletions cloudcheck/providers/amazon.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,21 @@ class Amazon(BaseProvider):
]
tags: List[str] = ["cloud"]
_bucket_name_regex = r"[a-z0-9_][a-z0-9-\.]{1,61}[a-z0-9]"
_region_regex = r"[a-z]{2}-[a-z]+-\d+"
regexes: Dict[str, List[str]] = {
"STORAGE_BUCKET_NAME": [_bucket_name_regex],
"STORAGE_BUCKET_NAME": [r"(?P<name>" + _bucket_name_regex + r")"],
"STORAGE_BUCKET_HOSTNAME": [
r"(" + _bucket_name_regex + r")\.(s3-?(?:[a-z0-9-]*\.){1,2}amazonaws\.com)"
r"(?P<name>" + _bucket_name_regex + r")\.s3\.amazonaws\.com",
r"(?P<name>"
+ _bucket_name_regex
+ r")\.s3-(?P<region>"
+ _region_regex
+ r")\.amazonaws\.com",
r"(?P<name>"
+ _bucket_name_regex
+ r")\.s3\.(?P<region>"
+ _region_regex
+ r")\.amazonaws\.com",
],
}

Expand Down
3 changes: 2 additions & 1 deletion cloudcheck/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def __init__(self, **data):
self._cache_dir = Path.home() / ".cache" / "cloudcheck"
self._repo_url = "https://github.com/v2fly/domain-list-community.git"
self._asndb_url = os.getenv("ASNDB_URL", "https://asndb.api.bbot.io/v1")
self._bbot_io_api_key = os.getenv("BBOT_IO_API_KEY")

def update(self):
print(f"Updating {self.name}")
Expand Down Expand Up @@ -184,6 +183,7 @@ def _fetch_org_id(self, org_id: str):
print(f"Fetching {url}")
res = self.request(url, include_api_key=True)
print(f"{url} -> {res}: {res.text}")
res.raise_for_status()
j = res.json()
return [a["asn"] for a in j.get("asns", [])], []
except Exception as e:
Expand Down Expand Up @@ -242,6 +242,7 @@ def fetch_asn(
try:
res = self.request(url, include_api_key=True)
print(f"{url} -> {res.text}")
res.raise_for_status()
j = res.json()
cidrs = j.get("subnets", [])
except Exception as e:
Expand Down
6 changes: 3 additions & 3 deletions cloudcheck/providers/cloudflare.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ class Cloudflare(BaseProvider):
]
_bucket_name_regex = r"[a-z0-9_][a-z0-9-\.]{1,61}[a-z0-9]"
regexes: Dict[str, List[str]] = {
"STORAGE_BUCKET_NAME": [_bucket_name_regex],
"STORAGE_BUCKET_NAME": [r"(?P<name>" + _bucket_name_regex + r")"],
"STORAGE_BUCKET_HOSTNAME": [
r"(" + _bucket_name_regex + r")\.(r2\.dev)",
r"(" + _bucket_name_regex + r")\.(r2\.cloudflarestorage\.com)",
r"(?P<name>" + _bucket_name_regex + r")\.r2\.dev",
r"(?P<name>" + _bucket_name_regex + r")\.r2\.cloudflarestorage\.com",
],
}

Expand Down
9 changes: 7 additions & 2 deletions cloudcheck/providers/digitalocean.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,15 @@ class DigitalOcean(BaseProvider):
"DO-13-ARIN",
]
_bucket_name_regex = r"[a-z0-9][a-z0-9-]{2,62}"
_region_regex = r"[a-z]{3}\d"
regexes: Dict[str, List[str]] = {
"STORAGE_BUCKET_NAME": [_bucket_name_regex],
"STORAGE_BUCKET_NAME": [r"(?P<name>" + _bucket_name_regex + r")"],
"STORAGE_BUCKET_HOSTNAME": [
r"(" + _bucket_name_regex + r")\.([a-z]{3}[\d]{1}\.digitaloceanspaces\.com)"
r"(?P<name>"
+ _bucket_name_regex
+ r")\.(?P<region>"
+ _region_regex
+ r")\.digitaloceanspaces\.com"
],
}

Expand Down
4 changes: 3 additions & 1 deletion cloudcheck/providers/gocache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
class Gocache(BaseProvider):
tags: List[str] = ["cdn"]
short_description: str = "GoCache"
long_description: str = "A Brazilian content delivery network provider offering CDN services."
long_description: str = (
"A Brazilian content delivery network provider offering CDN services."
)

_ips_url = "https://gocache.com.br/ips"

Expand Down
9 changes: 6 additions & 3 deletions cloudcheck/providers/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,13 @@ class Google(BaseProvider):
_bucket_name_regex = r"[a-z0-9][a-z0-9-_\.]{1,61}[a-z0-9]"
_firebase_bucket_name_regex = r"[a-z0-9][a-z0-9-\.]{1,61}[a-z0-9]"
regexes: Dict[str, List[str]] = {
"STORAGE_BUCKET_NAME": [_bucket_name_regex, _firebase_bucket_name_regex],
"STORAGE_BUCKET_NAME": [
r"(?P<name>" + _bucket_name_regex + r")",
r"(?P<name>" + _firebase_bucket_name_regex + r")",
],
"STORAGE_BUCKET_HOSTNAME": [
r"(" + _firebase_bucket_name_regex + r")\.(firebaseio\.com)",
r"(" + _bucket_name_regex + r")\.(storage\.googleapis\.com)",
r"(?P<name>" + _firebase_bucket_name_regex + r")\.firebaseio\.com",
r"(?P<name>" + _bucket_name_regex + r")\.storage\.googleapis\.com",
],
}

Expand Down
9 changes: 7 additions & 2 deletions cloudcheck/providers/hetzner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@ class Hetzner(BaseProvider):
"ORG-HOA1-RIPE",
]
_bucket_name_regex = r"[a-z0-9][a-z0-9-_\.]{1,61}[a-z0-9]"
_region_regex = r"[a-z]{3}\d"
regexes: Dict[str, List[str]] = {
"STORAGE_BUCKET_NAME": [_bucket_name_regex],
"STORAGE_BUCKET_NAME": [r"(?P<name>" + _bucket_name_regex + r")"],
"STORAGE_BUCKET_HOSTNAME": [
r"(" + _bucket_name_regex + r")\.(your-objectstorage\.com)"
r"(?P<name>"
+ _bucket_name_regex
+ r")\.(?P<region>"
+ _region_regex
+ r")\.your-objectstorage\.com"
],
}
4 changes: 2 additions & 2 deletions cloudcheck/providers/microsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ class Microsoft(BaseProvider):
]
_bucket_name_regex = r"[a-z0-9][a-z0-9-_\.]{1,61}[a-z0-9]"
regexes: Dict[str, List[str]] = {
"STORAGE_BUCKET_NAME": [_bucket_name_regex],
"STORAGE_BUCKET_NAME": [r"(?P<name>" + _bucket_name_regex + r")"],
"STORAGE_BUCKET_HOSTNAME": [
r"(" + _bucket_name_regex + r")\.(blob\.core\.windows\.net)"
r"(?P<name>" + _bucket_name_regex + r")\.blob\.core\.windows\.net"
],
}

Expand Down
2 changes: 1 addition & 1 deletion cloudcheck_update/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


project_root = Path(__file__).parent.parent
json_path = project_root / "cloud_providers_v2.json"
json_path = project_root / "cloud_providers_v3.json"


def _update_provider(provider_class):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "cloudcheck"
version = "9.3.0"
version = "10.0.0"
description = "Detailed database of cloud providers. Instantly look up a domain or IP address"
readme = "README.md"
requires-python = ">=3.9"
Expand Down
6 changes: 3 additions & 3 deletions scripts/update_readme_table.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/usr/bin/env python3
"""Update README.md with a provider table generated from cloud_providers_v2.json"""
"""Update README.md with a provider table generated from cloud_providers_v3.json"""

import json
import re
from pathlib import Path


def load_providers(json_path: Path):
"""Load providers from cloud_providers_v2.json"""
"""Load providers from cloud_providers_v3.json"""
with open(json_path, "r") as f:
return json.load(f)

Expand Down Expand Up @@ -83,7 +83,7 @@ def update_readme(readme_path: Path, table: str):

def main():
project_root = Path(__file__).parent.parent
json_path = project_root / "cloud_providers_v2.json"
json_path = project_root / "cloud_providers_v3.json"
readme_path = project_root / "README.md"

if not json_path.exists():
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use tokio::sync::{Mutex, RwLock};
#[cfg(feature = "py")]
mod python;

const CLOUDCHECK_SIGNATURE_URL: &str = "https://raw.githubusercontent.com/blacklanternsecurity/cloudcheck/refs/heads/stable/cloud_providers_v2.json";
const CLOUDCHECK_SIGNATURE_URL: &str = "https://raw.githubusercontent.com/blacklanternsecurity/cloudcheck/refs/heads/stable/cloud_providers_v3.json";

#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)]
pub struct CloudProvider {
Expand Down Expand Up @@ -86,7 +86,7 @@ impl CloudCheck {
let mut path = PathBuf::from(home);
path.push(".cache");
path.push("cloudcheck");
path.push("cloud_providers_v2.json");
path.push("cloud_providers_v3.json");
Ok(path)
}

Expand Down
78 changes: 75 additions & 3 deletions test_cloudcheck.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

import pytest
from cloudcheck import CloudCheck, CloudCheckError

Expand Down Expand Up @@ -25,14 +27,84 @@ async def test_lookup_with_invalid_url():
signature_url="https://invalid.example.com/nonexistent.json",
max_retries=2,
retry_delay_seconds=0,
force_refresh=True
force_refresh=True,
)

with pytest.raises(CloudCheckError, match=r"Failed to fetch cloud provider data from https://invalid\.example\.com/nonexistent\.json after 3 attempts"):

with pytest.raises(
CloudCheckError,
match=r"Failed to fetch cloud provider data from https://invalid\.example\.com/nonexistent\.json after 3 attempts",
):
await cloudcheck.lookup("8.8.8.8")


def test_import_provider():
    """Smoke test: the Amazon provider class is importable and declares regexes."""
    from cloudcheck import providers

    assert providers.Amazon.regexes


@pytest.mark.parametrize(
    "provider_name,hostname,expected",
    [
        ("Amazon", "mybucket.s3.amazonaws.com", {"name": "mybucket"}),
        (
            "Amazon",
            "mybucket.s3-us-west-2.amazonaws.com",
            {"name": "mybucket", "region": "us-west-2"},
        ),
        (
            "Amazon",
            "mybucket.s3.eu-central-1.amazonaws.com",
            {"name": "mybucket", "region": "eu-central-1"},
        ),
        (
            "DigitalOcean",
            "mybucket.nyc3.digitaloceanspaces.com",
            {"name": "mybucket", "region": "nyc3"},
        ),
        (
            "Hetzner",
            "mybucket.fsn1.your-objectstorage.com",
            {"name": "mybucket", "region": "fsn1"},
        ),
        ("Google", "mybucket.storage.googleapis.com", {"name": "mybucket"}),
        ("Google", "mybucket.firebaseio.com", {"name": "mybucket"}),
        ("Microsoft", "mybucket.blob.core.windows.net", {"name": "mybucket"}),
        ("Cloudflare", "mybucket.r2.dev", {"name": "mybucket"}),
        (
            "Cloudflare",
            "mybucket.r2.cloudflarestorage.com",
            {"name": "mybucket"},
        ),
    ],
)
def test_storage_bucket_hostname_named_groups(provider_name, hostname, expected):
    """Each provider's STORAGE_BUCKET_HOSTNAME regexes must capture the
    expected named groups (``name``, and ``region`` where applicable) when
    matched against a representative bucket hostname."""
    import cloudcheck.providers as providers

    regex_list = getattr(providers, provider_name).regexes["STORAGE_BUCKET_HOSTNAME"]
    for regex in regex_list:
        m = re.fullmatch(regex, hostname)
        if m is None:
            continue
        # Drop unmatched optional groups so the comparison only sees captures.
        captured = {
            group: value for group, value in m.groupdict().items() if value is not None
        }
        assert captured == expected, (
            f"{provider_name}: {hostname} matched {regex} but groups={captured}"
        )
        return
    pytest.fail(
        f"{provider_name}: no STORAGE_BUCKET_HOSTNAME pattern matched {hostname}"
    )


def test_all_storage_bucket_regexes_compile():
    """Every declared provider regex must compile and define a ``name``
    named capture group, so downstream consumers can always extract the
    bucket name from a match."""
    from cloudcheck.providers import load_provider_classes

    for cls in load_provider_classes().values():
        # Regexes are declared as a pydantic model-field default on each class.
        declared = cls.model_fields["regexes"].default or {}
        for category, pattern_list in declared.items():
            for raw in pattern_list:
                rx = re.compile(raw)
                assert "name" in rx.groupindex, (
                    f"{cls.__name__} {category} pattern {raw!r} "
                    f"is missing the 'name' named group"
                )
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading