diff --git a/.github/workflows/citest.yml b/.github/workflows/citest.yml
index 9d57857..5213c21 100644
--- a/.github/workflows/citest.yml
+++ b/.github/workflows/citest.yml
@@ -10,13 +10,13 @@ on:
jobs:
test-oldpython:
- runs-on: "ubuntu-20.04"
+ runs-on: "ubuntu-22.04"
steps:
- uses: actions/checkout@v4
- - name: Set up Python 3.5
+ - name: Set up Python 3.7
uses: actions/setup-python@v5
with:
- python-version: "3.5"
+ python-version: "3.7"
env:
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
# workaround for certificate incompatibility
diff --git a/.github/workflows/copilot-metrics.yml b/.github/workflows/copilot-metrics.yml
new file mode 100644
index 0000000..1068044
--- /dev/null
+++ b/.github/workflows/copilot-metrics.yml
@@ -0,0 +1,105 @@
+name: Copilot Metrics Collector
+
+on:
+ schedule:
+ # Runs every day at 06:00 UTC (after GitHub's nightly data refresh)
+ - cron: "0 6 * * *"
+ workflow_dispatch:
+ # Also allows manual triggering from the Actions tab
+
+permissions:
+ contents: write # needed to commit the updated metrics file back to the repo
+
+jobs:
+ fetch-metrics:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3"
+
+ - name: Fetch Copilot usage metrics
+ env:
+ COPILOT_METRICS_TOKEN: ${{ secrets.COPILOT_METRICS_TOKEN }}
+ ORG: DewDropstempest
+ run: |
+ python - <<'PYEOF'
+ import json
+ import os
+ import urllib.request
+ import urllib.error
+ from datetime import datetime, timezone
+
+ token = os.environ["COPILOT_METRICS_TOKEN"]
+ org = os.environ["ORG"]
+
+ url = f"https://api.github.com/orgs/{org}/copilot/usage"
+
+ req = urllib.request.Request(
+ url,
+ headers={
+ "Authorization": f"Bearer {token}",
+ "Accept": "application/vnd.github+json",
+ "X-GitHub-Api-Version": "2022-11-28",
+ },
+ )
+
+ try:
+ with urllib.request.urlopen(req) as resp:
+ data = json.loads(resp.read().decode())
+ except urllib.error.HTTPError as e:
+ body = e.read().decode()
+ print(f"HTTP {e.code}: {body}")
+ raise SystemExit(f"Failed to fetch metrics: {e.code}")
+
+ # ── Normalise field names ──────────────────────────────────────────
+ # The API returns either the v1 shape (top-level total_suggestions_count,
+ # etc.) or a newer shape keyed by copilot_ide_code_completions. We normalise to v1.
+ normalised = []
+ for day in data:
+ entry = {"day": day.get("day", day.get("date", "unknown"))}
+ if "total_suggestions_count" in day:
+ # v1 shape – already in the format we want
+ entry["total_active_users"] = day.get("total_active_users", 0)
+ entry["total_suggestions_count"] = day.get("total_suggestions_count", 0)
+ entry["total_acceptances_count"] = day.get("total_acceptances_count", 0)
+ entry["total_lines_suggested"] = day.get("total_lines_suggested", 0)
+ entry["total_lines_accepted"] = day.get("total_lines_accepted", 0)
+ elif "copilot_ide_code_completions" in day:
+ # v2 / newer shape. NOTE(review): confirm these counters sit directly under this key; absent keys silently become 0
+ cc = day.get("copilot_ide_code_completions") or {}
+ entry["total_active_users"] = day.get("total_active_users", 0)
+ entry["total_suggestions_count"] = cc.get("total_suggestions_count", 0)
+ entry["total_acceptances_count"] = cc.get("total_acceptances_count", 0)
+ entry["total_lines_suggested"] = cc.get("total_lines_suggested", 0)
+ entry["total_lines_accepted"] = cc.get("total_lines_accepted", 0)
+ else:
+ # Fallback – keep every field except "breakdown" so the dashboard can adapt
+ entry.update({k: v for k, v in day.items() if k != "breakdown"})
+ normalised.append(entry)
+
+ out_path = "copilot-dashboard/data/metrics.json"
+ os.makedirs(os.path.dirname(out_path), exist_ok=True)
+
+ with open(out_path, "w") as f:
+ json.dump(normalised, f, indent=2)
+
+ print(f"Wrote {len(normalised)} day(s) of metrics to {out_path}")
+ PYEOF
+
+ - name: Commit updated metrics
+ run: |
+ git config user.name "github-actions[bot]"
+ git config user.email "github-actions[bot]@users.noreply.github.com"
+ git add copilot-dashboard/data/metrics.json
+ if git diff --cached --quiet; then
+ echo "No changes to metrics data – skipping commit."
+ else
+ git commit -m "chore: update Copilot metrics [skip ci]"
+ git push
+ fi
diff --git a/README.md b/README.md
index e0eb817..48ece58 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,9 @@ publicsuffixlist
[Public Suffix List](https://publicsuffix.org/) parser implementation for
Python 3.5+.
-- Compliant with [TEST DATA](https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt)
+- Compliant with [TEST DATA](https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt).
- Supports IDN (unicode and punycoded).
-- Supports Python3.5+
+- Supports Python3.5+.
- Shipped with built-in PSL and an updater script.
- Written in Pure Python with no library dependencies.
@@ -33,11 +33,11 @@ from publicsuffixlist import PublicSuffixList
psl = PublicSuffixList()
# Uses built-in PSL file
-print(psl.publicsuffix("www.example.com")) # "com"
-# the longest public suffix part
+print(psl.publicsuffix("www.example.com")) # "com"
+# The longest public suffix part
-print(psl.privatesuffix("www.example.com")) # "example.com"
-# the shortest domain assigned for a registrant
+print(psl.privatesuffix("www.example.com")) # "example.com"
+# The shortest domain assigned for a registrant
print(psl.privatesuffix("com")) # None
# Returns None if no private (non-public) part found
@@ -45,7 +45,7 @@ print(psl.privatesuffix("com")) # None
print(psl.publicsuffix("www.example.unknownnewtld")) # "unknownnewtld"
# New TLDs are valid public suffix by default
-print(psl.publicsuffix("www.example.香港")) #"香港"
+print(psl.publicsuffix("www.example.香港")) # "香港"
# Accepts unicode
print(psl.publicsuffix("www.example.xn--j6w193g")) # "xn--j6w193g"
@@ -54,7 +54,7 @@ print(psl.publicsuffix("www.example.xn--j6w193g")) # "xn--j6w193g"
print(psl.privatesuffix("WWW.EXAMPLE.COM")) # "example.com"
# Returns in lowercase by default
-print(psl.privatesuffix("WWW.EXAMPLE.COM", keep_case=True) # "EXAMPLE.COM"
+print(psl.privatesuffix("WWW.EXAMPLE.COM", keep_case=True)) # "EXAMPLE.COM"
# kwarg `keep_case=True` to disable the case conversion
```
@@ -75,8 +75,8 @@ $ python -m publicsuffixlist.update
Additional convenient methods:
```python
-print(psl.is_private("example.com")) # True
-print(psl.is_public("example.com")) # False
+print(psl.is_private("example.com")) # True
+print(psl.is_public("example.com")) # False
print(psl.privateparts("aaa.www.example.com")) # ("aaa", "www", "example.com")
print(psl.subdomain("aaa.www.example.com", depth=1)) # "www.example.com"
```
@@ -86,7 +86,7 @@ Limitation
#### Domain Label Validation
-`publicsuffixlist` do NOT provide domain name and label validation.
+`publicsuffixlist` does NOT provide domain name and label validation.
In the DNS protocol, most 8-bit characters are acceptable as labels of domain
names. While ICANN-compliant registries do not accept domain names containing
underscores (_), hostnames may include them. For example, DMARC records can
@@ -97,7 +97,7 @@ based on their specific context.
Partially encoded (Unicode-mixed) Punycode is not supported due to very slow
Punycode encoding/decoding and unpredictable encoding results. If you are
unsure whether an input is valid Punycode, you should use:
-`unknowndomain.encode("idna").decode("ascii")`. This method, converting to idna
+`unknowndomain.encode("idna").decode("ascii")`. This method, converting to IDNA,
is idempotent.
#### Handling Arbitrary Binary
@@ -106,13 +106,13 @@ tuple of bytes. Note that the returned bytes may include byte patterns that
cannot be decoded or represented as a standard domain name.
Example:
```python
-psl.privatesuffix((b"a.a", b"a.example\xff", b"com")) # (b"a.example\xff", b"com")
+psl.privatesuffix((b"a.a", b"a.example\xff", b"com")) # (b"a.example\xff", b"com")
# Note that IDNs must be punycoded when passed as tuple of bytes.
psl = PublicSuffixList("例.example")
-psl.publicsuffix((b"xn--fsq", b"example")) # (b"xn--fsq", b"example")
+psl.publicsuffix((b"xn--fsq", b"example")) # (b"xn--fsq", b"example")
# UTF-8 encoded bytes of "例" do not match.
-psl.publicsuffix((b"\xe4\xbe\x8b", b"example")) # (b"example",)
+psl.publicsuffix((b"\xe4\xbe\x8b", b"example")) # (b"example",)
```
License
diff --git a/copilot-dashboard/README.md b/copilot-dashboard/README.md
new file mode 100644
index 0000000..7bce5fa
--- /dev/null
+++ b/copilot-dashboard/README.md
@@ -0,0 +1,84 @@
+# Copilot Metrics Dashboard
+
+A self-hosted dashboard that pulls GitHub Copilot usage data daily and
+displays it as interactive charts — no third-party service required.
+
+---
+
+## How it works
+
+1. A GitHub Actions workflow (`copilot-metrics.yml`) runs every day at 06:00 UTC.
+2. It calls the [GitHub Copilot Usage API](https://docs.github.com/en/rest/copilot/copilot-usage) and saves the results to `data/metrics.json`.
+3. The dashboard (`index.html`) reads that JSON file and renders charts automatically.
+
+---
+
+## ✅ One-time setup (the only manual step)
+
+You need to create a secret called **`COPILOT_METRICS_TOKEN`** in this repository.
+
+### Step 1 — Create a Personal Access Token
+
+1. Go to **GitHub.com → your profile → Settings → Developer settings → Personal access tokens → Fine-grained tokens**
+2. Click **"Generate new token"**
+3. Set:
+ - **Token name**: `copilot-metrics-dashboard` (or anything you like)
+ - **Resource owner**: `DewDropstempest` (the organization)
+ - **Repository access**: `Only select repositories` → pick this repo
+ - **Permissions → Organization permissions → GitHub Copilot Business → Access: Read-only**
+4. Click **"Generate token"** and **copy the token value** (you only see it once)
+
+> If Fine-grained tokens don't show a Copilot permission yet, create a **Classic token** with the `manage_billing:copilot` scope instead.
+
+### Step 2 — Add the secret to this repository
+
+1. Go to **this repository → Settings → Secrets and variables → Actions**
+2. Click **"New repository secret"**
+3. Name: `COPILOT_METRICS_TOKEN`
+4. Value: paste the token you just copied
+5. Click **"Add secret"**
+
+That's it — you're done!
+
+---
+
+## Viewing the dashboard
+
+### Option A — GitHub Pages (recommended, zero extra cost)
+
+1. Go to **this repository → Settings → Pages**
+2. Under **"Source"** choose **"Deploy from a branch"**
+3. Branch: `main` (or whichever branch this code is on), Folder: `/ (root)` (branch deployments only offer `/ (root)` or `/docs`)
+4. Click **Save**
+
+Your dashboard will be live at:
+```
+https://DewDropstempest.github.io/psl/copilot-dashboard/
+```
+
+### Option B — Open locally
+
+Just open `copilot-dashboard/index.html` in any modern browser (after the first
+workflow run has populated `data/metrics.json`).
+
+---
+
+## Triggering the first run
+
+Don't want to wait until 06:00 UTC? Run it now:
+
+1. Go to **this repository → Actions → Copilot Metrics Collector**
+2. Click **"Run workflow" → Run workflow**
+
+The `data/metrics.json` file will be committed automatically and the dashboard will show data within a minute or two.
+
+---
+
+## Troubleshooting
+
+| Symptom | Likely cause | Fix |
+|---------|-------------|-----|
+| Workflow fails with `HTTP 403` | Token missing or wrong scope | Re-check Step 1 — ensure Copilot read permission is set |
+| Workflow succeeds but dashboard shows "No data yet" | API returned an empty array | Make sure Copilot is enabled for the org and at least one seat is active |
+| Workflow fails with `HTTP 404` | Org doesn't have Copilot Business/Enterprise | Purchase a Copilot plan for the org |
+| Dashboard shows stale data | Workflow hasn't run yet today | Trigger manually (see above) |
diff --git a/copilot-dashboard/data/metrics.json b/copilot-dashboard/data/metrics.json
new file mode 100644
index 0000000..fe51488
--- /dev/null
+++ b/copilot-dashboard/data/metrics.json
@@ -0,0 +1 @@
+[]
diff --git a/copilot-dashboard/index.html b/copilot-dashboard/index.html
new file mode 100644
index 0000000..9f9844a
--- /dev/null
+++ b/copilot-dashboard/index.html
@@ -0,0 +1,296 @@
+
+
+
+
+
+ Copilot Metrics Dashboard
+
+
+
+
+
+
+
+
+
No data yet
+
+ The dashboard will populate automatically after the first scheduled run of the
+ Copilot Metrics Collector GitHub Actions workflow.
+ You can also trigger it manually: Actions → Copilot Metrics Collector → Run workflow.
+
+
+
+
+
+
+
Peak Active Users
+
—
+
highest single day
+
+
+
Total Suggestions
+
—
+
across all tracked days
+
+
+
Total Acceptances
+
—
+
accepted suggestions
+
+
+
Avg Acceptance Rate
+
—
+
acceptances / suggestions
+
+
+
Lines Accepted
+
—
+
total lines of code accepted
+
+
+
+
+
+
Active Users per Day
+
+
+
+
Suggestions vs Acceptances
+
+
+
+
Acceptance Rate (%)
+
+
+
+
Lines Suggested vs Accepted
+
+
+
+
+
+
+
+
diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py
index 759efb1..13f337d 100644
--- a/publicsuffixlist/test.py
+++ b/publicsuffixlist/test.py
@@ -4,14 +4,14 @@
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+# file, you can obtain one at http://mozilla.org/MPL/2.0/.
#
import os
import re
import unittest
-from publicsuffixlist import PublicSuffixList, b, encode_idn, u
+from publicsuffixlist import PublicSuffixList, b, decode_idn, encode_idn, u
def bytestuple(x):
return tuple(bytes(x).split(b'.'))
@@ -66,8 +66,8 @@ def test_keepcase(self):
def test_notpermitted_domain(self):
# From the PSL definition, empty labels are not permitted.
- # From the test_psl.txt, leading dot is not permitted.
- # However, it seems most implementations ignore trailing dot.
+ # From the test_psl.txt, a leading dot is not permitted.
+ # However, it seems most implementations ignore the trailing dot.
self.assertEqual(self.psl.suffix(".example.com"), None)
self.assertEqual(self.psl.publicsuffix(".example.com"), None)
@@ -431,6 +431,294 @@ def test_icann(self):
self.assertEqual(psl.publicsuffix("www.example.com"), 'com')
self.assertEqual(psl.publicsuffix("example.priv.at"), 'at')
+ def test_icann_section_detection_custom_source(self):
+ # Only rules inside the ICANN section should be honoured when
+ # only_icann=True. The "private" rule must be ignored.
+ source = (
+ "// ===BEGIN ICANN DOMAINS===\n"
+ "icann\n"
+ "// ===END ICANN DOMAINS===\n"
+ "private\n"
+ )
+ psl = PublicSuffixList(source, only_icann=True)
+ # "icann" rule was in the ICANN section → treated as explicit public suffix
+ self.assertIsNone(psl.privatesuffix("icann"))
+ self.assertEqual(psl.publicsuffix("icann"), "icann")
+ self.assertEqual(psl.publicsuffix("example.icann"), "icann")
+ self.assertEqual(psl.privatesuffix("example.icann"), "example.icann")
+ # "private" rule was outside the ICANN section → falls through to the
+ # unknown-TLD path (accept_unknown=True default), so it still acts as
+ # a public suffix but only by the unknown-TLD rule, not an explicit one.
+ self.assertEqual(psl.publicsuffix("example.private"), "private")
+ self.assertEqual(psl.privatesuffix("example.private"), "example.private")
+
+ def test_icann_no_markers(self):
+ # When the source has no ICANN section markers, only_icann=True means
+ # section_is_icann stays None (falsy) for every line → nothing is
+ # added to the suffix set, so all lookups fall back to accept_unknown.
+ source = "com\nnet\n"
+ psl = PublicSuffixList(source, only_icann=True)
+ # With accept_unknown=True (default) an unknown single-label TLD is
+ # still treated as public, so "example.com" gets a private suffix via
+ # the unknown-TLD path.
+ self.assertEqual(psl.privatesuffix("example.com"), "example.com")
+ # An explicit ICANN entry was never loaded, so "com" itself is treated
+ # as an unknown TLD (public) rather than as an explicitly listed suffix.
+ self.assertIsNone(psl.privatesuffix("com"))
+
+ def test_icann_private_domain_excluded(self):
+ # github.io is a private-section entry in the real PSL.
+ # With only_icann=True it should not be honoured as a public suffix,
+ # so "pages.github.io" should have a private suffix (via unknown-TLD
+ # fallback) rather than returning None.
+ psl = PublicSuffixList(only_icann=True)
+ result = psl.privatesuffix("pages.github.io")
+ self.assertIsNotNone(result)
+
+
+class TestHelpers(unittest.TestCase):
+
+ def test_u_function(self):
+ # bytes → str
+ self.assertEqual(u(b"hello"), "hello")
+ self.assertIsInstance(u(b"hello"), str)
+ # str passthrough
+ self.assertEqual(u("hello"), "hello")
+ self.assertIsInstance(u("hello"), str)
+
+ def test_b_function(self):
+ # str → bytes
+ self.assertEqual(b("hello"), b"hello")
+ self.assertIsInstance(b("hello"), bytes)
+ # bytes passthrough
+ self.assertEqual(b(b"hello"), b"hello")
+ self.assertIsInstance(b(b"hello"), bytes)
+ # bytearray → bytes
+ self.assertEqual(b(bytearray(b"hello")), b"hello")
+ self.assertIsInstance(b(bytearray(b"hello")), bytes)
+
+ def test_encode_idn(self):
+ result = encode_idn(u("例.jp"))
+ # Must be pure ASCII (punycode)
+ result.encode("ascii")
+ self.assertIn("jp", result)
+ self.assertNotIn("例", result)
+
+ def test_decode_idn(self):
+ original = u("例.jp")
+ encoded = encode_idn(original)
+ self.assertEqual(decode_idn(encoded), original)
+
+ def test_decode_idn_invalid(self):
+ # Invalid punycode must raise UnicodeError rather than silently produce
+ # a meaningless result.
+ self.assertRaises(UnicodeError, lambda: decode_idn("xn--invalid-punycode-zzzzzz.jp"))
+
+
+class TestConstructorOptions(unittest.TestCase):
+
+ def test_accept_encoded_idn_false(self):
+ # With accept_encoded_idn=False the punycode variant of an IDN rule is
+ # NOT added to the suffix set, so a punycoded domain that matches the
+ # IDN rule should fall back to the unknown-TLD path instead of being
+ # treated as a known public suffix in the usual way.
+ source = u("例.jp\n")
+ psl_with = PublicSuffixList(source, accept_encoded_idn=True)
+ psl_without = PublicSuffixList(source, accept_encoded_idn=False)
+
+ puny_tld = encode_idn(u("例.jp")) # e.g. "xn--fsq.jp" (a two-label suffix, not a bare TLD)
+ domain = "test." + puny_tld # e.g. "test.xn--fsq.jp"
+
+ # With encoding enabled the punycoded rule is loaded → the whole domain
+ # is the private suffix (one private label on top of the rule).
+ self.assertEqual(psl_with.privatesuffix(domain), domain)
+
+ # Without encoding the punycoded rule is presumably absent, so the
+ # lookup should stop at a shorter suffix: the result must differ from
+ # the full domain (i.e. the leading "test" label is not retained).
+ result_without = psl_without.privatesuffix(domain)
+ self.assertNotEqual(result_without, domain)
+
+
+class TestIsPrivatePublicEdgeCases(unittest.TestCase):
+
+ def setUp(self):
+ self.psl = PublicSuffixList()
+
+ def test_is_private_invalid_domain(self):
+ self.assertFalse(self.psl.is_private(".bad"))
+ self.assertFalse(self.psl.is_private(""))
+ self.assertFalse(self.psl.is_private("www..invalid"))
+
+ def test_is_public_invalid_domain(self):
+ self.assertFalse(self.psl.is_public(".bad"))
+ self.assertFalse(self.psl.is_public(""))
+ self.assertFalse(self.psl.is_public("www..invalid"))
+
+ def test_is_private_unknown_tld(self):
+ # Two-label domain under unknown TLD → private (registrable)
+ self.assertTrue(self.psl.is_private("example.unknowntld"))
+ # Three-label domain under unknown TLD → still private
+ self.assertTrue(self.psl.is_private("sub.example.unknowntld"))
+
+ def test_is_public_unknown_tld(self):
+ # Single unknown TLD → public
+ self.assertTrue(self.psl.is_public("unknowntld"))
+ # Two-label domain under unknown TLD → not public
+ self.assertFalse(self.psl.is_public("example.unknowntld"))
+
+ def test_is_private_is_public_trailing_dot(self):
+ # Trailing dot is ignored; domain reduces to valid form
+ self.assertTrue(self.psl.is_private("example.com."))
+ self.assertFalse(self.psl.is_public("example.com."))
+
+ def test_is_public_known_tld(self):
+ self.assertTrue(self.psl.is_public("com"))
+ self.assertTrue(self.psl.is_public("co.jp"))
+ self.assertFalse(self.psl.is_public("example.com"))
+
+
+class TestPrivatepartsBytestuple(unittest.TestCase):
+
+ def setUp(self):
+ self.psl = PublicSuffixList()
+
+ def test_privateparts_bytestuple_basic(self):
+ data = (b"www", b"example", b"com")
+ result = self.psl.privateparts(data)
+ # subdomain labels + private suffix tuple
+ self.assertEqual(result, (b"www", (b"example", b"com")))
+
+ def test_privateparts_bytestuple_no_subdomain(self):
+ data = (b"example", b"com")
+ result = self.psl.privateparts(data)
+ self.assertEqual(result, ((b"example", b"com"),))
+
+ def test_privateparts_bytestuple_keepcase(self):
+ data = (b"Www", b"Example", b"Com")
+ result = self.psl.privateparts(data, keep_case=True)
+ self.assertEqual(result, (b"Www", (b"Example", b"Com")))
+
+ def test_privateparts_bytestuple_none(self):
+ # public suffix only → no private part
+ data = (b"com",)
+ self.assertIsNone(self.psl.privateparts(data))
+
+
+class TestSubdomainBytestuple(unittest.TestCase):
+
+ def setUp(self):
+ self.psl = PublicSuffixList()
+
+ def test_subdomain_bytestuple_depth0(self):
+ data = (b"aaa", b"www", b"example", b"com")
+ result = self.psl.subdomain(data, depth=0)
+ self.assertEqual(result, (b"example", b"com"))
+
+ def test_subdomain_bytestuple_depth1(self):
+ data = (b"aaa", b"www", b"example", b"com")
+ result = self.psl.subdomain(data, depth=1)
+ self.assertEqual(result, (b"www", b"example", b"com"))
+
+ def test_subdomain_bytestuple_overflow(self):
+ data = (b"example", b"com")
+ # depth=1 requires at least 3 labels (publen=1 + 1 private + 1 sub)
+ self.assertIsNone(self.psl.subdomain(data, depth=1))
+
+ def test_subdomain_bytestuple_public_only(self):
+ data = (b"com",)
+ self.assertIsNone(self.psl.subdomain(data, depth=0))
+
+
+class TestBytearrayTypeError(unittest.TestCase):
+
+ def setUp(self):
+ self.psl = PublicSuffixList()
+
+ def test_bytearray_raises_typeerror(self):
+ self.assertRaises(TypeError, lambda: self.psl.suffix(bytearray(b"example.com")))
+
+ def test_bytearray_publicsuffix_raises_typeerror(self):
+ self.assertRaises(TypeError, lambda: self.psl.publicsuffix(bytearray(b"example.com")))
+
+ def test_bytearray_privatesuffix_raises_typeerror(self):
+ self.assertRaises(TypeError, lambda: self.psl.privatesuffix(bytearray(b"example.com")))
+
+
+class TestWildcardAcceptUnknown(unittest.TestCase):
+
+ def setUp(self):
+ source = "*.bd\n"
+ # accept_unknown=True is the default; test it explicitly as well
+ self.psl = PublicSuffixList(source.splitlines(), accept_unknown=True)
+
+ def test_bare_tld_is_public(self):
+ self.assertEqual(self.psl.publicsuffix("bd"), "bd")
+ self.assertIsNone(self.psl.privatesuffix("bd"))
+
+ def test_one_label_under_wildcard_is_public(self):
+ # "example.bd" matches *.bd → example.bd is public suffix
+ self.assertEqual(self.psl.publicsuffix("example.bd"), "example.bd")
+ self.assertIsNone(self.psl.privatesuffix("example.bd"))
+
+ def test_two_labels_under_wildcard_has_private(self):
+ self.assertEqual(self.psl.publicsuffix("sub.example.bd"), "example.bd")
+ self.assertEqual(self.psl.privatesuffix("sub.example.bd"), "sub.example.bd")
+
+
+class TestCompatEdgeCases(unittest.TestCase):
+
+ def setUp(self):
+ from publicsuffixlist.compat import PublicSuffixList, UnsafePublicSuffixList
+ self.psl = PublicSuffixList()
+ self.upsl = UnsafePublicSuffixList()
+
+ def test_compat_unknown_tld(self):
+ self.assertEqual(self.psl.get_public_suffix("example.unknowntld"), "example.unknowntld")
+
+ def test_compat_invalid_domain(self):
+ self.assertEqual(self.psl.get_public_suffix(".bad"), "")
+
+ def test_compat_empty_string(self):
+ self.assertEqual(self.psl.get_public_suffix(""), "")
+
+ def test_compat_very_long_domain(self):
+ d = "a." * 1000 + "example.com"
+ self.assertEqual(self.psl.get_public_suffix(d), "example.com")
+
+ def test_unsafe_compat_fallback_public_suffix(self):
+ # When privatesuffix is None (e.g. bare TLD), UnsafePublicSuffixList
+ # falls back to returning the publicsuffix instead.
+ self.assertEqual(self.upsl.get_public_suffix("com"), "com")
+
+ def test_unsafe_compat_private_domain(self):
+ self.assertEqual(self.upsl.get_public_suffix("test.example.com"), "example.com")
+
+ def test_unsafe_compat_invalid_domain(self):
+ self.assertEqual(self.upsl.get_public_suffix(".bad"), "")
+
+ def test_unsafe_compat_empty_string(self):
+ self.assertEqual(self.upsl.get_public_suffix(""), "")
+
+
+class TestLargePSLSource(unittest.TestCase):
+
+ def test_many_rules_parsing(self):
+ # Build a PSL with several hundred rules (500) and verify lookups stay correct.
+ lines = ["// ===BEGIN ICANN DOMAINS==="]
+ lines += ["rule{0}.example".format(i) for i in range(500)]
+ lines += ["// ===END ICANN DOMAINS==="]
+ lines += ["com"]
+ source = "\n".join(lines)
+ psl = PublicSuffixList(source)
+ # An explicitly listed suffix should be recognised
+ self.assertIsNone(psl.privatesuffix("rule42.example"))
+ self.assertEqual(psl.publicsuffix("rule42.example"), "rule42.example")
+ # A sub-domain of that rule should be private
+ self.assertEqual(psl.privatesuffix("sub.rule42.example"), "sub.rule42.example")
+ # "com" is listed after the ICANN end marker; lookups under it still work
+ self.assertEqual(psl.privatesuffix("example.com"), "example.com")
+
if __name__ == "__main__":
unittest.main()