diff --git a/.github/workflows/citest.yml b/.github/workflows/citest.yml index 9d57857..5213c21 100644 --- a/.github/workflows/citest.yml +++ b/.github/workflows/citest.yml @@ -10,13 +10,13 @@ on: jobs: test-oldpython: - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" steps: - uses: actions/checkout@v4 - - name: Set up Python 3.5 + - name: Set up Python 3.7 uses: actions/setup-python@v5 with: - python-version: "3.5" + python-version: "3.7" env: PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org" # workaround for certificate incompatibility diff --git a/.github/workflows/copilot-metrics.yml b/.github/workflows/copilot-metrics.yml new file mode 100644 index 0000000..1068044 --- /dev/null +++ b/.github/workflows/copilot-metrics.yml @@ -0,0 +1,105 @@ +name: Copilot Metrics Collector + +on: + schedule: + # Runs every day at 06:00 UTC (after GitHub's nightly data refresh) + - cron: "0 6 * * *" + workflow_dispatch: + # Also allows manual triggering from the Actions tab + +permissions: + contents: write # needed to commit the updated metrics file back to the repo + +jobs: + fetch-metrics: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3" + + - name: Fetch Copilot usage metrics + env: + COPILOT_METRICS_TOKEN: ${{ secrets.COPILOT_METRICS_TOKEN }} + ORG: DewDropstempest + run: | + python - <<'PYEOF' + import json + import os + import urllib.request + import urllib.error + from datetime import datetime, timezone + + token = os.environ["COPILOT_METRICS_TOKEN"] + org = os.environ["ORG"] + + url = f"https://api.github.com/orgs/{org}/copilot/usage" + + req = urllib.request.Request( + url, + headers={ + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) + + try: + with urllib.request.urlopen(req) as resp: + data = json.loads(resp.read().decode()) + except 
urllib.error.HTTPError as e: + body = e.read().decode() + print(f"HTTP {e.code}: {body}") + raise SystemExit(f"Failed to fetch metrics: {e.code}") + + # ── Normalise field names ────────────────────────────────────────── + # The API returns either the v1 shape (top-level total_suggestions_count, etc.) + # or the v2 shape (counters nested under copilot_ide_code_completions). We normalise to v1. + normalised = [] + for day in data: + entry = {"day": day.get("day", day.get("date", "unknown"))} + if "total_suggestions_count" in day: + # v1 shape – already in the format we want + entry["total_active_users"] = day.get("total_active_users", 0) + entry["total_suggestions_count"] = day.get("total_suggestions_count", 0) + entry["total_acceptances_count"] = day.get("total_acceptances_count", 0) + entry["total_lines_suggested"] = day.get("total_lines_suggested", 0) + entry["total_lines_accepted"] = day.get("total_lines_accepted", 0) + elif "copilot_ide_code_completions" in day: + # v2 / newer shape + cc = day.get("copilot_ide_code_completions") or {} + entry["total_active_users"] = day.get("total_active_users", 0) + entry["total_suggestions_count"] = cc.get("total_suggestions_count", 0) + entry["total_acceptances_count"] = cc.get("total_acceptances_count", 0) + entry["total_lines_suggested"] = cc.get("total_lines_suggested", 0) + entry["total_lines_accepted"] = cc.get("total_lines_accepted", 0) + else: + # Fallback – store whatever we got so the dashboard can adapt + entry.update({k: v for k, v in day.items() if k != "breakdown"}) + normalised.append(entry) + + out_path = "copilot-dashboard/data/metrics.json" + os.makedirs(os.path.dirname(out_path), exist_ok=True) + + with open(out_path, "w") as f: + json.dump(normalised, f, indent=2) + + print(f"Wrote {len(normalised)} day(s) of metrics to {out_path}") + PYEOF + + - name: Commit updated metrics + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add 
copilot-dashboard/data/metrics.json + if git diff --cached --quiet; then + echo "No changes to metrics data – skipping commit." + else + git commit -m "chore: update Copilot metrics [skip ci]" + git push + fi diff --git a/README.md b/README.md index e0eb817..48ece58 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ publicsuffixlist [Public Suffix List](https://publicsuffix.org/) parser implementation for Python 3.5+. -- Compliant with [TEST DATA](https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt) +- Compliant with [TEST DATA](https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt). - Supports IDN (unicode and punycoded). -- Supports Python3.5+ +- Supports Python3.5+. - Shipped with built-in PSL and an updater script. - Written in Pure Python with no library dependencies. @@ -33,11 +33,11 @@ from publicsuffixlist import PublicSuffixList psl = PublicSuffixList() # Uses built-in PSL file -print(psl.publicsuffix("www.example.com")) # "com" -# the longest public suffix part +print(psl.publicsuffix("www.example.com")) # "com" +# The longest public suffix part -print(psl.privatesuffix("www.example.com")) # "example.com" -# the shortest domain assigned for a registrant +print(psl.privatesuffix("www.example.com")) # "example.com" +# The shortest domain assigned for a registrant print(psl.privatesuffix("com")) # None # Returns None if no private (non-public) part found @@ -45,7 +45,7 @@ print(psl.privatesuffix("com")) # None print(psl.publicsuffix("www.example.unknownnewtld")) # "unknownnewtld" # New TLDs are valid public suffix by default -print(psl.publicsuffix("www.example.香港")) #"香港" +print(psl.publicsuffix("www.example.香港")) # "香港" # Accepts unicode print(psl.publicsuffix("www.example.xn--j6w193g")) # "xn--j6w193g" @@ -54,7 +54,7 @@ print(psl.publicsuffix("www.example.xn--j6w193g")) # "xn--j6w193g" print(psl.privatesuffix("WWW.EXAMPLE.COM")) # "example.com" # Returns in lowercase by default 
-print(psl.privatesuffix("WWW.EXAMPLE.COM", keep_case=True) # "EXAMPLE.COM" +print(psl.privatesuffix("WWW.EXAMPLE.COM", keep_case=True)) # "EXAMPLE.COM" # kwarg `keep_case=True` to disable the case conversion ``` @@ -75,8 +75,8 @@ $ python -m publicsuffixlist.update Additional convenient methods: ```python -print(psl.is_private("example.com")) # True -print(psl.is_public("example.com")) # False +print(psl.is_private("example.com")) # True +print(psl.is_public("example.com")) # False print(psl.privateparts("aaa.www.example.com")) # ("aaa", "www", "example.com") print(psl.subdomain("aaa.www.example.com", depth=1)) # "www.example.com" ``` @@ -86,7 +86,7 @@ Limitation #### Domain Label Validation -`publicsuffixlist` do NOT provide domain name and label validation. +`publicsuffixlist` does NOT provide domain name and label validation. In the DNS protocol, most 8-bit characters are acceptable as labels of domain names. While ICANN-compliant registries do not accept domain names containing underscores (_), hostnames may include them. For example, DMARC records can @@ -97,7 +97,7 @@ based on their specific context. Partially encoded (Unicode-mixed) Punycode is not supported due to very slow Punycode encoding/decoding and unpredictable encoding results. If you are unsure whether an input is valid Punycode, you should use: -`unknowndomain.encode("idna").decode("ascii")`. This method, converting to idna +`unknowndomain.encode("idna").decode("ascii")`. This method, converting to IDNA is idempotent. #### Handling Arbitrary Binary @@ -106,13 +106,13 @@ tuple of bytes. Note that the returned bytes may include byte patterns that cannot be decoded or represented as a standard domain name. Example: ```python -psl.privatesuffix((b"a.a", b"a.example\xff", b"com")) # (b"a.example\xff", b"com") +psl.privatesuffix((b"a.a", b"a.example\xff", b"com")) # (b"a.example\xff", b"com") # Note that IDNs must be punycoded when passed as tuple of bytes. 
psl = PublicSuffixList("例.example") -psl.publicsuffix((b"xn--fsq", b"example")) # (b"xn--fsq", b"example") +psl.publicsuffix((b"xn--fsq", b"example")) # (b"xn--fsq", b"example") # UTF-8 encoded bytes of "例" do not match. -psl.publicsuffix((b"\xe4\xbe\x8b", b"example")) # (b"example",) +psl.publicsuffix((b"\xe4\xbe\x8b", b"example")) # (b"example",) ``` License diff --git a/copilot-dashboard/README.md b/copilot-dashboard/README.md new file mode 100644 index 0000000..7bce5fa --- /dev/null +++ b/copilot-dashboard/README.md @@ -0,0 +1,84 @@ +# Copilot Metrics Dashboard + +A self-hosted dashboard that pulls GitHub Copilot usage data daily and +displays it as interactive charts — no third-party service required. + +--- + +## How it works + +1. A GitHub Actions workflow (`copilot-metrics.yml`) runs every day at 06:00 UTC. +2. It calls the [GitHub Copilot Usage API](https://docs.github.com/en/rest/copilot/copilot-usage) and saves the results to `data/metrics.json`. +3. The dashboard (`index.html`) reads that JSON file and renders charts automatically. + +--- + +## ✅ One-time setup (the only manual step) + +You need to create a secret called **`COPILOT_METRICS_TOKEN`** in this repository. + +### Step 1 — Create a Personal Access Token + +1. Go to **GitHub.com → your profile → Settings → Developer settings → Personal access tokens → Fine-grained tokens** +2. Click **"Generate new token"** +3. Set: + - **Token name**: `copilot-metrics-dashboard` (or anything you like) + - **Resource owner**: `DewDropstempest` (the organization) + - **Repository access**: `Only select repositories` → pick this repo + - **Permissions → Organization permissions → GitHub Copilot Business → Access: Read-only** +4. Click **"Generate token"** and **copy the token value** (you only see it once) + +> If Fine-grained tokens don't show a Copilot permission yet, create a **Classic token** with the `manage_billing:copilot` scope instead. + +### Step 2 — Add the secret to this repository + +1. 
Go to **this repository → Settings → Secrets and variables → Actions** +2. Click **"New repository secret"** +3. Name: `COPILOT_METRICS_TOKEN` +4. Value: paste the token you just copied +5. Click **"Add secret"** + +That's it — you're done! + +--- + +## Viewing the dashboard + +### Option A — GitHub Pages (recommended, zero extra cost) + +1. Go to **this repository → Settings → Pages** +2. Under **"Source"** choose **"Deploy from a branch"** +3. Branch: `main` (or whichever branch this code is on), Folder: `/ (root)` (branch publishing only offers `/ (root)` or `/docs`) +4. Click **Save** + +Your dashboard will be live at: +``` +https://DewDropstempest.github.io/psl/copilot-dashboard/ +``` + +### Option B — Open locally + +Just open `copilot-dashboard/index.html` in any modern browser (after the first +workflow run has populated `data/metrics.json`). + +--- + +## Triggering the first run + +Don't want to wait until 06:00 UTC? Run it now: + +1. Go to **this repository → Actions → Copilot Metrics Collector** +2. Click **"Run workflow" → Run workflow** + +The `data/metrics.json` file will be committed automatically and the dashboard will show data within a minute or two. 
+ +--- + +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---------|-------------|-----| +| Workflow fails with `HTTP 403` | Token missing or wrong scope | Re-check Step 1 — ensure Copilot read permission is set | +| Workflow succeeds but dashboard shows "No data yet" | API returned an empty array | Make sure Copilot is enabled for the org and at least one seat is active | +| Workflow fails with `HTTP 404` | Org doesn't have Copilot Business/Enterprise | Purchase a Copilot plan for the org | +| Dashboard shows stale data | Workflow hasn't run yet today | Trigger manually (see above) | diff --git a/copilot-dashboard/data/metrics.json b/copilot-dashboard/data/metrics.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/copilot-dashboard/data/metrics.json @@ -0,0 +1 @@ +[] diff --git a/copilot-dashboard/index.html b/copilot-dashboard/index.html new file mode 100644 index 0000000..9f9844a --- /dev/null +++ b/copilot-dashboard/index.html @@ -0,0 +1,296 @@ + + + + + + Copilot Metrics Dashboard + + + + + +
+ + + + + +
+

GitHub Copilot Metrics

+

DewDropstempest organization · Daily usage data

+
+
+
Last updated
+
+
+
+ +
+

No data yet

+

+ The dashboard will populate automatically after the first scheduled run of the
+ Copilot Metrics Collector GitHub Actions workflow.

+ You can also trigger it manually: Actions → Copilot Metrics Collector → Run workflow. +

+
+ + + + + + diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py index 759efb1..13f337d 100644 --- a/publicsuffixlist/test.py +++ b/publicsuffixlist/test.py @@ -4,14 +4,14 @@ # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# file, you can obtain one at http://mozilla.org/MPL/2.0/. # import os import re import unittest -from publicsuffixlist import PublicSuffixList, b, encode_idn, u +from publicsuffixlist import PublicSuffixList, b, decode_idn, encode_idn, u def bytestuple(x): return tuple(bytes(x).split(b'.')) @@ -66,8 +66,8 @@ def test_keepcase(self): def test_notpermitted_domain(self): # From the PSL definition, empty labels are not permitted. - # From the test_psl.txt, leading dot is not permitted. - # However, it seems most implementations ignore trailing dot. + # From the test_psl.txt, a leading dot is not permitted. + # However, it seems most implementations ignore the trailing dot. self.assertEqual(self.psl.suffix(".example.com"), None) self.assertEqual(self.psl.publicsuffix(".example.com"), None) @@ -431,6 +431,294 @@ def test_icann(self): self.assertEqual(psl.publicsuffix("www.example.com"), 'com') self.assertEqual(psl.publicsuffix("example.priv.at"), 'at') + def test_icann_section_detection_custom_source(self): + # Only rules inside the ICANN section should be honoured when + # only_icann=True. The "private" rule must be ignored. 
+ source = ( + "// ===BEGIN ICANN DOMAINS===\n" + "icann\n" + "// ===END ICANN DOMAINS===\n" + "private\n" + ) + psl = PublicSuffixList(source, only_icann=True) + # "icann" rule was in the ICANN section → treated as explicit public suffix + self.assertIsNone(psl.privatesuffix("icann")) + self.assertEqual(psl.publicsuffix("icann"), "icann") + self.assertEqual(psl.publicsuffix("example.icann"), "icann") + self.assertEqual(psl.privatesuffix("example.icann"), "example.icann") + # "private" rule was outside the ICANN section → falls through to the + # unknown-TLD path (accept_unknown=True default), so it still acts as + # a public suffix but only by the unknown-TLD rule, not an explicit one. + self.assertEqual(psl.publicsuffix("example.private"), "private") + self.assertEqual(psl.privatesuffix("example.private"), "example.private") + + def test_icann_no_markers(self): + # When the source has no ICANN section markers, only_icann=True means + # section_is_icann stays None (falsy) for every line → nothing is + # added to the suffix set, so all lookups fall back to accept_unknown. + source = "com\nnet\n" + psl = PublicSuffixList(source, only_icann=True) + # With accept_unknown=True (default) an unknown single-label TLD is + # still treated as public, so "example.com" gets a private suffix via + # the unknown-TLD path. + self.assertEqual(psl.privatesuffix("example.com"), "example.com") + # An explicit ICANN entry was never loaded, so "com" itself is treated + # as an unknown TLD (public) rather than as an explicitly listed suffix. + self.assertIsNone(psl.privatesuffix("com")) + + def test_icann_private_domain_excluded(self): + # github.io is a private-section entry in the real PSL. + # With only_icann=True it should not be honoured as a public suffix, + # so "pages.github.io" should have a private suffix (via unknown-TLD + # fallback) rather than returning None. 
+ psl = PublicSuffixList(only_icann=True) + result = psl.privatesuffix("pages.github.io") + self.assertIsNotNone(result) + + +class TestHelpers(unittest.TestCase): + + def test_u_function(self): + # bytes → str + self.assertEqual(u(b"hello"), "hello") + self.assertIsInstance(u(b"hello"), str) + # str passthrough + self.assertEqual(u("hello"), "hello") + self.assertIsInstance(u("hello"), str) + + def test_b_function(self): + # str → bytes + self.assertEqual(b("hello"), b"hello") + self.assertIsInstance(b("hello"), bytes) + # bytes passthrough + self.assertEqual(b(b"hello"), b"hello") + self.assertIsInstance(b(b"hello"), bytes) + # bytearray → bytes + self.assertEqual(b(bytearray(b"hello")), b"hello") + self.assertIsInstance(b(bytearray(b"hello")), bytes) + + def test_encode_idn(self): + result = encode_idn(u("例.jp")) + # Must be pure ASCII (punycode) + result.encode("ascii") + self.assertIn("jp", result) + self.assertNotIn("例", result) + + def test_decode_idn(self): + original = u("例.jp") + encoded = encode_idn(original) + self.assertEqual(decode_idn(encoded), original) + + def test_decode_idn_invalid(self): + # Invalid punycode must raise UnicodeError rather than silently produce + # a meaningless result. + self.assertRaises(UnicodeError, lambda: decode_idn("xn--invalid-punycode-zzzzzz.jp")) + + +class TestConstructorOptions(unittest.TestCase): + + def test_accept_encoded_idn_false(self): + # With accept_encoded_idn=False the punycode variant of an IDN rule is + # NOT added to the suffix set, so a punycoded domain that matches the + # IDN rule should fall back to the unknown-TLD path instead of being + # treated as a known public suffix in the usual way. + source = u("例.jp\n") + psl_with = PublicSuffixList(source, accept_encoded_idn=True) + psl_without = PublicSuffixList(source, accept_encoded_idn=False) + + puny_tld = encode_idn(u("例.jp")) # e.g. "xn--fsq.jp" + domain = "test." + puny_tld # e.g. 
"test.xn--fsq.jp" + + # With encoding enabled the punycoded rule is loaded → private suffix + # has exactly one private label. + self.assertEqual(psl_with.privatesuffix(domain), domain) + + # Without encoding the punycoded rule is absent → no rule matches, so + # "jp" becomes the public suffix via the unknown-TLD fallback and the + # private suffix is shorter than the full domain. + result_without = psl_without.privatesuffix(domain) + self.assertNotEqual(result_without, domain) + + +class TestIsPrivatePublicEdgeCases(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_is_private_invalid_domain(self): + self.assertFalse(self.psl.is_private(".bad")) + self.assertFalse(self.psl.is_private("")) + self.assertFalse(self.psl.is_private("www..invalid")) + + def test_is_public_invalid_domain(self): + self.assertFalse(self.psl.is_public(".bad")) + self.assertFalse(self.psl.is_public("")) + self.assertFalse(self.psl.is_public("www..invalid")) + + def test_is_private_unknown_tld(self): + # Two-label domain under unknown TLD → private (registrable) + self.assertTrue(self.psl.is_private("example.unknowntld")) + # Three-label domain under unknown TLD → still private + self.assertTrue(self.psl.is_private("sub.example.unknowntld")) + + def test_is_public_unknown_tld(self): + # Single unknown TLD → public + self.assertTrue(self.psl.is_public("unknowntld")) + # Two-label domain under unknown TLD → not public + self.assertFalse(self.psl.is_public("example.unknowntld")) + + def test_is_private_is_public_trailing_dot(self): + # Trailing dot is ignored; domain reduces to valid form + self.assertTrue(self.psl.is_private("example.com.")) + self.assertFalse(self.psl.is_public("example.com.")) + + def test_is_public_known_tld(self): + self.assertTrue(self.psl.is_public("com")) + self.assertTrue(self.psl.is_public("co.jp")) + self.assertFalse(self.psl.is_public("example.com")) + + +class TestPrivatepartsBytestuple(unittest.TestCase): + + def setUp(self): + self.psl = 
PublicSuffixList() + + def test_privateparts_bytestuple_basic(self): + data = (b"www", b"example", b"com") + result = self.psl.privateparts(data) + # subdomain labels + private suffix tuple + self.assertEqual(result, (b"www", (b"example", b"com"))) + + def test_privateparts_bytestuple_no_subdomain(self): + data = (b"example", b"com") + result = self.psl.privateparts(data) + self.assertEqual(result, ((b"example", b"com"),)) + + def test_privateparts_bytestuple_keepcase(self): + data = (b"Www", b"Example", b"Com") + result = self.psl.privateparts(data, keep_case=True) + self.assertEqual(result, (b"Www", (b"Example", b"Com"))) + + def test_privateparts_bytestuple_none(self): + # public suffix only → no private part + data = (b"com",) + self.assertIsNone(self.psl.privateparts(data)) + + +class TestSubdomainBytestuple(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_subdomain_bytestuple_depth0(self): + data = (b"aaa", b"www", b"example", b"com") + result = self.psl.subdomain(data, depth=0) + self.assertEqual(result, (b"example", b"com")) + + def test_subdomain_bytestuple_depth1(self): + data = (b"aaa", b"www", b"example", b"com") + result = self.psl.subdomain(data, depth=1) + self.assertEqual(result, (b"www", b"example", b"com")) + + def test_subdomain_bytestuple_overflow(self): + data = (b"example", b"com") + # depth=1 requires at least 3 labels (publen=1 + 1 private + 1 sub) + self.assertIsNone(self.psl.subdomain(data, depth=1)) + + def test_subdomain_bytestuple_public_only(self): + data = (b"com",) + self.assertIsNone(self.psl.subdomain(data, depth=0)) + + +class TestBytearrayTypeError(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_bytearray_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.suffix(bytearray(b"example.com"))) + + def test_bytearray_publicsuffix_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.publicsuffix(bytearray(b"example.com"))) 
+ + def test_bytearray_privatesuffix_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.privatesuffix(bytearray(b"example.com"))) + + +class TestWildcardAcceptUnknown(unittest.TestCase): + + def setUp(self): + source = "*.bd\n" + # accept_unknown=True is the default; test it explicitly as well + self.psl = PublicSuffixList(source.splitlines(), accept_unknown=True) + + def test_bare_tld_is_public(self): + self.assertEqual(self.psl.publicsuffix("bd"), "bd") + self.assertIsNone(self.psl.privatesuffix("bd")) + + def test_one_label_under_wildcard_is_public(self): + # "example.bd" matches *.bd → example.bd is public suffix + self.assertEqual(self.psl.publicsuffix("example.bd"), "example.bd") + self.assertIsNone(self.psl.privatesuffix("example.bd")) + + def test_two_labels_under_wildcard_has_private(self): + self.assertEqual(self.psl.publicsuffix("sub.example.bd"), "example.bd") + self.assertEqual(self.psl.privatesuffix("sub.example.bd"), "sub.example.bd") + + +class TestCompatEdgeCases(unittest.TestCase): + + def setUp(self): + from publicsuffixlist.compat import PublicSuffixList, UnsafePublicSuffixList + self.psl = PublicSuffixList() + self.upsl = UnsafePublicSuffixList() + + def test_compat_unknown_tld(self): + self.assertEqual(self.psl.get_public_suffix("example.unknowntld"), "example.unknowntld") + + def test_compat_invalid_domain(self): + self.assertEqual(self.psl.get_public_suffix(".bad"), "") + + def test_compat_empty_string(self): + self.assertEqual(self.psl.get_public_suffix(""), "") + + def test_compat_very_long_domain(self): + d = "a." * 1000 + "example.com" + self.assertEqual(self.psl.get_public_suffix(d), "example.com") + + def test_unsafe_compat_fallback_public_suffix(self): + # When privatesuffix is None (e.g. bare TLD), UnsafePublicSuffixList + # falls back to returning the publicsuffix instead. 
+ self.assertEqual(self.upsl.get_public_suffix("com"), "com") + + def test_unsafe_compat_private_domain(self): + self.assertEqual(self.upsl.get_public_suffix("test.example.com"), "example.com") + + def test_unsafe_compat_invalid_domain(self): + self.assertEqual(self.upsl.get_public_suffix(".bad"), "") + + def test_unsafe_compat_empty_string(self): + self.assertEqual(self.upsl.get_public_suffix(""), "") + + +class TestLargePSLSource(unittest.TestCase): + + def test_many_rules_parsing(self): + # Build a PSL with hundreds of rules and verify lookups stay correct. + lines = ["// ===BEGIN ICANN DOMAINS==="] + lines += ["rule{0}.example".format(i) for i in range(500)] + lines += ["// ===END ICANN DOMAINS==="] + lines += ["com"] + source = "\n".join(lines) + psl = PublicSuffixList(source) + # An explicitly listed suffix should be recognised + self.assertIsNone(psl.privatesuffix("rule42.example")) + self.assertEqual(psl.publicsuffix("rule42.example"), "rule42.example") + # A sub-domain of that rule should be private + self.assertEqual(psl.privatesuffix("sub.rule42.example"), "sub.rule42.example") + # Standard TLD still works + self.assertEqual(psl.privatesuffix("example.com"), "example.com") + if __name__ == "__main__": unittest.main()