From 87a9a8e8ec2fbf4ff81b22d218ef2bbbd7494f5f Mon Sep 17 00:00:00 2001 From: badappearance <201956525+badappearance@users.noreply.github.com> Date: Thu, 12 Jun 2025 23:55:38 +0200 Subject: [PATCH 1/6] Update README text and code --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index e0eb817..48ece58 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ publicsuffixlist [Public Suffix List](https://publicsuffix.org/) parser implementation for Python 3.5+. -- Compliant with [TEST DATA](https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt) +- Compliant with [TEST DATA](https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt). - Supports IDN (unicode and punycoded). -- Supports Python3.5+ +- Supports Python3.5+. - Shipped with built-in PSL and an updater script. - Written in Pure Python with no library dependencies. @@ -33,11 +33,11 @@ from publicsuffixlist import PublicSuffixList psl = PublicSuffixList() # Uses built-in PSL file -print(psl.publicsuffix("www.example.com")) # "com" -# the longest public suffix part +print(psl.publicsuffix("www.example.com")) # "com" +# The longest public suffix part -print(psl.privatesuffix("www.example.com")) # "example.com" -# the shortest domain assigned for a registrant +print(psl.privatesuffix("www.example.com")) # "example.com" +# The shortest domain assigned for a registrant print(psl.privatesuffix("com")) # None # Returns None if no private (non-public) part found @@ -45,7 +45,7 @@ print(psl.privatesuffix("com")) # None print(psl.publicsuffix("www.example.unknownnewtld")) # "unknownnewtld" # New TLDs are valid public suffix by default -print(psl.publicsuffix("www.example.香港")) #"香港" +print(psl.publicsuffix("www.example.香港")) # "香港" # Accepts unicode print(psl.publicsuffix("www.example.xn--j6w193g")) # "xn--j6w193g" @@ -54,7 +54,7 @@ print(psl.publicsuffix("www.example.xn--j6w193g")) # 
"xn--j6w193g" print(psl.privatesuffix("WWW.EXAMPLE.COM")) # "example.com" # Returns in lowercase by default -print(psl.privatesuffix("WWW.EXAMPLE.COM", keep_case=True) # "EXAMPLE.COM" +print(psl.privatesuffix("WWW.EXAMPLE.COM", keep_case=True)) # "EXAMPLE.COM" # kwarg `keep_case=True` to disable the case conversion ``` @@ -75,8 +75,8 @@ $ python -m publicsuffixlist.update Additional convenient methods: ```python -print(psl.is_private("example.com")) # True -print(psl.is_public("example.com")) # False +print(psl.is_private("example.com")) # True +print(psl.is_public("example.com")) # False print(psl.privateparts("aaa.www.example.com")) # ("aaa", "www", "example.com") print(psl.subdomain("aaa.www.example.com", depth=1)) # "www.example.com" ``` @@ -86,7 +86,7 @@ Limitation #### Domain Label Validation -`publicsuffixlist` do NOT provide domain name and label validation. +`publicsuffixlist` does NOT provide domain name and label validation. In the DNS protocol, most 8-bit characters are acceptable as labels of domain names. While ICANN-compliant registries do not accept domain names containing underscores (_), hostnames may include them. For example, DMARC records can @@ -97,7 +97,7 @@ based on their specific context. Partially encoded (Unicode-mixed) Punycode is not supported due to very slow Punycode encoding/decoding and unpredictable encoding results. If you are unsure whether an input is valid Punycode, you should use: -`unknowndomain.encode("idna").decode("ascii")`. This method, converting to idna +`unknowndomain.encode("idna").decode("ascii")`. This method, converting to IDNA is idempotent. #### Handling Arbitrary Binary @@ -106,13 +106,13 @@ tuple of bytes. Note that the returned bytes may include byte patterns that cannot be decoded or represented as a standard domain name. 
Example: ```python -psl.privatesuffix((b"a.a", b"a.example\xff", b"com")) # (b"a.example\xff", b"com") +psl.privatesuffix((b"a.a", b"a.example\xff", b"com")) # (b"a.example\xff", b"com") # Note that IDNs must be punycoded when passed as tuple of bytes. psl = PublicSuffixList("例.example") -psl.publicsuffix((b"xn--fsq", b"example")) # (b"xn--fsq", b"example") +psl.publicsuffix((b"xn--fsq", b"example")) # (b"xn--fsq", b"example") # UTF-8 encoded bytes of "例" do not match. -psl.publicsuffix((b"\xe4\xbe\x8b", b"example")) # (b"example",) +psl.publicsuffix((b"\xe4\xbe\x8b", b"example")) # (b"example",) ``` License From 7d4d0d0db229f996824bd65741ed285ebb466d87 Mon Sep 17 00:00:00 2001 From: ko-zu Date: Fri, 13 Jun 2025 14:20:18 +0000 Subject: [PATCH 2/6] Update CI to use ubuntu-22.04 and Python 3.7 Signed-off-by: ko-zu --- .github/workflows/citest.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/citest.yml b/.github/workflows/citest.yml index 9d57857..5213c21 100644 --- a/.github/workflows/citest.yml +++ b/.github/workflows/citest.yml @@ -10,13 +10,13 @@ on: jobs: test-oldpython: - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" steps: - uses: actions/checkout@v4 - - name: Set up Python 3.5 + - name: Set up Python 3.7 uses: actions/setup-python@v5 with: - python-version: "3.5" + python-version: "3.7" env: PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org" # workaround for certificate incompatibility From 8f5e7dfcb41f925d9176b155fb5976b7ee6bd4c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 12:24:06 +0000 Subject: [PATCH 3/6] test: add comprehensive test coverage for publicsuffixlist Agent-Logs-Url: https://github.com/DewDropstempest/psl/sessions/2e03a6fc-9a19-48f1-b069-17d77020600f Co-authored-by: DewDropstempest <123912597+DewDropstempest@users.noreply.github.com> --- publicsuffixlist/test.py | 290 
++++++++++++++++++++++++++++++++++++++- 1 file changed, 289 insertions(+), 1 deletion(-) diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py index 759efb1..915cf69 100644 --- a/publicsuffixlist/test.py +++ b/publicsuffixlist/test.py @@ -11,7 +11,7 @@ import re import unittest -from publicsuffixlist import PublicSuffixList, b, encode_idn, u +from publicsuffixlist import PublicSuffixList, b, decode_idn, encode_idn, u def bytestuple(x): return tuple(bytes(x).split(b'.')) @@ -431,6 +431,294 @@ def test_icann(self): self.assertEqual(psl.publicsuffix("www.example.com"), 'com') self.assertEqual(psl.publicsuffix("example.priv.at"), 'at') + def test_icann_section_detection_custom_source(self): + # Only rules inside the ICANN section should be honoured when + # only_icann=True. The "private" rule must be ignored. + source = ( + "// ===BEGIN ICANN DOMAINS===\n" + "icann\n" + "// ===END ICANN DOMAINS===\n" + "private\n" + ) + psl = PublicSuffixList(source, only_icann=True) + # "icann" rule was in the ICANN section → treated as explicit public suffix + self.assertIsNone(psl.privatesuffix("icann")) + self.assertEqual(psl.publicsuffix("icann"), "icann") + self.assertEqual(psl.publicsuffix("example.icann"), "icann") + self.assertEqual(psl.privatesuffix("example.icann"), "example.icann") + # "private" rule was outside the ICANN section → falls through to the + # unknown-TLD path (accept_unknown=True default), so it still acts as + # a public suffix but only by the unknown-TLD rule, not an explicit one. + self.assertEqual(psl.publicsuffix("example.private"), "private") + self.assertEqual(psl.privatesuffix("example.private"), "example.private") + + def test_icann_no_markers(self): + # When the source has no ICANN section markers, only_icann=True means + # section_is_icann stays None (falsy) for every line → nothing is + # added to the suffix set, so all lookups fall back to accept_unknown. 
+ source = "com\nnet\n" + psl = PublicSuffixList(source, only_icann=True) + # With accept_unknown=True (default) an unknown single-label TLD is + # still treated as public, so "example.com" gets a private suffix via + # the unknown-TLD path. + self.assertEqual(psl.privatesuffix("example.com"), "example.com") + # An explicit ICANN entry was never loaded, so "com" itself is treated + # as an unknown TLD (public) rather than as an explicitly listed suffix. + self.assertIsNone(psl.privatesuffix("com")) + + def test_icann_private_domain_excluded(self): + # github.io is a private-section entry in the real PSL. + # With only_icann=True it should not be honoured as a public suffix, + # so "pages.github.io" should have a private suffix (via unknown-TLD + # fallback) rather than returning None. + psl = PublicSuffixList(only_icann=True) + result = psl.privatesuffix("pages.github.io") + self.assertIsNotNone(result) + + +class TestHelpers(unittest.TestCase): + + def test_u_function(self): + # bytes → str + self.assertEqual(u(b"hello"), "hello") + self.assertIsInstance(u(b"hello"), str) + # str passthrough + self.assertEqual(u("hello"), "hello") + self.assertIsInstance(u("hello"), str) + + def test_b_function(self): + # str → bytes + self.assertEqual(b("hello"), b"hello") + self.assertIsInstance(b("hello"), bytes) + # bytes passthrough + self.assertEqual(b(b"hello"), b"hello") + self.assertIsInstance(b(b"hello"), bytes) + # bytearray → bytes + self.assertEqual(b(bytearray(b"hello")), b"hello") + self.assertIsInstance(b(bytearray(b"hello")), bytes) + + def test_encode_idn(self): + result = encode_idn(u("例.jp")) + # Must be pure ASCII (punycode) + result.encode("ascii") + self.assertIn("jp", result) + self.assertNotIn("例", result) + + def test_decode_idn(self): + original = u("例.jp") + encoded = encode_idn(original) + self.assertEqual(decode_idn(encoded), original) + + def test_decode_idn_invalid(self): + # Invalid punycode must raise UnicodeError rather than silently produce + # 
a meaningless result. + self.assertRaises(UnicodeError, lambda: decode_idn("xn--invalid-punycode-zzzzzz.jp")) + + +class TestConstructorOptions(unittest.TestCase): + + def test_accept_encoded_idn_false(self): + # With accept_encoded_idn=False the punycode variant of an IDN rule is + # NOT added to the suffix set, so a punycoded domain that matches the + # IDN rule should fall back to the unknown-TLD path instead of being + # treated as a known public suffix in the usual way. + source = u("例.jp\n") + psl_with = PublicSuffixList(source, accept_encoded_idn=True) + psl_without = PublicSuffixList(source, accept_encoded_idn=False) + + puny_tld = encode_idn(u("例.jp")) # e.g. "xn--fsq.jp" + domain = "test." + puny_tld # e.g. "test.xn--fsq.jp" + + # With encoding enabled the punycoded rule is loaded → private suffix + # has exactly one private label. + self.assertEqual(psl_with.privatesuffix(domain), domain) + + # Without encoding the punycoded rule is absent → only the unknown-TLD + # fallback for "jp" applies (the source has no explicit "jp" rule), so the + # private suffix is expected to be "xn--fsq.jp", not the full domain.
+ result_without = psl_without.privatesuffix(domain) + self.assertNotEqual(result_without, domain) + + +class TestIsPrivatePublicEdgeCases(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_is_private_invalid_domain(self): + self.assertFalse(self.psl.is_private(".bad")) + self.assertFalse(self.psl.is_private("")) + self.assertFalse(self.psl.is_private("www..invalid")) + + def test_is_public_invalid_domain(self): + self.assertFalse(self.psl.is_public(".bad")) + self.assertFalse(self.psl.is_public("")) + self.assertFalse(self.psl.is_public("www..invalid")) + + def test_is_private_unknown_tld(self): + # Two-label domain under unknown TLD → private (registrable) + self.assertTrue(self.psl.is_private("example.unknowntld")) + # Three-label domain under unknown TLD → still private + self.assertTrue(self.psl.is_private("sub.example.unknowntld")) + + def test_is_public_unknown_tld(self): + # Single unknown TLD → public + self.assertTrue(self.psl.is_public("unknowntld")) + # Two-label domain under unknown TLD → not public + self.assertFalse(self.psl.is_public("example.unknowntld")) + + def test_is_private_is_public_trailing_dot(self): + # Trailing dot is ignored; domain reduces to valid form + self.assertTrue(self.psl.is_private("example.com.")) + self.assertFalse(self.psl.is_public("example.com.")) + + def test_is_public_known_tld(self): + self.assertTrue(self.psl.is_public("com")) + self.assertTrue(self.psl.is_public("co.jp")) + self.assertFalse(self.psl.is_public("example.com")) + + +class TestPrivatepartsBytestuple(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_privateparts_bytestuple_basic(self): + data = (b"www", b"example", b"com") + result = self.psl.privateparts(data) + # subdomain labels + private suffix tuple + self.assertEqual(result, (b"www", (b"example", b"com"))) + + def test_privateparts_bytestuple_no_subdomain(self): + data = (b"example", b"com") + result = self.psl.privateparts(data) 
+ self.assertEqual(result, ((b"example", b"com"),)) + + def test_privateparts_bytestuple_keepcase(self): + data = (b"Www", b"Example", b"Com") + result = self.psl.privateparts(data, keep_case=True) + self.assertEqual(result, (b"Www", (b"Example", b"Com"))) + + def test_privateparts_bytestuple_none(self): + # public suffix only → no private part + data = (b"com",) + self.assertIsNone(self.psl.privateparts(data)) + + +class TestSubdomainBytestuple(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_subdomain_bytestuple_depth0(self): + data = (b"aaa", b"www", b"example", b"com") + result = self.psl.subdomain(data, depth=0) + self.assertEqual(result, (b"example", b"com")) + + def test_subdomain_bytestuple_depth1(self): + data = (b"aaa", b"www", b"example", b"com") + result = self.psl.subdomain(data, depth=1) + self.assertEqual(result, (b"www", b"example", b"com")) + + def test_subdomain_bytestuple_overflow(self): + data = (b"example", b"com") + # depth=1 requires at least 3 labels (publen=1 + 1 private + 1 sub) + self.assertIsNone(self.psl.subdomain(data, depth=1)) + + def test_subdomain_bytestuple_public_only(self): + data = (b"com",) + self.assertIsNone(self.psl.subdomain(data, depth=0)) + + +class TestBytearrayTypeError(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_bytearray_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.suffix(bytearray(b"example.com"))) + + def test_bytearray_publicsuffix_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.publicsuffix(bytearray(b"example.com"))) + + def test_bytearray_privatesuffix_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.privatesuffix(bytearray(b"example.com"))) + + +class TestWildcardAcceptUnknown(unittest.TestCase): + + def setUp(self): + source = "*.bd\n" + # accept_unknown=True is the default; test it explicitly as well + self.psl = PublicSuffixList(source.splitlines(), 
accept_unknown=True) + + def test_bare_tld_is_public(self): + self.assertEqual(self.psl.publicsuffix("bd"), "bd") + self.assertIsNone(self.psl.privatesuffix("bd")) + + def test_one_label_under_wildcard_is_public(self): + # "example.bd" matches *.bd → example.bd is public suffix + self.assertEqual(self.psl.publicsuffix("example.bd"), "example.bd") + self.assertIsNone(self.psl.privatesuffix("example.bd")) + + def test_two_labels_under_wildcard_has_private(self): + self.assertEqual(self.psl.publicsuffix("sub.example.bd"), "example.bd") + self.assertEqual(self.psl.privatesuffix("sub.example.bd"), "sub.example.bd") + + +class TestCompatEdgeCases(unittest.TestCase): + + def setUp(self): + from publicsuffixlist.compat import PublicSuffixList, UnsafePublicSuffixList + self.psl = PublicSuffixList() + self.upsl = UnsafePublicSuffixList() + + def test_compat_unknown_tld(self): + self.assertEqual(self.psl.get_public_suffix("example.unknowntld"), "example.unknowntld") + + def test_compat_invalid_domain(self): + self.assertEqual(self.psl.get_public_suffix(".bad"), "") + + def test_compat_empty_string(self): + self.assertEqual(self.psl.get_public_suffix(""), "") + + def test_compat_very_long_domain(self): + d = "a." * 1000 + "example.com" + self.assertEqual(self.psl.get_public_suffix(d), "example.com") + + def test_unsafe_compat_fallback_public_suffix(self): + # When privatesuffix is None (e.g. bare TLD), UnsafePublicSuffixList + # falls back to returning the publicsuffix instead. 
+ self.assertEqual(self.upsl.get_public_suffix("com"), "com") + + def test_unsafe_compat_private_domain(self): + self.assertEqual(self.upsl.get_public_suffix("test.example.com"), "example.com") + + def test_unsafe_compat_invalid_domain(self): + self.assertEqual(self.upsl.get_public_suffix(".bad"), "") + + def test_unsafe_compat_empty_string(self): + self.assertEqual(self.upsl.get_public_suffix(""), "") + + +class TestLargePSLSource(unittest.TestCase): + + def test_many_rules_parsing(self): + # Build a PSL with 500 generated rules and verify lookups stay correct. + lines = ["// ===BEGIN ICANN DOMAINS==="] + lines += ["rule{0}.example".format(i) for i in range(500)] + lines += ["// ===END ICANN DOMAINS==="] + lines += ["com"] + source = "\n".join(lines) + psl = PublicSuffixList(source) + # An explicitly listed suffix should be recognised + self.assertIsNone(psl.privatesuffix("rule42.example")) + self.assertEqual(psl.publicsuffix("rule42.example"), "rule42.example") + # A sub-domain of that rule should be private + self.assertEqual(psl.privatesuffix("sub.rule42.example"), "sub.rule42.example") + # Standard TLD still works + self.assertEqual(psl.privatesuffix("example.com"), "example.com") + if __name__ == "__main__": unittest.main() From a432b3c9677d0d24a02346172b481c2b1a96ac08 Mon Sep 17 00:00:00 2001 From: DewDropstempest <123912597+DewDropstempest@users.noreply.github.com> Date: Sun, 12 Apr 2026 08:33:58 -0400 Subject: [PATCH 4/6] Update test.py. This file contains crucial test cases that validate the functionality of our application. By adding new scenarios and refactoring existing ones, we can ensure that the code remains robust and reliable. It is important to regularly review and enhance our test suite as the application evolves to catch potential bugs early in the development process.
--- publicsuffixlist/test.py | 72 ++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py index 915cf69..6cf7428 100644 --- a/publicsuffixlist/test.py +++ b/publicsuffixlist/test.py @@ -4,7 +4,7 @@ # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# file, you can obtain one at http://mozilla.org/MPL/2.0/. # import os @@ -66,8 +66,8 @@ def test_keepcase(self): def test_notpermitted_domain(self): # From the PSL definition, empty labels are not permitted. - # From the test_psl.txt, leading dot is not permitted. - # However, it seems most implementations ignore trailing dot. + # From the test_psl.txt, a leading dot is not permitted. + # However, it seems most implementations ignore the trailing dot. self.assertEqual(self.psl.suffix(".example.com"), None) self.assertEqual(self.psl.publicsuffix(".example.com"), None) @@ -215,33 +215,49 @@ def test_bytestuple_punycode(self): """ psl = PublicSuffixList(source) # punycoded ASCII should match - data = bytestuple("aaa.www.例.example".encode("idna")) - pubres = data[-2:] # xn--fsq.example - privres = data[-3:] - self.assertEqual(psl.publicsuffix(data), pubres) - self.assertEqual(psl.privatesuffix(data), privres) + data = bytestuple("aaa.www.例.example".enc```python +# -*- coding: utf-8 -*- +# +# Copyright 2014 ko-zu +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at http://mozilla.org/MPL/2.0/. 
+# - def test_bytestuple_utf8(self): - source = """ -example -例.example -""" - psl = PublicSuffixList(source) - # UTF-8 encoded bytes should NOT match - data = bytestuple("aaa.www.例.example".encode("utf8")) - pubres = data[-1:] # example - privres = data[-2:] - self.assertEqual(psl.publicsuffix(data), pubres) - self.assertEqual(psl.privatesuffix(data), privres) +import os +import re +import unittest - def test_bytestuple_otherencoding(self): - source = """ -example -例.example -""" - psl = PublicSuffixList(source.splitlines()) - # Shift_JIS encoded bytes should NOT match - data = bytestuple("aaa.www.例.example".encode("sjis")) +from publicsuffixlist import PublicSuffixList, b, decode_idn, encode_idn, u + +def bytestuple(x): + return tuple(bytes(x).split(b'.')) + +class TestPSL(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_typesafe(self): + self.assertEqual(self.psl.suffix("www.example.co.jp").__class__, "example.co.jp".__class__) + self.assertEqual(self.psl.suffix(u("www.example.co.jp")).__class__, u("example.co.jp").__class__) + + self.assertEqual(self.psl.publicsuffix("www.example.co.jp").__class__, "co.jp".__class__) + self.assertEqual(self.psl.publicsuffix(u("www.example.co.jp")).__class__, u("co.jp").__class__) + + def test_typesafe_bytestuple(self): + self.assertEqual( + self.psl.privatesuffix((b"www",b"example",b"co",b"jp")).__class__, + (b"example", b"co", b"jp").__class__) + self.assertEqual( + self.psl.publicsuffix((b"www",b"example",b"co",b"jp")).__class__, + (b"co", b"jp").__class__) + + def test_uppercase(self): + self.assertEqual(self.psl.suffix("Jp"), None) + self.assertEqual(self.psl.publicsuffix("Jp"), "jp") +```("aaa.www.例.example".encode("sjis")) pubres = data[-1:] # example privres = data[-2:] self.assertEqual(psl.publicsuffix(data), pubres) From b7886c6f82abd0be81d7f4fbb6cccd98a0c59352 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 
12:40:42 +0000 Subject: [PATCH 5/6] fix: restore corrupted test.py (markdown block injected mid-file) Agent-Logs-Url: https://github.com/DewDropstempest/psl/sessions/544777bc-a666-4096-ba06-15835e5b4a1c Co-authored-by: DewDropstempest <123912597+DewDropstempest@users.noreply.github.com> --- publicsuffixlist/test.py | 66 +++++++++++++++------------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py index 6cf7428..13f337d 100644 --- a/publicsuffixlist/test.py +++ b/publicsuffixlist/test.py @@ -215,49 +215,33 @@ def test_bytestuple_punycode(self): """ psl = PublicSuffixList(source) # punycoded ASCII should match - data = bytestuple("aaa.www.例.example".enc```python -# -*- coding: utf-8 -*- -# -# Copyright 2014 ko-zu -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, you can obtain one at http://mozilla.org/MPL/2.0/. 
-# - -import os -import re -import unittest - -from publicsuffixlist import PublicSuffixList, b, decode_idn, encode_idn, u - -def bytestuple(x): - return tuple(bytes(x).split(b'.')) - -class TestPSL(unittest.TestCase): - - def setUp(self): - self.psl = PublicSuffixList() - - def test_typesafe(self): - self.assertEqual(self.psl.suffix("www.example.co.jp").__class__, "example.co.jp".__class__) - self.assertEqual(self.psl.suffix(u("www.example.co.jp")).__class__, u("example.co.jp").__class__) - - self.assertEqual(self.psl.publicsuffix("www.example.co.jp").__class__, "co.jp".__class__) - self.assertEqual(self.psl.publicsuffix(u("www.example.co.jp")).__class__, u("co.jp").__class__) + data = bytestuple("aaa.www.例.example".encode("idna")) + pubres = data[-2:] # xn--fsq.example + privres = data[-3:] + self.assertEqual(psl.publicsuffix(data), pubres) + self.assertEqual(psl.privatesuffix(data), privres) - def test_typesafe_bytestuple(self): - self.assertEqual( - self.psl.privatesuffix((b"www",b"example",b"co",b"jp")).__class__, - (b"example", b"co", b"jp").__class__) - self.assertEqual( - self.psl.publicsuffix((b"www",b"example",b"co",b"jp")).__class__, - (b"co", b"jp").__class__) + def test_bytestuple_utf8(self): + source = """ +example +例.example +""" + psl = PublicSuffixList(source) + # UTF-8 encoded bytes should NOT match + data = bytestuple("aaa.www.例.example".encode("utf8")) + pubres = data[-1:] # example + privres = data[-2:] + self.assertEqual(psl.publicsuffix(data), pubres) + self.assertEqual(psl.privatesuffix(data), privres) - def test_uppercase(self): - self.assertEqual(self.psl.suffix("Jp"), None) - self.assertEqual(self.psl.publicsuffix("Jp"), "jp") -```("aaa.www.例.example".encode("sjis")) + def test_bytestuple_otherencoding(self): + source = """ +example +例.example +""" + psl = PublicSuffixList(source.splitlines()) + # Shift_JIS encoded bytes should NOT match + data = bytestuple("aaa.www.例.example".encode("sjis")) pubres = data[-1:] # example privres = 
data[-2:] self.assertEqual(psl.publicsuffix(data), pubres) From 9288a38f7fff23fffb197756be545ab46b2e04b9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 25 Apr 2026 20:20:45 +0000 Subject: [PATCH 6/6] feat: add Copilot metrics dashboard with daily GitHub Actions collector Agent-Logs-Url: https://github.com/DewDropstempest/psl/sessions/589aff3d-5c78-47b9-b37b-ede03411ce14 Co-authored-by: DewDropstempest <123912597+DewDropstempest@users.noreply.github.com> --- .github/workflows/copilot-metrics.yml | 105 +++++++++ copilot-dashboard/README.md | 84 ++++++++ copilot-dashboard/data/metrics.json | 1 + copilot-dashboard/index.html | 296 ++++++++++++++++++++++++++ 4 files changed, 486 insertions(+) create mode 100644 .github/workflows/copilot-metrics.yml create mode 100644 copilot-dashboard/README.md create mode 100644 copilot-dashboard/data/metrics.json create mode 100644 copilot-dashboard/index.html diff --git a/.github/workflows/copilot-metrics.yml b/.github/workflows/copilot-metrics.yml new file mode 100644 index 0000000..1068044 --- /dev/null +++ b/.github/workflows/copilot-metrics.yml @@ -0,0 +1,105 @@ +name: Copilot Metrics Collector + +on: + schedule: + # Runs every day at 06:00 UTC (after GitHub's nightly data refresh) + - cron: "0 6 * * *" + workflow_dispatch: + # Also allows manual triggering from the Actions tab + +permissions: + contents: write # needed to commit the updated metrics file back to the repo + +jobs: + fetch-metrics: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3" + + - name: Fetch Copilot usage metrics + env: + COPILOT_METRICS_TOKEN: ${{ secrets.COPILOT_METRICS_TOKEN }} + ORG: DewDropstempest + run: | + python - <<'PYEOF' + import json + import os + import urllib.request + import urllib.error + from datetime import datetime, timezone + + token = 
os.environ["COPILOT_METRICS_TOKEN"] + org = os.environ["ORG"] + + url = f"https://api.github.com/orgs/{org}/copilot/usage" + + req = urllib.request.Request( + url, + headers={ + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) + + try: + with urllib.request.urlopen(req) as resp: + data = json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + body = e.read().decode() + print(f"HTTP {e.code}: {body}") + raise SystemExit(f"Failed to fetch metrics: {e.code}") + + # ── Normalise field names ────────────────────────────────────────── + # The API returns either the v1 shape (flat total_*_count fields) or the + # v2 shape (counts under copilot_ide_code_completions). We normalise to v1. + normalised = [] + for day in data: + entry = {"day": day.get("day", day.get("date", "unknown"))} + if "total_suggestions_count" in day: + # v1 shape – already in the format we want + entry["total_active_users"] = day.get("total_active_users", 0) + entry["total_suggestions_count"] = day.get("total_suggestions_count", 0) + entry["total_acceptances_count"] = day.get("total_acceptances_count", 0) + entry["total_lines_suggested"] = day.get("total_lines_suggested", 0) + entry["total_lines_accepted"] = day.get("total_lines_accepted", 0) + elif "copilot_ide_code_completions" in day: + # v2 / newer shape + cc = day.get("copilot_ide_code_completions") or {} + entry["total_active_users"] = day.get("total_active_users", 0) + entry["total_suggestions_count"] = cc.get("total_suggestions_count", 0) + entry["total_acceptances_count"] = cc.get("total_acceptances_count", 0) + entry["total_lines_suggested"] = cc.get("total_lines_suggested", 0) + entry["total_lines_accepted"] = cc.get("total_lines_accepted", 0) + else: + # Fallback – store whatever we got so the dashboard can adapt + entry.update({k: v for k, v in day.items() if k != "breakdown"}) + normalised.append(entry) + + out_path = "copilot-dashboard/data/metrics.json"
+ os.makedirs(os.path.dirname(out_path), exist_ok=True) + + with open(out_path, "w") as f: + json.dump(normalised, f, indent=2) + + print(f"Wrote {len(normalised)} day(s) of metrics to {out_path}") + PYEOF + + - name: Commit updated metrics + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add copilot-dashboard/data/metrics.json + if git diff --cached --quiet; then + echo "No changes to metrics data – skipping commit." + else + git commit -m "chore: update Copilot metrics [skip ci]" + git push + fi diff --git a/copilot-dashboard/README.md b/copilot-dashboard/README.md new file mode 100644 index 0000000..7bce5fa --- /dev/null +++ b/copilot-dashboard/README.md @@ -0,0 +1,84 @@ +# Copilot Metrics Dashboard + +A self-hosted dashboard that pulls GitHub Copilot usage data daily and +displays it as interactive charts — no third-party service required. + +--- + +## How it works + +1. A GitHub Actions workflow (`copilot-metrics.yml`) runs every day at 06:00 UTC. +2. It calls the [GitHub Copilot Usage API](https://docs.github.com/en/rest/copilot/copilot-usage) and saves the results to `data/metrics.json`. +3. The dashboard (`index.html`) reads that JSON file and renders charts automatically. + +--- + +## ✅ One-time setup (the only manual step) + +You need to create a secret called **`COPILOT_METRICS_TOKEN`** in this repository. + +### Step 1 — Create a Personal Access Token + +1. Go to **GitHub.com → your profile → Settings → Developer settings → Personal access tokens → Fine-grained tokens** +2. Click **"Generate new token"** +3. Set: + - **Token name**: `copilot-metrics-dashboard` (or anything you like) + - **Resource owner**: `DewDropstempest` (the organization) + - **Repository access**: `Only select repositories` → pick this repo + - **Permissions → Organization permissions → GitHub Copilot Business → Access: Read-only** +4. 
Click **"Generate token"** and **copy the token value** (you only see it once) + +> If Fine-grained tokens don't show a Copilot permission yet, create a **Classic token** with the `manage_billing:copilot` scope instead. + +### Step 2 — Add the secret to this repository + +1. Go to **this repository → Settings → Secrets and variables → Actions** +2. Click **"New repository secret"** +3. Name: `COPILOT_METRICS_TOKEN` +4. Value: paste the token you just copied +5. Click **"Add secret"** + +That's it — you're done! + +--- + +## Viewing the dashboard + +### Option A — GitHub Pages (recommended, zero extra cost) + +1. Go to **this repository → Settings → Pages** +2. Under **"Source"** choose **"Deploy from a branch"** +3. Branch: `main` (or whichever branch this code is on), Folder: `/copilot-dashboard` +4. Click **Save** + +Your dashboard will be live at: +``` +https://DewDropstempest.github.io/psl/ +``` + +### Option B — Open locally + +Just open `copilot-dashboard/index.html` in any modern browser (after the first +workflow run has populated `data/metrics.json`). + +--- + +## Triggering the first run + +Don't want to wait until 06:00 UTC? Run it now: + +1. Go to **this repository → Actions → Copilot Metrics Collector** +2. Click **"Run workflow" → Run workflow** + +The `data/metrics.json` file will be committed automatically and the dashboard will show data within a minute or two. 
+ +--- + +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---------|-------------|-----| +| Workflow fails with `HTTP 403` | Token missing or wrong scope | Re-check Step 1 — ensure Copilot read permission is set | +| Workflow succeeds but dashboard shows "No data yet" | API returned an empty array | Make sure Copilot is enabled for the org and at least one seat is active | +| Workflow fails with `HTTP 404` | Org doesn't have Copilot Business/Enterprise | Purchase a Copilot plan for the org | +| Dashboard shows stale data | Workflow hasn't run yet today | Trigger manually (see above) | diff --git a/copilot-dashboard/data/metrics.json b/copilot-dashboard/data/metrics.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/copilot-dashboard/data/metrics.json @@ -0,0 +1 @@ +[] diff --git a/copilot-dashboard/index.html b/copilot-dashboard/index.html new file mode 100644 index 0000000..9f9844a --- /dev/null +++ b/copilot-dashboard/index.html @@ -0,0 +1,296 @@ + + + + + + Copilot Metrics Dashboard + + + + + +
+ + + + + +
+

GitHub Copilot Metrics

+

DewDropstempest organization · Daily usage data

+
+
+
Last updated
+
+
+
+ +
+

No data yet

+

+ The dashboard will populate automatically after the first scheduled run of the
+ Copilot Metrics Collector GitHub Actions workflow.

+ You can also trigger it manually: Actions → Copilot Metrics Collector → Run workflow. +

+
+ + + + + +