From 8f5e7dfcb41f925d9176b155fb5976b7ee6bd4c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 12:24:06 +0000 Subject: [PATCH 1/3] test: add comprehensive test coverage for publicsuffixlist Agent-Logs-Url: https://github.com/DewDropstempest/psl/sessions/2e03a6fc-9a19-48f1-b069-17d77020600f Co-authored-by: DewDropstempest <123912597+DewDropstempest@users.noreply.github.com> --- publicsuffixlist/test.py | 290 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 289 insertions(+), 1 deletion(-) diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py index 759efb1..915cf69 100644 --- a/publicsuffixlist/test.py +++ b/publicsuffixlist/test.py @@ -11,7 +11,7 @@ import re import unittest -from publicsuffixlist import PublicSuffixList, b, encode_idn, u +from publicsuffixlist import PublicSuffixList, b, decode_idn, encode_idn, u def bytestuple(x): return tuple(bytes(x).split(b'.')) @@ -431,6 +431,294 @@ def test_icann(self): self.assertEqual(psl.publicsuffix("www.example.com"), 'com') self.assertEqual(psl.publicsuffix("example.priv.at"), 'at') + def test_icann_section_detection_custom_source(self): + # Only rules inside the ICANN section should be honoured when + # only_icann=True. The "private" rule must be ignored. + source = ( + "// ===BEGIN ICANN DOMAINS===\n" + "icann\n" + "// ===END ICANN DOMAINS===\n" + "private\n" + ) + psl = PublicSuffixList(source, only_icann=True) + # "icann" rule was in the ICANN section → treated as explicit public suffix + self.assertIsNone(psl.privatesuffix("icann")) + self.assertEqual(psl.publicsuffix("icann"), "icann") + self.assertEqual(psl.publicsuffix("example.icann"), "icann") + self.assertEqual(psl.privatesuffix("example.icann"), "example.icann") + # "private" rule was outside the ICANN section → falls through to the + # unknown-TLD path (accept_unknown=True default), so it still acts as + # a public suffix but only by the unknown-TLD rule, not an explicit one. + self.assertEqual(psl.publicsuffix("example.private"), "private") + self.assertEqual(psl.privatesuffix("example.private"), "example.private") + + def test_icann_no_markers(self): + # When the source has no ICANN section markers, only_icann=True means + # section_is_icann stays None (falsy) for every line → nothing is + # added to the suffix set, so all lookups fall back to accept_unknown. + source = "com\nnet\n" + psl = PublicSuffixList(source, only_icann=True) + # With accept_unknown=True (default) an unknown single-label TLD is + # still treated as public, so "example.com" gets a private suffix via + # the unknown-TLD path. + self.assertEqual(psl.privatesuffix("example.com"), "example.com") + # An explicit ICANN entry was never loaded, so "com" itself is treated + # as an unknown TLD (public) rather than as an explicitly listed suffix. + self.assertIsNone(psl.privatesuffix("com")) + + def test_icann_private_domain_excluded(self): + # github.io is a private-section entry in the real PSL. + # With only_icann=True it should not be honoured as a public suffix, + # so "pages.github.io" should have a private suffix (via unknown-TLD + # fallback) rather than returning None. + psl = PublicSuffixList(only_icann=True) + result = psl.privatesuffix("pages.github.io") + self.assertIsNotNone(result) + + +class TestHelpers(unittest.TestCase): + + def test_u_function(self): + # bytes → str + self.assertEqual(u(b"hello"), "hello") + self.assertIsInstance(u(b"hello"), str) + # str passthrough + self.assertEqual(u("hello"), "hello") + self.assertIsInstance(u("hello"), str) + + def test_b_function(self): + # str → bytes + self.assertEqual(b("hello"), b"hello") + self.assertIsInstance(b("hello"), bytes) + # bytes passthrough + self.assertEqual(b(b"hello"), b"hello") + self.assertIsInstance(b(b"hello"), bytes) + # bytearray → bytes + self.assertEqual(b(bytearray(b"hello")), b"hello") + self.assertIsInstance(b(bytearray(b"hello")), bytes) + + def test_encode_idn(self): + result = encode_idn(u("例.jp")) + # Must be pure ASCII (punycode) + result.encode("ascii") + self.assertIn("jp", result) + self.assertNotIn("例", result) + + def test_decode_idn(self): + original = u("例.jp") + encoded = encode_idn(original) + self.assertEqual(decode_idn(encoded), original) + + def test_decode_idn_invalid(self): + # Invalid punycode must raise UnicodeError rather than silently produce + # a meaningless result. + self.assertRaises(UnicodeError, lambda: decode_idn("xn--invalid-punycode-zzzzzz.jp")) + + +class TestConstructorOptions(unittest.TestCase): + + def test_accept_encoded_idn_false(self): + # With accept_encoded_idn=False the punycode variant of an IDN rule is + # NOT added to the suffix set, so a punycoded domain that matches the + # IDN rule should fall back to the unknown-TLD path instead of being + # treated as a known public suffix in the usual way. + source = u("例.jp\n") + psl_with = PublicSuffixList(source, accept_encoded_idn=True) + psl_without = PublicSuffixList(source, accept_encoded_idn=False) + + puny_tld = encode_idn(u("例.jp")) # e.g. "xn--fsq.jp" + domain = "test." + puny_tld # e.g. "test.xn--fsq.jp" + + # With encoding enabled the punycoded rule is loaded → private suffix + # has exactly one private label. + self.assertEqual(psl_with.privatesuffix(domain), domain) + + # Without encoding the punycoded rule is absent → only the base "jp" + # rule (via unknown TLD fallback or explicit jp) applies, so the + # private suffix includes more of the domain. + result_without = psl_without.privatesuffix(domain) + self.assertNotEqual(result_without, domain) + + +class TestIsPrivatePublicEdgeCases(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_is_private_invalid_domain(self): + self.assertFalse(self.psl.is_private(".bad")) + self.assertFalse(self.psl.is_private("")) + self.assertFalse(self.psl.is_private("www..invalid")) + + def test_is_public_invalid_domain(self): + self.assertFalse(self.psl.is_public(".bad")) + self.assertFalse(self.psl.is_public("")) + self.assertFalse(self.psl.is_public("www..invalid")) + + def test_is_private_unknown_tld(self): + # Two-label domain under unknown TLD → private (registrable) + self.assertTrue(self.psl.is_private("example.unknowntld")) + # Three-label domain under unknown TLD → still private + self.assertTrue(self.psl.is_private("sub.example.unknowntld")) + + def test_is_public_unknown_tld(self): + # Single unknown TLD → public + self.assertTrue(self.psl.is_public("unknowntld")) + # Two-label domain under unknown TLD → not public + self.assertFalse(self.psl.is_public("example.unknowntld")) + + def test_is_private_is_public_trailing_dot(self): + # Trailing dot is ignored; domain reduces to valid form + self.assertTrue(self.psl.is_private("example.com.")) + self.assertFalse(self.psl.is_public("example.com.")) + + def test_is_public_known_tld(self): + self.assertTrue(self.psl.is_public("com")) + self.assertTrue(self.psl.is_public("co.jp")) + self.assertFalse(self.psl.is_public("example.com")) + + +class TestPrivatepartsBytestuple(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_privateparts_bytestuple_basic(self): + data = (b"www", b"example", b"com") + result = self.psl.privateparts(data) + # subdomain labels + private suffix tuple + self.assertEqual(result, (b"www", (b"example", b"com"))) + + def test_privateparts_bytestuple_no_subdomain(self): + data = (b"example", b"com") + result = self.psl.privateparts(data) + self.assertEqual(result, ((b"example", b"com"),)) + + def test_privateparts_bytestuple_keepcase(self): + data = (b"Www", b"Example", b"Com") + result = self.psl.privateparts(data, keep_case=True) + self.assertEqual(result, (b"Www", (b"Example", b"Com"))) + + def test_privateparts_bytestuple_none(self): + # public suffix only → no private part + data = (b"com",) + self.assertIsNone(self.psl.privateparts(data)) + + +class TestSubdomainBytestuple(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_subdomain_bytestuple_depth0(self): + data = (b"aaa", b"www", b"example", b"com") + result = self.psl.subdomain(data, depth=0) + self.assertEqual(result, (b"example", b"com")) + + def test_subdomain_bytestuple_depth1(self): + data = (b"aaa", b"www", b"example", b"com") + result = self.psl.subdomain(data, depth=1) + self.assertEqual(result, (b"www", b"example", b"com")) + + def test_subdomain_bytestuple_overflow(self): + data = (b"example", b"com") + # depth=1 requires at least 3 labels (publen=1 + 1 private + 1 sub) + self.assertIsNone(self.psl.subdomain(data, depth=1)) + + def test_subdomain_bytestuple_public_only(self): + data = (b"com",) + self.assertIsNone(self.psl.subdomain(data, depth=0)) + + +class TestBytearrayTypeError(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_bytearray_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.suffix(bytearray(b"example.com"))) + + def test_bytearray_publicsuffix_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.publicsuffix(bytearray(b"example.com"))) + + def test_bytearray_privatesuffix_raises_typeerror(self): + self.assertRaises(TypeError, lambda: self.psl.privatesuffix(bytearray(b"example.com"))) + + +class TestWildcardAcceptUnknown(unittest.TestCase): + + def setUp(self): + source = "*.bd\n" + # accept_unknown=True is the default; test it explicitly as well + self.psl = PublicSuffixList(source.splitlines(), accept_unknown=True) + + def test_bare_tld_is_public(self): + self.assertEqual(self.psl.publicsuffix("bd"), "bd") + self.assertIsNone(self.psl.privatesuffix("bd")) + + def test_one_label_under_wildcard_is_public(self): + # "example.bd" matches *.bd → example.bd is public suffix + self.assertEqual(self.psl.publicsuffix("example.bd"), "example.bd") + self.assertIsNone(self.psl.privatesuffix("example.bd")) + + def test_two_labels_under_wildcard_has_private(self): + self.assertEqual(self.psl.publicsuffix("sub.example.bd"), "example.bd") + self.assertEqual(self.psl.privatesuffix("sub.example.bd"), "sub.example.bd") + + +class TestCompatEdgeCases(unittest.TestCase): + + def setUp(self): + from publicsuffixlist.compat import PublicSuffixList, UnsafePublicSuffixList + self.psl = PublicSuffixList() + self.upsl = UnsafePublicSuffixList() + + def test_compat_unknown_tld(self): + self.assertEqual(self.psl.get_public_suffix("example.unknowntld"), "example.unknowntld") + + def test_compat_invalid_domain(self): + self.assertEqual(self.psl.get_public_suffix(".bad"), "") + + def test_compat_empty_string(self): + self.assertEqual(self.psl.get_public_suffix(""), "") + + def test_compat_very_long_domain(self): + d = "a." * 1000 + "example.com" + self.assertEqual(self.psl.get_public_suffix(d), "example.com") + + def test_unsafe_compat_fallback_public_suffix(self): + # When privatesuffix is None (e.g. bare TLD), UnsafePublicSuffixList + # falls back to returning the publicsuffix instead. + self.assertEqual(self.upsl.get_public_suffix("com"), "com") + + def test_unsafe_compat_private_domain(self): + self.assertEqual(self.upsl.get_public_suffix("test.example.com"), "example.com") + + def test_unsafe_compat_invalid_domain(self): + self.assertEqual(self.upsl.get_public_suffix(".bad"), "") + + def test_unsafe_compat_empty_string(self): + self.assertEqual(self.upsl.get_public_suffix(""), "") + + +class TestLargePSLSource(unittest.TestCase): + + def test_many_rules_parsing(self): + # Build a PSL with thousands of rules and verify lookups stay correct. + lines = ["// ===BEGIN ICANN DOMAINS==="] + lines += ["rule{0}.example".format(i) for i in range(500)] + lines += ["// ===END ICANN DOMAINS==="] + lines += ["com"] + source = "\n".join(lines) + psl = PublicSuffixList(source) + # An explicitly listed suffix should be recognised + self.assertIsNone(psl.privatesuffix("rule42.example")) + self.assertEqual(psl.publicsuffix("rule42.example"), "rule42.example") + # A sub-domain of that rule should be private + self.assertEqual(psl.privatesuffix("sub.rule42.example"), "sub.rule42.example") + # Standard TLD still works + self.assertEqual(psl.privatesuffix("example.com"), "example.com") + if __name__ == "__main__": unittest.main() From a432b3c9677d0d24a02346172b481c2b1a96ac08 Mon Sep 17 00:00:00 2001 From: DewDropstempest <123912597+DewDropstempest@users.noreply.github.com> Date: Sun, 12 Apr 2026 08:33:58 -0400 Subject: [PATCH 2/3] Update test.py. This file contains crucial test cases that validate the functionality of our application. By adding new scenarios and refactoring existing ones, we can ensure that the code remains robust and reliable. It is important to regularly review and enhance our test suite as the application evolves to catch potential bugs early in the development process. --- publicsuffixlist/test.py | 72 ++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py index 915cf69..6cf7428 100644 --- a/publicsuffixlist/test.py +++ b/publicsuffixlist/test.py @@ -4,7 +4,7 @@ # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# file, you can obtain one at http://mozilla.org/MPL/2.0/. # import os @@ -66,8 +66,8 @@ def test_keepcase(self): def test_notpermitted_domain(self): # From the PSL definition, empty labels are not permitted. - # From the test_psl.txt, leading dot is not permitted. - # However, it seems most implementations ignore trailing dot. + # From the test_psl.txt, a leading dot is not permitted. + # However, it seems most implementations ignore the trailing dot. self.assertEqual(self.psl.suffix(".example.com"), None) self.assertEqual(self.psl.publicsuffix(".example.com"), None) @@ -215,33 +215,49 @@ def test_bytestuple_punycode(self): """ psl = PublicSuffixList(source) # punycoded ASCII should match - data = bytestuple("aaa.www.例.example".encode("idna")) - pubres = data[-2:] # xn--fsq.example - privres = data[-3:] - self.assertEqual(psl.publicsuffix(data), pubres) - self.assertEqual(psl.privatesuffix(data), privres) + data = bytestuple("aaa.www.例.example".enc```python +# -*- coding: utf-8 -*- +# +# Copyright 2014 ko-zu +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at http://mozilla.org/MPL/2.0/. +# - def test_bytestuple_utf8(self): - source = """ -example -例.example -""" - psl = PublicSuffixList(source) - # UTF-8 encoded bytes should NOT match - data = bytestuple("aaa.www.例.example".encode("utf8")) - pubres = data[-1:] # example - privres = data[-2:] - self.assertEqual(psl.publicsuffix(data), pubres) - self.assertEqual(psl.privatesuffix(data), privres) +import os +import re +import unittest - def test_bytestuple_otherencoding(self): - source = """ -example -例.example -""" - psl = PublicSuffixList(source.splitlines()) - # Shift_JIS encoded bytes should NOT match - data = bytestuple("aaa.www.例.example".encode("sjis")) +from publicsuffixlist import PublicSuffixList, b, decode_idn, encode_idn, u + +def bytestuple(x): + return tuple(bytes(x).split(b'.')) + +class TestPSL(unittest.TestCase): + + def setUp(self): + self.psl = PublicSuffixList() + + def test_typesafe(self): + self.assertEqual(self.psl.suffix("www.example.co.jp").__class__, "example.co.jp".__class__) + self.assertEqual(self.psl.suffix(u("www.example.co.jp")).__class__, u("example.co.jp").__class__) + + self.assertEqual(self.psl.publicsuffix("www.example.co.jp").__class__, "co.jp".__class__) + self.assertEqual(self.psl.publicsuffix(u("www.example.co.jp")).__class__, u("co.jp").__class__) + + def test_typesafe_bytestuple(self): + self.assertEqual( + self.psl.privatesuffix((b"www",b"example",b"co",b"jp")).__class__, + (b"example", b"co", b"jp").__class__) + self.assertEqual( + self.psl.publicsuffix((b"www",b"example",b"co",b"jp")).__class__, + (b"co", b"jp").__class__) + + def test_uppercase(self): + self.assertEqual(self.psl.suffix("Jp"), None) + self.assertEqual(self.psl.publicsuffix("Jp"), "jp") +```("aaa.www.例.example".encode("sjis")) pubres = data[-1:] # example privres = data[-2:] self.assertEqual(psl.publicsuffix(data), pubres) From b7886c6f82abd0be81d7f4fbb6cccd98a0c59352 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 12:40:42 +0000 Subject: [PATCH 3/3] fix: restore corrupted test.py (markdown block injected mid-file) Agent-Logs-Url: https://github.com/DewDropstempest/psl/sessions/544777bc-a666-4096-ba06-15835e5b4a1c Co-authored-by: DewDropstempest <123912597+DewDropstempest@users.noreply.github.com> --- publicsuffixlist/test.py | 66 +++++++++++++++------------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py index 6cf7428..13f337d 100644 --- a/publicsuffixlist/test.py +++ b/publicsuffixlist/test.py @@ -215,49 +215,33 @@ def test_bytestuple_punycode(self): """ psl = PublicSuffixList(source) # punycoded ASCII should match - data = bytestuple("aaa.www.例.example".enc```python -# -*- coding: utf-8 -*- -# -# Copyright 2014 ko-zu -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, you can obtain one at http://mozilla.org/MPL/2.0/. -# - -import os -import re -import unittest - -from publicsuffixlist import PublicSuffixList, b, decode_idn, encode_idn, u - -def bytestuple(x): - return tuple(bytes(x).split(b'.')) - -class TestPSL(unittest.TestCase): - - def setUp(self): - self.psl = PublicSuffixList() - - def test_typesafe(self): - self.assertEqual(self.psl.suffix("www.example.co.jp").__class__, "example.co.jp".__class__) - self.assertEqual(self.psl.suffix(u("www.example.co.jp")).__class__, u("example.co.jp").__class__) - - self.assertEqual(self.psl.publicsuffix("www.example.co.jp").__class__, "co.jp".__class__) - self.assertEqual(self.psl.publicsuffix(u("www.example.co.jp")).__class__, u("co.jp").__class__) + data = bytestuple("aaa.www.例.example".encode("idna")) + pubres = data[-2:] # xn--fsq.example + privres = data[-3:] + self.assertEqual(psl.publicsuffix(data), pubres) + self.assertEqual(psl.privatesuffix(data), privres) - def test_typesafe_bytestuple(self): - self.assertEqual( - self.psl.privatesuffix((b"www",b"example",b"co",b"jp")).__class__, - (b"example", b"co", b"jp").__class__) - self.assertEqual( - self.psl.publicsuffix((b"www",b"example",b"co",b"jp")).__class__, - (b"co", b"jp").__class__) + def test_bytestuple_utf8(self): + source = """ +example +例.example +""" + psl = PublicSuffixList(source) + # UTF-8 encoded bytes should NOT match + data = bytestuple("aaa.www.例.example".encode("utf8")) + pubres = data[-1:] # example + privres = data[-2:] + self.assertEqual(psl.publicsuffix(data), pubres) + self.assertEqual(psl.privatesuffix(data), privres) - def test_uppercase(self): - self.assertEqual(self.psl.suffix("Jp"), None) - self.assertEqual(self.psl.publicsuffix("Jp"), "jp") -```("aaa.www.例.example".encode("sjis")) + def test_bytestuple_otherencoding(self): + source = """ +example +例.example +""" + psl = PublicSuffixList(source.splitlines()) + # Shift_JIS encoded bytes should NOT match + data = bytestuple("aaa.www.例.example".encode("sjis")) pubres = data[-1:] # example privres = data[-2:] self.assertEqual(psl.publicsuffix(data), pubres)