From 05c8664995a19515ebafd5fb0e6ca7d00565c857 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:57:53 +0900 Subject: [PATCH 1/5] fix(pypi): handle unnormalized package names when extracting sdist version With this change we are handling more of the edge cases for when the filenames are more complex. The initial code had bugs when the sdist name had `-` in the name part. This code is easier to read and a little bit more explicit about how it handles things. We will use it later to only return the `whl` and `sdist` entries for the versions requested through the requirements lock file. This is to make it possible to write facts only for the versions that we use. Work towards #2731 --- python/private/pypi/BUILD.bazel | 3 ++ python/private/pypi/parse_simpleapi_html.bzl | 26 ++-------- python/private/pypi/version_from_filename.bzl | 41 ++++++++++++++++ tests/pypi/version_from_filename/BUILD.bazel | 3 ++ .../version_from_filename_tests.bzl | 49 +++++++++++++++++++ 5 files changed, 99 insertions(+), 23 deletions(-) create mode 100644 python/private/pypi/version_from_filename.bzl create mode 100644 tests/pypi/version_from_filename/BUILD.bazel create mode 100644 tests/pypi/version_from_filename/version_from_filename_tests.bzl diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 6bfd64652e..a3a876d084 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -241,6 +241,9 @@ bzl_library( bzl_library( name = "parse_simpleapi_html_bzl", srcs = ["parse_simpleapi_html.bzl"], + deps = [ + ":version_from_filename_bzl", + ] ) bzl_library( diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl index a41f0750c4..23ecbf496f 100644 --- a/python/private/pypi/parse_simpleapi_html.bzl +++ b/python/private/pypi/parse_simpleapi_html.bzl @@ -16,6 +16,8 @@ Parse SimpleAPI HTML in Starlark. 
""" +load(":version_from_filename.bzl", "version_from_filename") + def parse_simpleapi_html(*, url, content): """Get the package URLs for given shas by parsing the Simple API HTML. @@ -64,7 +66,7 @@ def parse_simpleapi_html(*, url, content): head, _, _ = tail.rpartition("</a>") maybe_metadata, _, filename = head.rpartition(">") - version = _version(filename) + version = version_from_filename(filename) sha256s_by_version.setdefault(version, []).append(sha256) metadata_sha256 = "" @@ -105,28 +107,6 @@ def parse_simpleapi_html(*, url, content): sha256s_by_version = sha256s_by_version, ) -_SDIST_EXTS = [ - ".tar", # handles any compression - ".zip", -] - -def _version(filename): - # See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format - - _, _, tail = filename.partition("-") - version, _, _ = tail.partition("-") - if version != tail: - # The format is {name}-{version}-{whl_specifiers}.whl - return version - - # NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path - - # {name}-{version}.{ext} - for ext in _SDIST_EXTS: - version, _, _ = version.partition(ext) # build or name - - return version - def _get_root_directory(url): scheme_end = url.find("://") if scheme_end == -1: diff --git a/python/private/pypi/version_from_filename.bzl b/python/private/pypi/version_from_filename.bzl new file mode 100644 index 0000000000..d6c1c7b9e9 --- /dev/null +++ b/python/private/pypi/version_from_filename.bzl @@ -0,0 +1,41 @@ +"""Parse the version of the thing just from the filename. This is useful for selecting files based on the requested version.""" + +_SDIST_EXTS = [ + ".tar", # handles any compression + ".zip", +] + +def version_from_filename(filename, _fail=fail): + """Parse the version of the filename. + + Args: + filename: {type}`str` the filename. + _fail: The fail function. + + Returns: + A string version or None if we could not parse the version. 
+ """ + # See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format + + if filename.endswith(".whl"): + # The format is {name}-{version}-{whl_specifiers}.whl + _, _, version = filename.partition("-") + version, _, _ = version.partition("-") + return version + + # NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path + + # {name}-{version}.{ext} + head = "" + for ext in _SDIST_EXTS: + head, _, _ = filename.rpartition(ext) # build or name + if head: + break + + if not head: + _fail("Unsupported sdist extension: {filename}".format(filename=filename)) + return None + + # Based on PEP440 the version number cannot include dashes + _, _, version = head.rpartition("-") + return version diff --git a/tests/pypi/version_from_filename/BUILD.bazel b/tests/pypi/version_from_filename/BUILD.bazel new file mode 100644 index 0000000000..e9d50dc6b8 --- /dev/null +++ b/tests/pypi/version_from_filename/BUILD.bazel @@ -0,0 +1,3 @@ +load(":version_from_filename_tests.bzl", "version_from_filename_test_suite") + +version_from_filename_test_suite(name = "version_from_filename_tests") diff --git a/tests/pypi/version_from_filename/version_from_filename_tests.bzl b/tests/pypi/version_from_filename/version_from_filename_tests.bzl new file mode 100644 index 0000000000..8fcc8553a8 --- /dev/null +++ b/tests/pypi/version_from_filename/version_from_filename_tests.bzl @@ -0,0 +1,49 @@ +"" + +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private/pypi:version_from_filename.bzl", "version_from_filename") + +_tests = [] + +def _test_wheel_version_extraction(env): + # Case 1: Standard sdist + env.expect.that_str(version_from_filename("foo-1.2.3-py3-none-any.whl")).equals("1.2.3") + +_tests.append(_test_wheel_version_extraction) + +def _test_sdist_version_extraction(env): + # Case 1: Standard sdist + env.expect.that_str(version_from_filename("foo-1.2.3.tar.gz")).equals("1.2.3") + + # Case 2: 
PEP 625 - Project name has underscores (normalized from dashes) + # If the package is 'my-pkg', the sdist might be 'my_pkg-1.0.0.tar.gz' + env.expect.that_str(version_from_filename("my_pkg-1.0.0.tar.gz")).equals("1.0.0") + + # Case 3: Project name has multiple underscores + env.expect.that_str(version_from_filename("very_long_project_name-0.5.0.zip")).equals("0.5.0") + + # Case 4: Legacy sdist with hyphens in name + # Note: Modern tools normalize this, but we should support the hyphen split + env.expect.that_str(version_from_filename("complex-name-1.2.3.tar.gz")).equals("1.2.3") + + # Case 5: Version contains an underscore (e.g. local versions) + env.expect.that_str(version_from_filename("pkg-1.2.3_post1.tar.gz")).equals("1.2.3_post1") + + # Case 6: custom compression + env.expect.that_str(version_from_filename("pkg-1.2.3_post1.tar.xz")).equals("1.2.3_post1") + +_tests.append(_test_sdist_version_extraction) + +def _test_sdist_version_extraction_fail(env): + failures = [] + # Case 1: Standard sdist + env.expect.that_str(version_from_filename("foo-1.2.3.7z", _fail=failures.append)).equals(None) + env.expect.that_collection(failures).contains_exactly(["Unsupported sdist extension: foo-1.2.3.7z"]) + +_tests.append(_test_sdist_version_extraction_fail) + +def version_from_filename_test_suite(name): + test_suite( + name = name, + basic_tests = _tests, + ) From c5de47d89d4431a2be1e75ac8f8fa5905c4fcfd2 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:04:32 +0900 Subject: [PATCH 2/5] fixup --- python/private/pypi/BUILD.bazel | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index a3a876d084..1a6507337f 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -419,6 +419,11 @@ bzl_library( ], ) +bzl_library( + name = "version_from_filename_bzl", + srcs = ["version_from_filename.bzl"], +) + bzl_library( name = 
"whl_config_repo_bzl", srcs = ["whl_config_repo.bzl"], From 9c5958d6435d0876717635a11e892068e5d8efe0 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:09:06 +0900 Subject: [PATCH 3/5] fixup --- python/private/pypi/version_from_filename.bzl | 5 +++-- .../version_from_filename_tests.bzl | 11 +++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/python/private/pypi/version_from_filename.bzl b/python/private/pypi/version_from_filename.bzl index d6c1c7b9e9..4c697f5852 100644 --- a/python/private/pypi/version_from_filename.bzl +++ b/python/private/pypi/version_from_filename.bzl @@ -5,7 +5,7 @@ _SDIST_EXTS = [ ".zip", ] -def version_from_filename(filename, _fail=fail): +def version_from_filename(filename, _fail=None): """Parse the version of the filename. Args: @@ -33,7 +33,8 @@ def version_from_filename(filename, _fail=fail): break if not head: - _fail("Unsupported sdist extension: {filename}".format(filename=filename)) + if _fail: + _fail("Unsupported sdist extension: {filename}".format(filename=filename)) return None # Based on PEP440 the version number cannot include dashes diff --git a/tests/pypi/version_from_filename/version_from_filename_tests.bzl b/tests/pypi/version_from_filename/version_from_filename_tests.bzl index 8fcc8553a8..881c228757 100644 --- a/tests/pypi/version_from_filename/version_from_filename_tests.bzl +++ b/tests/pypi/version_from_filename/version_from_filename_tests.bzl @@ -6,7 +6,7 @@ load("//python/private/pypi:version_from_filename.bzl", "version_from_filename") _tests = [] def _test_wheel_version_extraction(env): - # Case 1: Standard sdist + # Case 1: wheel env.expect.that_str(version_from_filename("foo-1.2.3-py3-none-any.whl")).equals("1.2.3") _tests.append(_test_wheel_version_extraction) @@ -36,10 +36,17 @@ _tests.append(_test_sdist_version_extraction) def _test_sdist_version_extraction_fail(env): failures = [] - # Case 1: Standard sdist + + # Case 1: 7z + 
env.expect.that_str(version_from_filename("foo-1.2.3.7z")).equals(None) env.expect.that_str(version_from_filename("foo-1.2.3.7z", _fail=failures.append)).equals(None) env.expect.that_collection(failures).contains_exactly(["Unsupported sdist extension: foo-1.2.3.7z"]) + # Case 2: egg + failures.clear() + env.expect.that_str(version_from_filename("foo-1.2.3-py3.egg", _fail=failures.append)).equals(None) + env.expect.that_collection(failures).contains_exactly(["Unsupported sdist extension: foo-1.2.3-py3.egg"]) + _tests.append(_test_sdist_version_extraction_fail) def version_from_filename_test_suite(name): From a19f6cab12514d4badbd817bc72626a9b2bbb245 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:40:42 +0900 Subject: [PATCH 4/5] buildifier --- python/private/pypi/BUILD.bazel | 2 +- python/private/pypi/version_from_filename.bzl | 4 ++-- .../version_from_filename/version_from_filename_tests.bzl | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 1a6507337f..48a1837f36 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -243,7 +243,7 @@ bzl_library( srcs = ["parse_simpleapi_html.bzl"], deps = [ ":version_from_filename_bzl", - ] + ], ) bzl_library( diff --git a/python/private/pypi/version_from_filename.bzl b/python/private/pypi/version_from_filename.bzl index 4c697f5852..d0b6e3105d 100644 --- a/python/private/pypi/version_from_filename.bzl +++ b/python/private/pypi/version_from_filename.bzl @@ -5,7 +5,7 @@ _SDIST_EXTS = [ ".zip", ] -def version_from_filename(filename, _fail=None): +def version_from_filename(filename, _fail = None): """Parse the version of the filename. 
Args: @@ -34,7 +34,7 @@ def version_from_filename(filename, _fail=None): if not head: if _fail: - _fail("Unsupported sdist extension: {filename}".format(filename=filename)) + _fail("Unsupported sdist extension: {filename}".format(filename = filename)) return None # Based on PEP440 the version number cannot include dashes diff --git a/tests/pypi/version_from_filename/version_from_filename_tests.bzl b/tests/pypi/version_from_filename/version_from_filename_tests.bzl index 881c228757..2bf9b4132c 100644 --- a/tests/pypi/version_from_filename/version_from_filename_tests.bzl +++ b/tests/pypi/version_from_filename/version_from_filename_tests.bzl @@ -39,12 +39,12 @@ def _test_sdist_version_extraction_fail(env): # Case 1: 7z env.expect.that_str(version_from_filename("foo-1.2.3.7z")).equals(None) - env.expect.that_str(version_from_filename("foo-1.2.3.7z", _fail=failures.append)).equals(None) + env.expect.that_str(version_from_filename("foo-1.2.3.7z", _fail = failures.append)).equals(None) env.expect.that_collection(failures).contains_exactly(["Unsupported sdist extension: foo-1.2.3.7z"]) # Case 2: egg failures.clear() - env.expect.that_str(version_from_filename("foo-1.2.3-py3.egg", _fail=failures.append)).equals(None) + env.expect.that_str(version_from_filename("foo-1.2.3-py3.egg", _fail = failures.append)).equals(None) env.expect.that_collection(failures).contains_exactly(["Unsupported sdist extension: foo-1.2.3-py3.egg"]) _tests.append(_test_sdist_version_extraction_fail) From 0eef72791cb175efa6bcaa79f187b4f0cedc12c5 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 24 Feb 2026 17:25:16 +0900 Subject: [PATCH 5/5] add a visibility exclusion --- .../pypi/version_from_filename/version_from_filename_tests.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pypi/version_from_filename/version_from_filename_tests.bzl b/tests/pypi/version_from_filename/version_from_filename_tests.bzl index 
2bf9b4132c..fab921fd4f 100644 --- a/tests/pypi/version_from_filename/version_from_filename_tests.bzl +++ b/tests/pypi/version_from_filename/version_from_filename_tests.bzl @@ -1,7 +1,7 @@ "" load("@rules_testing//lib:test_suite.bzl", "test_suite") -load("//python/private/pypi:version_from_filename.bzl", "version_from_filename") +load("//python/private/pypi:version_from_filename.bzl", "version_from_filename") # buildifier: disable=bzl-visibility _tests = []