diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 6bfd64652e..48a1837f36 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -241,6 +241,9 @@ bzl_library( bzl_library( name = "parse_simpleapi_html_bzl", srcs = ["parse_simpleapi_html.bzl"], + deps = [ + ":version_from_filename_bzl", + ], ) bzl_library( @@ -416,6 +419,11 @@ bzl_library( ], ) +bzl_library( + name = "version_from_filename_bzl", + srcs = ["version_from_filename.bzl"], +) + bzl_library( name = "whl_config_repo_bzl", srcs = ["whl_config_repo.bzl"], diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl index a41f0750c4..23ecbf496f 100644 --- a/python/private/pypi/parse_simpleapi_html.bzl +++ b/python/private/pypi/parse_simpleapi_html.bzl @@ -16,6 +16,8 @@ Parse SimpleAPI HTML in Starlark. """ +load(":version_from_filename.bzl", "version_from_filename") + def parse_simpleapi_html(*, url, content): """Get the package URLs for given shas by parsing the Simple API HTML. 
"""Parse the version of a distribution just from its filename.

This is useful for selecting files based on the requested version.
"""

_SDIST_EXTS = [
    ".tar",  # may be followed by one compression suffix, e.g. `.gz`, `.bz2`, `.xz`
    ".zip",
]

# Archive extensions that are allowed to carry a trailing compression suffix.
_COMPRESSIBLE_EXTS = [".tar"]

def _sdist_stem(filename):
    """Strip a supported sdist extension from the filename.

    Args:
        filename: {type}`str` the filename.

    Returns:
        The `{name}-{version}` part of the filename, or an empty string if the
        filename does not end with a supported sdist extension.
    """
    for ext in _SDIST_EXTS:
        stem, sep, suffix = filename.rpartition(ext)
        if not sep:
            continue

        # Only accept `ext` when it really is the file extension. Matching it
        # anywhere in the filename would split `backports.tarfile-1.2.0.zip`
        # at the `.tar` that belongs to the project name.
        if not suffix:
            return stem
        if ext in _COMPRESSIBLE_EXTS and suffix.startswith(".") and suffix[1:].isalnum():
            return stem

    return ""

def _unparsable(message, filename, _fail):
    """Report a parse failure through the optional callback and return None."""
    if _fail:
        _fail("{}: {}".format(message, filename))
    return None

def version_from_filename(filename, _fail = None):
    """Parse the version out of a wheel or sdist filename.

    Args:
        filename: {type}`str` the filename, e.g. `foo-1.2.3-py3-none-any.whl`
            or `foo-1.2.3.tar.gz`.
        _fail: An optional function that is called with a message when the
            version cannot be parsed. When it is not set the failure is
            silent and only signalled by the `None` return value.

    Returns:
        A string version or None if we could not parse the version.
    """
    # See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format

    if filename.endswith(".whl"):
        # The format is {name}-{version}-{whl_specifiers}.whl and the
        # distribution name is escaped so that it never contains a dash.
        _, _, rest = filename.partition("-")
        version, sep, _ = rest.partition("-")

        # Without the second dash there are no wheel specifiers, so `version`
        # would be empty or would still include the `.whl` extension.
        if sep and version:
            return version
        return _unparsable("Could not parse the wheel version from", filename, _fail)

    # NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path

    # {name}-{version}.{ext}
    stem = _sdist_stem(filename)
    if not stem:
        return _unparsable("Unsupported sdist extension", filename, _fail)

    # Based on PEP440 the version number cannot include dashes, so everything
    # after the last dash is the version, even for legacy names with dashes.
    _, sep, version = stem.rpartition("-")
    if not sep or not version:
        # There is no `{name}-` prefix, so what is left is the project name
        # and not a version.
        return _unparsable("Could not parse the sdist version from", filename, _fail)

    return version
def _test_sdist_version_extraction(env):
    """Check the version parsed from sdist filenames of various shapes."""
    cases = [
        # Standard sdist.
        ("foo-1.2.3.tar.gz", "1.2.3"),
        # PEP 625: the project name has underscores (normalized from dashes),
        # e.g. the package 'my-pkg' may be published as 'my_pkg-1.0.0.tar.gz'.
        ("my_pkg-1.0.0.tar.gz", "1.0.0"),
        # The project name has multiple underscores.
        ("very_long_project_name-0.5.0.zip", "0.5.0"),
        # Legacy sdist with hyphens in the name. Modern tools normalize this,
        # but splitting on the last hyphen should still be supported.
        ("complex-name-1.2.3.tar.gz", "1.2.3"),
        # The version itself contains an underscore.
        ("pkg-1.2.3_post1.tar.gz", "1.2.3_post1"),
        # A tarball with a different compression.
        ("pkg-1.2.3_post1.tar.xz", "1.2.3_post1"),
    ]

    for filename, expected in cases:
        env.expect.that_str(version_from_filename(filename)).equals(expected)

_tests.append(_test_sdist_version_extraction)

def _test_sdist_version_extraction_fail(env):
    """Check that unsupported archives yield None and report through `_fail`."""

    # 7z: without a callback the failure is silent.
    silent_result = version_from_filename("foo-1.2.3.7z")
    env.expect.that_str(silent_result).equals(None)

    # 7z: with a callback the message is reported and None is still returned.
    seven_zip_errors = []
    reported_result = version_from_filename("foo-1.2.3.7z", _fail = seven_zip_errors.append)
    env.expect.that_str(reported_result).equals(None)
    env.expect.that_collection(seven_zip_errors).contains_exactly(
        ["Unsupported sdist extension: foo-1.2.3.7z"],
    )

    # egg: a fresh list is used so that only this call's message is checked.
    egg_errors = []
    egg_result = version_from_filename("foo-1.2.3-py3.egg", _fail = egg_errors.append)
    env.expect.that_str(egg_result).equals(None)
    env.expect.that_collection(egg_errors).contains_exactly(
        ["Unsupported sdist extension: foo-1.2.3-py3.egg"],
    )

_tests.append(_test_sdist_version_extraction_fail)

def version_from_filename_test_suite(name):
    """Create the test suite that runs every test registered in `_tests`.

    Args:
        name: {type}`str` the name of the test suite target.
    """
    test_suite(
        name = name,
        basic_tests = _tests,
    )