Skip to content

Commit 18236df

Browse files
committed
Merge remote-tracking branch 'origin/main' into codex/issue-169-log-capture-export
2 parents 977cb25 + deed356 commit 18236df

7 files changed

Lines changed: 208 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
77

88
## Unreleased
99

10+
- [#820](https://github.com/pytask-dev/pytask/pull/820) fixes collection and node
11+
display for remote `UPath`-backed nodes, while preserving correct handling of local
12+
`file://` and `local://` `UPath`s across platforms.
1013
- [#743](https://github.com/pytask-dev/pytask/pull/743) adds the `pytask.lock`
1114
lockfile as the primary state backend with a portable format and documentation. When
1215
no lockfile exists, pytask reads the legacy SQLite state and writes `pytask.lock`;

src/_pytask/collect.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
from _pytask.outcomes import count_outcomes
4343
from _pytask.path import find_case_sensitive_path
4444
from _pytask.path import import_path
45+
from _pytask.path import is_non_local_path
46+
from _pytask.path import normalize_local_upath
4547
from _pytask.path import shorten_path
4648
from _pytask.pluginmanager import hookimpl
4749
from _pytask.reports import CollectionReport
@@ -455,7 +457,14 @@ def pytask_collect_node( # noqa: C901, PLR0912
455457
node.name = create_name_of_python_node(node_info)
456458
return node
457459

458-
if isinstance(node, PPathNode) and not node.path.is_absolute():
460+
if isinstance(node, PPathNode):
461+
node.path = normalize_local_upath(node.path)
462+
463+
if (
464+
isinstance(node, PPathNode)
465+
and not is_non_local_path(node.path)
466+
and not node.path.is_absolute()
467+
):
459468
node.path = path.joinpath(node.path)
460469

461470
# ``normpath`` removes ``../`` from the path which is necessary for the casing
@@ -487,6 +496,9 @@ def pytask_collect_node( # noqa: C901, PLR0912
487496
node.name = create_name_of_python_node(node_info)
488497
return node
489498

499+
if isinstance(node, UPath): # pragma: no cover
500+
node = normalize_local_upath(node)
501+
490502
if isinstance(node, UPath): # pragma: no cover
491503
if not node.protocol:
492504
node = Path(node)

src/_pytask/collect_command.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
from _pytask.node_protocols import PTaskWithPath
3131
from _pytask.outcomes import ExitCode
3232
from _pytask.path import find_common_ancestor
33+
from _pytask.path import is_non_local_path
34+
from _pytask.path import normalize_local_upath
3335
from _pytask.path import relative_to
3436
from _pytask.pluginmanager import hookimpl
3537
from _pytask.pluginmanager import storage
@@ -125,10 +127,14 @@ def _find_common_ancestor_of_all_nodes(
125127
all_paths.append(task.path)
126128
if show_nodes:
127129
all_paths.extend(
128-
x.path for x in tree_leaves(task.depends_on) if isinstance(x, PPathNode)
130+
normalize_local_upath(x.path)
131+
for x in tree_leaves(task.depends_on)
132+
if isinstance(x, PPathNode) and not is_non_local_path(x.path)
129133
)
130134
all_paths.extend(
131-
x.path for x in tree_leaves(task.produces) if isinstance(x, PPathNode)
135+
normalize_local_upath(x.path)
136+
for x in tree_leaves(task.produces)
137+
if isinstance(x, PPathNode) and not is_non_local_path(x.path)
132138
)
133139

134140
return find_common_ancestor(*all_paths, *paths)

src/_pytask/path.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from types import ModuleType
1414
from typing import TYPE_CHECKING
1515

16+
from upath import UPath
17+
1618
from _pytask._hashlib import file_digest
1719
from _pytask.cache import Cache
1820

@@ -25,11 +27,17 @@
2527
"find_common_ancestor",
2628
"hash_path",
2729
"import_path",
30+
"is_non_local_path",
31+
"normalize_local_upath",
2832
"relative_to",
2933
"shorten_path",
3034
]
3135

3236

37+
_LOCAL_UPATH_PROTOCOLS = frozenset(("", "file", "local"))
38+
_WINDOWS_DRIVE_PREFIX_LENGTH = 3
39+
40+
3341
def relative_to(path: Path, source: Path, *, include_source: bool = True) -> Path:
3442
"""Make a path relative to another path.
3543
@@ -56,6 +64,27 @@ def relative_to(path: Path, source: Path, *, include_source: bool = True) -> Pat
5664
return Path(source_name, path.relative_to(source))
5765

5866

67+
def is_non_local_path(path: Path) -> bool:
    """Tell whether ``path`` is a `UPath` that targets a non-local resource.

    Objects that are not `UPath` instances are always reported as local. A
    `UPath` counts as non-local when its ``protocol`` is not one of the entries
    in ``_LOCAL_UPATH_PROTOCOLS``.
    """
    if not isinstance(path, UPath):
        return False
    return path.protocol not in _LOCAL_UPATH_PROTOCOLS
70+
71+
72+
def normalize_local_upath(path: Path) -> Path:
    """Turn a ``file``/``local`` `UPath` into a stdlib `Path`.

    Any other object, including `UPath` instances with a different protocol, is
    returned unchanged.
    """
    if not isinstance(path, UPath) or path.protocol not in {"file", "local"}:
        return path

    raw = path.path
    # On Windows the path component may look like "/C:/dir/file"; the leading
    # slash has to go so that ``Path`` sees the drive letter.
    has_slash_before_drive = (
        sys.platform == "win32"
        and raw.startswith("/")
        and len(raw) >= _WINDOWS_DRIVE_PREFIX_LENGTH
        and raw[1].isalpha()
        and raw[2] == ":"
    )
    return Path(raw[1:] if has_slash_before_drive else raw)
86+
87+
5988
def find_closest_ancestor(
6089
path: Path, potential_ancestors: Sequence[Path]
6190
) -> Path | None:
@@ -432,6 +461,12 @@ def shorten_path(path: Path, paths: Sequence[Path]) -> str:
432461
path from one path in ``session.config["paths"]`` to the node.
433462
434463
"""
464+
if is_non_local_path(path):
465+
return path.as_posix()
466+
467+
path = normalize_local_upath(path)
468+
paths = [normalize_local_upath(p) for p in paths]
469+
435470
ancestor = find_closest_ancestor(path, paths)
436471
if ancestor is None:
437472
try:

tests/test_collect.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,18 @@
1414
from pytask import CollectionOutcome
1515
from pytask import ExitCode
1616
from pytask import NodeInfo
17+
from pytask import PickleNode
1718
from pytask import Session
1819
from pytask import Task
1920
from pytask import build
2021
from pytask import cli
2122
from tests.conftest import noop
2223

2324

25+
def _make_local_upath_uri(path: Path, protocol: str) -> str:
26+
return f"{protocol}:///{path.as_posix().lstrip('/')}"
27+
28+
2429
@pytest.mark.parametrize(
2530
("depends_on", "produces"),
2631
[
@@ -190,6 +195,56 @@ def test_pytask_collect_node(session, path, node_info, expected):
190195
assert str(result.load()) == str(expected)
191196

192197

198+
def test_pytask_collect_remote_path_node_keeps_uri_name():
    """A path node backed by a remote ``UPath`` is named by its full URI."""
    upath = pytest.importorskip("upath")

    cwd = Path.cwd()
    session = Session.from_config(
        {"check_casing_of_paths": False, "paths": (cwd,), "root": cwd}
    )
    node_info = NodeInfo(
        arg_name="path",
        path=(),
        value=PickleNode(path=upath.UPath("s3://bucket/file.pkl")),
        task_path=cwd / "task_example.py",
        task_name="task_example",
    )

    collected = pytask_collect_node(session, cwd, node_info)

    assert isinstance(collected, PPathNode)
    assert collected.name == "s3://bucket/file.pkl"
219+
220+
221+
@pytest.mark.parametrize("protocol", ["file", "local"])
def test_pytask_collect_local_upath_protocol_node_is_shortened(tmp_path, protocol):
    """Nodes given as ``file``/``local`` ``UPath`` are collected as local paths."""
    upath = pytest.importorskip("upath")

    target = tmp_path / "file.pkl"
    session = Session.from_config(
        {"check_casing_of_paths": False, "paths": (tmp_path,), "root": tmp_path}
    )
    node = PickleNode(path=upath.UPath(_make_local_upath_uri(target, protocol)))
    node_info = NodeInfo(
        arg_name="path",
        path=(),
        value=node,
        task_path=tmp_path / "task_example.py",
        task_name="task_example",
    )

    collected = pytask_collect_node(session, tmp_path, node_info)

    assert isinstance(collected, PPathNode)
    assert collected.path == target
    assert collected.name == f"{tmp_path.name}/file.pkl"
246+
247+
193248
@pytest.mark.skipif(
194249
sys.platform != "win32", reason="Only works on case-insensitive file systems."
195250
)

tests/test_collect_command.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
from _pytask.node_protocols import PTaskWithPath
2222

2323

24+
def _make_local_upath_uri(path: Path, protocol: str) -> str:
25+
return f"{protocol}:///{path.as_posix().lstrip('/')}"
26+
27+
2428
def test_collect_task(runner, tmp_path):
2529
source = """
2630
from pathlib import Path
@@ -396,6 +400,59 @@ def test_task_name_is_shortened(runner, tmp_path):
396400
assert "a/b/task_example.py::task_example" not in result.output
397401

398402

403+
def test_collect_task_with_remote_upath_node(runner, tmp_path):
    """``collect --nodes`` exits OK and prints the URI of a remote node."""
    pytest.importorskip("upath")

    source = """
    from pathlib import Path
    from typing import Annotated

    from upath import UPath

    from pytask import PickleNode
    from pytask import Product

    def task_example(
        data=PickleNode(path=UPath("s3://bucket/in.pkl")),
        path: Annotated[Path, Product] = Path("out.txt"),
    ): ...
    """
    task_module = tmp_path.joinpath("task_module.py")
    task_module.write_text(textwrap.dedent(source))

    cli_args = ["collect", "--nodes", tmp_path.as_posix()]
    result = runner.invoke(cli, cli_args)

    assert result.exit_code == ExitCode.OK
    assert "s3://bucket/in.pkl" in result.output
426+
427+
428+
@pytest.mark.parametrize("protocol", ["file", "local"])
def test_collect_task_with_local_upath_protocol_node(runner, tmp_path, protocol):
    """``collect --nodes`` shows a local-protocol ``UPath`` node as a short path."""
    pytest.importorskip("upath")

    uri = _make_local_upath_uri(tmp_path / "in.pkl", protocol)

    source = f"""
    from pathlib import Path
    from typing import Annotated

    from upath import UPath

    from pytask import PickleNode
    from pytask import Product

    def task_example(
        data=PickleNode(path=UPath("{uri}")),
        path: Annotated[Path, Product] = Path("out.txt"),
    ): ...
    """
    task_module = tmp_path.joinpath("task_module.py")
    task_module.write_text(textwrap.dedent(source))

    cli_args = ["collect", "--nodes", tmp_path.as_posix()]
    result = runner.invoke(cli, cli_args)

    assert result.exit_code == ExitCode.OK
    assert f"{tmp_path.name}/in.pkl" in result.output
454+
455+
399456
def test_python_node_is_collected(runner, tmp_path):
400457
source = """
401458
from pytask import Product

tests/test_path.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,20 @@
1919
from _pytask.path import find_case_sensitive_path
2020
from _pytask.path import find_closest_ancestor
2121
from _pytask.path import find_common_ancestor
22+
from _pytask.path import is_non_local_path
23+
from _pytask.path import normalize_local_upath
2224
from _pytask.path import relative_to
25+
from _pytask.path import shorten_path
2326
from pytask.path import import_path
2427

2528
if TYPE_CHECKING:
2629
from collections.abc import Generator
2730

2831

32+
def _make_local_upath_uri(path: Path, protocol: str) -> str:
33+
return f"{protocol}:///{path.as_posix().lstrip('/')}"
34+
35+
2936
@pytest.mark.parametrize(
3037
("path", "source", "include_source", "expected"),
3138
[
@@ -110,6 +117,36 @@ def test_find_common_ancestor(path_1, path_2, expectation, expected):
110117
assert result == expected
111118

112119

120+
def test_shorten_path_keeps_non_local_uri():
    """``shorten_path`` returns the URI of a remote ``UPath`` unchanged."""
    upath = pytest.importorskip("upath")

    remote = upath.UPath("s3://bucket/file.pkl")
    shortened = shorten_path(remote, [Path.cwd()])

    assert shortened == "s3://bucket/file.pkl"
126+
127+
128+
@pytest.mark.parametrize("protocol", ["file", "local"])
def test_shorten_path_treats_local_upath_protocols_as_local(tmp_path, protocol):
    """``file``/``local`` ``UPath`` objects are local and get shortened."""
    upath = pytest.importorskip("upath")

    uri = _make_local_upath_uri(tmp_path / "file.pkl", protocol)
    local_upath = upath.UPath(uri)

    assert not is_non_local_path(local_upath)
    assert shorten_path(local_upath, [tmp_path]) == f"{tmp_path.name}/file.pkl"
136+
137+
138+
@pytest.mark.parametrize("protocol", ["file", "local"])
def test_normalize_local_upath_strips_windows_drive_prefix(monkeypatch, protocol):
    """With ``sys.platform`` patched to win32, the slash before ``C:`` is removed."""
    upath = pytest.importorskip("upath")

    # Pretend to run on Windows so the drive-prefix branch is exercised.
    monkeypatch.setattr(sys, "platform", "win32")
    uri = f"{protocol}:///C:/tmp/file.pkl"

    normalized = normalize_local_upath(upath.UPath(uri))

    assert normalized.as_posix() == "C:/tmp/file.pkl"
148+
149+
113150
@pytest.mark.skipif(sys.platform != "win32", reason="Only works on Windows.")
114151
@pytest.mark.parametrize(
115152
("path", "existing_paths", "expected"),

0 commit comments

Comments
 (0)