Skip to content

Commit 0ea5050

Browse files
committed
Explain why tasks will be executed.
1 parent b8ff125 commit 0ea5050

File tree

9 files changed

+655
-5
lines changed

9 files changed

+655
-5
lines changed

src/_pytask/build.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def build( # noqa: C901, PLR0912, PLR0913
7676
dry_run: bool = False,
7777
editor_url_scheme: Literal["no_link", "file", "vscode", "pycharm"] # noqa: PYI051
7878
| str = "file",
79+
explain: bool = False,
7980
expression: str = "",
8081
force: bool = False,
8182
ignore: Iterable[str] = (),
@@ -125,6 +126,8 @@ def build( # noqa: C901, PLR0912, PLR0913
125126
editor_url_scheme
126127
A URL scheme that allows clicking on task names, node names and filenames and
127128
jump right into your preferred editor to the right line.
129+
explain
130+
Explain why tasks need to be executed by showing what changed.
128131
expression
129132
Same as ``-k`` on the command line. Select tasks via expressions on task ids.
130133
force
@@ -189,6 +192,7 @@ def build( # noqa: C901, PLR0912, PLR0913
189192
"disable_warnings": disable_warnings,
190193
"dry_run": dry_run,
191194
"editor_url_scheme": editor_url_scheme,
195+
"explain": explain,
192196
"expression": expression,
193197
"force": force,
194198
"ignore": ignore,
@@ -324,6 +328,12 @@ def build( # noqa: C901, PLR0912, PLR0913
324328
default=False,
325329
help="Execute a task even if it succeeded successfully before.",
326330
)
331+
@click.option(
332+
"--explain",
333+
is_flag=True,
334+
default=False,
335+
help="Explain why tasks need to be executed by showing what changed.",
336+
)
327337
def build_command(**raw_config: Any) -> NoReturn:
328338
"""Collect tasks, execute them and report the results."""
329339
raw_config["command"] = "build"

src/_pytask/database_utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"BaseTable",
2323
"DatabaseSession",
2424
"create_database",
25+
"get_node_change_info",
2526
"update_states_in_database",
2627
]
2728

@@ -83,3 +84,40 @@ def has_node_changed(task: PTask, node: PTask | PNode, state: str | None) -> boo
8384
return True
8485

8586
return state != db_state.hash_
87+
88+
89+
def get_node_change_info(
    task: PTask, node: PTask | PNode, state: str | None
) -> tuple[bool, str, dict[str, str]]:
    """Report whether a node changed and, if so, why.

    The current ``state`` of the node is compared against the hash stored in the
    database for the pair of task and node signatures.

    Returns
    -------
    tuple[bool, str, dict[str, str]]
        A tuple of (has_changed, reason, details) where:
        - has_changed: Whether the node has changed
        - reason: One of "missing", "not_in_db", "changed" or "unchanged"
        - details: Holds "old_hash" and "new_hash" when the reason is "changed",
          otherwise it is empty

    """
    # A state of None means the node does not exist.
    if state is None:
        return True, "missing", {}

    with DatabaseSession() as session:
        db_state = session.get(State, (task.signature, node.signature))

    # No stored entry for this task/node pair.
    if db_state is None:
        return True, "not_in_db", {}

    stored_hash = db_state.hash_
    if state != stored_hash:
        return True, "changed", {"old_hash": stored_hash, "new_hash": state}

    return False, "unchanged", {}

src/_pytask/execute.py

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,15 @@
2020
from _pytask.dag_utils import TopologicalSorter
2121
from _pytask.dag_utils import descending_tasks
2222
from _pytask.dag_utils import node_and_neighbors
23+
from _pytask.database_utils import get_node_change_info
2324
from _pytask.database_utils import has_node_changed
2425
from _pytask.database_utils import update_states_in_database
2526
from _pytask.exceptions import ExecutionError
2627
from _pytask.exceptions import NodeLoadError
2728
from _pytask.exceptions import NodeNotFoundError
29+
from _pytask.explain import ChangeReason
30+
from _pytask.explain import TaskExplanation
31+
from _pytask.explain import create_change_reason
2832
from _pytask.mark import Mark
2933
from _pytask.mark_utils import has_mark
3034
from _pytask.node_protocols import PNode
@@ -99,6 +103,14 @@ def pytask_execute_build(session: Session) -> bool | None:
99103
@hookimpl
100104
def pytask_execute_task_protocol(session: Session, task: PTask) -> ExecutionReport:
101105
"""Follow the protocol to execute each task."""
106+
# Initialize explanation for this task if in explain mode
107+
if session.config.get("explain", False):
108+
task._explanation = TaskExplanation( # type: ignore[attr-defined]
109+
task_name=task.name,
110+
would_execute=False,
111+
reasons=[],
112+
)
113+
102114
session.hook.pytask_execute_task_log_start(session=session, task=task)
103115
try:
104116
session.hook.pytask_execute_task_setup(session=session, task=task)
@@ -119,7 +131,7 @@ def pytask_execute_task_protocol(session: Session, task: PTask) -> ExecutionRepo
119131

120132

121133
@hookimpl(trylast=True)
122-
def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C901
134+
def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C901, PLR0912
123135
"""Set up the execution of a task.
124136
125137
1. Check whether all dependencies of a task are available.
@@ -130,11 +142,22 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C
130142
raise WouldBeExecuted
131143

132144
dag = session.dag
145+
change_reasons = []
133146

134147
# Task generators are always executed since their states are not updated, but we
135148
# skip the checks as well.
136149
needs_to_be_executed = session.config["force"] or is_task_generator(task)
137150

151+
if session.config["force"] and session.config["explain"]:
152+
change_reasons.append(
153+
ChangeReason(
154+
node_name="",
155+
node_type="task",
156+
reason="forced",
157+
details={},
158+
)
159+
)
160+
138161
if not needs_to_be_executed:
139162
predecessors = set(dag.predecessors(task.signature)) | {task.signature}
140163
for node_signature in node_and_neighbors(dag, task.signature):
@@ -159,9 +182,39 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C
159182
)
160183
raise NodeNotFoundError(msg)
161184

162-
has_changed = has_node_changed(task=task, node=node, state=node_state)
163-
if has_changed:
164-
needs_to_be_executed = True
185+
# Check if node changed and collect detailed info if in explain mode
186+
if session.config["explain"]:
187+
has_changed, reason, details = get_node_change_info(
188+
task=task, node=node, state=node_state
189+
)
190+
if has_changed:
191+
needs_to_be_executed = True
192+
# Determine node type
193+
if node_signature == task.signature:
194+
node_type = "source"
195+
elif node_signature in predecessors:
196+
node_type = "dependency"
197+
else:
198+
node_type = "product"
199+
200+
change_reasons.append(
201+
create_change_reason(
202+
node=node,
203+
node_type=node_type,
204+
reason=reason,
205+
old_hash=details.get("old_hash"),
206+
new_hash=details.get("new_hash"),
207+
)
208+
)
209+
else:
210+
has_changed = has_node_changed(task=task, node=node, state=node_state)
211+
if has_changed:
212+
needs_to_be_executed = True
213+
214+
# Update explanation on task if in explain mode
215+
if session.config["explain"] and hasattr(task, "_explanation"):
216+
task._explanation.would_execute = needs_to_be_executed # type: ignore[attr-defined]
217+
task._explanation.reasons = change_reasons # type: ignore[attr-defined]
165218

166219
if not needs_to_be_executed:
167220
collect_provisional_products(session, task)
@@ -188,7 +241,7 @@ def _safe_load(node: PNode | PProvisionalNode, task: PTask, *, is_product: bool)
188241
@hookimpl(trylast=True)
189242
def pytask_execute_task(session: Session, task: PTask) -> bool:
190243
"""Execute task."""
191-
if session.config["dry_run"]:
244+
if session.config["dry_run"] or session.config["explain"]:
192245
raise WouldBeExecuted
193246

194247
parameters = inspect.signature(task.function).parameters
@@ -255,6 +308,8 @@ def pytask_execute_task_process_report(
255308
256309
"""
257310
task = report.task
311+
explain_mode = session.config.get("explain", False)
312+
258313
if report.outcome == TaskOutcome.SUCCESS:
259314
update_states_in_database(session, task.signature)
260315
elif report.exc_info and isinstance(report.exc_info[1], WouldBeExecuted):
@@ -287,6 +342,10 @@ def pytask_execute_task_process_report(
287342
if report.exc_info and isinstance(report.exc_info[1], Exit): # pragma: no cover
288343
session.should_stop = True
289344

345+
# Update explanation with outcome if in explain mode
346+
if explain_mode and hasattr(task, "_explanation"):
347+
task._explanation.outcome = report.outcome # type: ignore[attr-defined]
348+
290349
return True
291350

292351

src/_pytask/explain.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
"""Contains logic for explaining why tasks need to be re-executed."""
2+
3+
from __future__ import annotations
4+
5+
from typing import TYPE_CHECKING
6+
from typing import Any
7+
8+
from attrs import define
9+
from attrs import field
10+
from rich.text import Text
11+
12+
from _pytask.console import console
13+
from _pytask.outcomes import TaskOutcome
14+
from _pytask.pluginmanager import hookimpl
15+
16+
if TYPE_CHECKING:
17+
from _pytask.node_protocols import PNode
18+
from _pytask.node_protocols import PTask
19+
from _pytask.reports import ExecutionReport
20+
from _pytask.session import Session
21+
22+
23+
@define
class ChangeReason:
    """Represents a reason why a node changed.

    ``details`` may carry the keys "old_hash" and "new_hash"; they are only used
    when formatting a "changed" reason at a verbosity of two or more.

    """

    # Name of the node the reason refers to; shown as the bullet label.
    node_name: str
    node_type: str  # "source", "dependency", "product", "task"
    reason: str  # "changed", "missing", "not_in_db", "first_run", "forced"
    details: dict[str, Any] = field(factory=dict)

    def format(self, verbose: int = 1) -> str:  # noqa: PLR0911
        """Format the change reason as a string.

        Parameters
        ----------
        verbose
            Verbosity level. With a value of two or more, a "changed" reason
            additionally shows the first eight characters of the previous and
            the current hash, provided both are available.

        Returns
        -------
        str
            The bullet line (or lines) describing the reason.

        """
        if self.reason == "missing":
            return f" • {self.node_name}: Missing"
        if self.reason == "not_in_db":
            return (
                f" • {self.node_name}: Not in database (first run or database cleared)"
            )
        if self.reason == "changed":
            old_hash = self.details.get("old_hash")
            new_hash = self.details.get("new_hash")
            # Only show hashes when both are known. Details with just one of the
            # two entries fall back to the short form instead of raising.
            if (
                verbose >= 2  # noqa: PLR2004
                and old_hash is not None
                and new_hash is not None
            ):
                return (
                    f" • {self.node_name}: Changed\n"
                    f" Previous hash: {old_hash[:8]}...\n"
                    f" Current hash: {new_hash[:8]}..."
                )
            return f" • {self.node_name}: Changed"
        if self.reason == "first_run":
            return " • First execution"
        if self.reason == "forced":
            return " • Forced execution (--force flag)"
        # Unknown reasons are shown verbatim.
        return f" • {self.node_name}: {self.reason}"
53+
54+
55+
@define
class TaskExplanation:
    """Collects everything needed to explain a task's execution status."""

    task_name: str
    would_execute: bool
    outcome: TaskOutcome | None = None
    reasons: list[ChangeReason] = field(factory=list)

    def format(self, verbose: int = 1) -> str:
        """Render the task name followed by its status or its change reasons.

        ``verbose`` is forwarded to the formatting of each individual reason.

        """
        # Outcomes that are described by a single fixed line.
        fixed_messages = {
            TaskOutcome.SKIP_UNCHANGED: " ✓ No changes detected",
            TaskOutcome.PERSISTENCE: " • Persisted (products exist, changes ignored)",
            TaskOutcome.SKIP: " • Skipped by marker",
        }

        message = fixed_messages.get(self.outcome)
        if message is not None:
            body = [message]
        elif not self.reasons:
            body = [" ✓ No changes detected"]
        else:
            body = [reason.format(verbose) for reason in self.reasons]

        return "\n".join([f"{self.task_name}", *body])
85+
86+
87+
def create_change_reason(
    node: PNode | PTask,
    node_type: str,
    reason: str,
    old_hash: str | None = None,
    new_hash: str | None = None,
) -> ChangeReason:
    """Build a ChangeReason for ``node``, recording only the hashes that are given."""
    candidates = {"old_hash": old_hash, "new_hash": new_hash}
    # Hashes that were not supplied are left out of the details entirely.
    details = {key: value for key, value in candidates.items() if value is not None}

    return ChangeReason(
        node_name=node.name,
        node_type=node_type,
        reason=reason,
        details=details,
    )
107+
108+
109+
@hookimpl(tryfirst=True)
def pytask_execute_log_end(session: Session, reports: list[ExecutionReport]) -> None:
    """Print the explanation section when the ``explain`` option is set.

    Explanations are read from the ``_explanation`` attribute of each reported
    task and printed in up to three groups: tasks that would be executed, skipped
    tasks, and - only with a verbosity of two or more - tasks without changes.

    """
    if not session.config.get("explain"):
        return

    console.print()
    console.rule("Explanation", style="bold blue")
    console.print()

    # Tasks without an attached explanation are ignored.
    explanations = []
    for report in reports:
        if hasattr(report.task, "_explanation"):
            explanations.append(report.task._explanation)

    if not explanations:
        console.print("No tasks require execution - everything is up to date.")
        return

    # Sort the explanations into the groups in a single pass.
    would_execute = []
    skipped = []
    unchanged = []
    for explanation in explanations:
        is_unchanged = explanation.outcome == TaskOutcome.SKIP_UNCHANGED
        if explanation.would_execute:
            would_execute.append(explanation)
        elif not is_unchanged:
            skipped.append(explanation)
        if is_unchanged:
            unchanged.append(explanation)

    verbose = session.config.get("verbose", 1)

    # Unchanged tasks are only listed at higher verbosity.
    shown_unchanged = unchanged if unchanged and verbose >= 2 else []  # noqa: PLR2004

    sections = (
        ("Tasks that would be executed:", "bold yellow", "yellow", would_execute),
        ("Skipped tasks:", "bold blue", "blue", skipped),
        ("Tasks with no changes:", "bold green", "green", shown_unchanged),
    )
    for heading, heading_style, line_style, group in sections:
        if not group:
            continue
        console.print(Text(heading, style=heading_style), style=line_style)
        console.print()
        for explanation in group:
            console.print(explanation.format(verbose))
            console.print()

src/_pytask/outcomes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ class TaskOutcome(Enum):
9595
source files and products have not changed.
9696
SUCCESS
9797
Outcome for task which was executed successfully.
98+
WOULD_BE_EXECUTED
99+
Outcome for tasks which would be executed.
98100
99101
"""
100102

0 commit comments

Comments
 (0)