pytask-dev
diff --git a/‎src/_pytask/build.py‎
Lines changed: 10 additions & 0 deletions b/‎src/_pytask/build.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎src/_pytask/database_utils.py‎
Lines changed: 38 additions & 0 deletions b/‎src/_pytask/database_utils.py‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎src/_pytask/execute.py‎
Lines changed: 64 additions & 5 deletions b/‎src/_pytask/execute.py‎
Lines changed: 64 additions & 5 deletions
diff --git a/‎src/_pytask/explain.py‎
Lines changed: 163 additions & 0 deletions b/‎src/_pytask/explain.py‎
Lines changed: 163 additions & 0 deletions
diff --git a/‎src/_pytask/outcomes.py‎
Lines changed: 2 additions & 0 deletions b/‎src/_pytask/outcomes.py‎
Lines changed: 2 additions & 0 deletions
@@ -76,6 +76,7 @@ def build(  # noqa: C901, PLR0912, PLR0913
     dry_run: bool = False,
     editor_url_scheme: Literal["no_link", "file", "vscode", "pycharm"]  # noqa: PYI051
     | str = "file",
+    explain: bool = False,
     expression: str = "",
     force: bool = False,
     ignore: Iterable[str] = (),
@@ -125,6 +126,8 @@ def build(  # noqa: C901, PLR0912, PLR0913
     editor_url_scheme
         An url scheme that allows to click on task names, node names and filenames and
         jump right into you preferred editor to the right line.
+    explain
+        Explain why tasks need to be executed by showing what changed.
     expression
         Same as ``-k`` on the command line. Select tasks via expressions on task ids.
     force
@@ -189,6 +192,7 @@ def build(  # noqa: C901, PLR0912, PLR0913
             "disable_warnings": disable_warnings,
             "dry_run": dry_run,
             "editor_url_scheme": editor_url_scheme,
+            "explain": explain,
             "expression": expression,
             "force": force,
             "ignore": ignore,
@@ -324,6 +328,12 @@ def build(  # noqa: C901, PLR0912, PLR0913
     default=False,
     help="Execute a task even if it succeeded successfully before.",
 )
+@click.option(
+    "--explain",
+    is_flag=True,
+    default=False,
+    help="Explain why tasks need to be executed by showing what changed.",
+)
 def build_command(**raw_config: Any) -> NoReturn:
     """Collect tasks, execute them and report the results."""
     raw_config["command"] = "build"
 
@@ -22,6 +22,7 @@
     "BaseTable",
     "DatabaseSession",
     "create_database",
+    "get_node_change_info",
     "update_states_in_database",
 ]
 
@@ -83,3 +84,40 @@ def has_node_changed(task: PTask, node: PTask | PNode, state: str | None) -> boo
         return True
 
     return state != db_state.hash_
+
+
+def get_node_change_info(
+    task: PTask, node: PTask | PNode, state: str | None
+) -> tuple[bool, str, dict[str, str]]:
+    """Report whether a node changed and classify the cause.
+
+    The current ``state`` of the node is compared with the hash stored in the
+    database under the combination of the task's and the node's signature.
+
+    Returns
+    -------
+    tuple[bool, str, dict[str, str]]
+        A tuple of (has_changed, reason, details) where:
+        - has_changed: ``False`` only when the stored hash equals ``state``.
+        - reason: One of "missing" (``state`` is ``None``), "not_in_db" (no stored
+          entry), "changed" (hashes differ) or "unchanged".
+        - details: Holds "old_hash" and "new_hash" for "changed", otherwise empty.
+
+    """
+    # A state of None signals that the node does not exist.
+    if state is None:
+        return True, "missing", {}
+
+    with DatabaseSession() as session:
+        stored = session.get(State, (task.signature, node.signature))
+
+    # Nothing has been recorded for this task/node pair.
+    if stored is None:
+        return True, "not_in_db", {}
+
+    if state == stored.hash_:
+        return False, "unchanged", {}
+
+    # Both hashes are known and differ, so expose them to the caller.
+    return True, "changed", {"old_hash": stored.hash_, "new_hash": state}
@@ -20,11 +20,15 @@
 from _pytask.dag_utils import TopologicalSorter
 from _pytask.dag_utils import descending_tasks
 from _pytask.dag_utils import node_and_neighbors
+from _pytask.database_utils import get_node_change_info
 from _pytask.database_utils import has_node_changed
 from _pytask.database_utils import update_states_in_database
 from _pytask.exceptions import ExecutionError
 from _pytask.exceptions import NodeLoadError
 from _pytask.exceptions import NodeNotFoundError
+from _pytask.explain import ChangeReason
+from _pytask.explain import TaskExplanation
+from _pytask.explain import create_change_reason
 from _pytask.mark import Mark
 from _pytask.mark_utils import has_mark
 from _pytask.node_protocols import PNode
@@ -99,6 +103,14 @@ def pytask_execute_build(session: Session) -> bool | None:
 @hookimpl
 def pytask_execute_task_protocol(session: Session, task: PTask) -> ExecutionReport:
     """Follow the protocol to execute each task."""
+    # Initialize explanation for this task if in explain mode
+    if session.config.get("explain", False):
+        task._explanation = TaskExplanation(  # type: ignore[attr-defined]
+            task_name=task.name,
+            would_execute=False,
+            reasons=[],
+        )
+
     session.hook.pytask_execute_task_log_start(session=session, task=task)
     try:
         session.hook.pytask_execute_task_setup(session=session, task=task)
@@ -119,7 +131,7 @@ def pytask_execute_task_protocol(session: Session, task: PTask) -> ExecutionRepo
 
 
 @hookimpl(trylast=True)
-def pytask_execute_task_setup(session: Session, task: PTask) -> None:  # noqa: C901
+def pytask_execute_task_setup(session: Session, task: PTask) -> None:  # noqa: C901, PLR0912
     """Set up the execution of a task.
 
     1. Check whether all dependencies of a task are available.
@@ -130,11 +142,22 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None:  # noqa: C
         raise WouldBeExecuted
 
     dag = session.dag
+    change_reasons = []
 
     # Task generators are always executed since their states are not updated, but we
     # skip the checks as well.
     needs_to_be_executed = session.config["force"] or is_task_generator(task)
 
+    if session.config["force"] and session.config["explain"]:
+        change_reasons.append(
+            ChangeReason(
+                node_name="",
+                node_type="task",
+                reason="forced",
+                details={},
+            )
+        )
+
     if not needs_to_be_executed:
         predecessors = set(dag.predecessors(task.signature)) | {task.signature}
         for node_signature in node_and_neighbors(dag, task.signature):
@@ -159,9 +182,39 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None:  # noqa: C
                     )
                 raise NodeNotFoundError(msg)
 
-            has_changed = has_node_changed(task=task, node=node, state=node_state)
-            if has_changed:
-                needs_to_be_executed = True
+            # Check if node changed and collect detailed info if in explain mode
+            if session.config["explain"]:
+                has_changed, reason, details = get_node_change_info(
+                    task=task, node=node, state=node_state
+                )
+                if has_changed:
+                    needs_to_be_executed = True
+                    # Determine node type
+                    if node_signature == task.signature:
+                        node_type = "source"
+                    elif node_signature in predecessors:
+                        node_type = "dependency"
+                    else:
+                        node_type = "product"
+
+                    change_reasons.append(
+                        create_change_reason(
+                            node=node,
+                            node_type=node_type,
+                            reason=reason,
+                            old_hash=details.get("old_hash"),
+                            new_hash=details.get("new_hash"),
+                        )
+                    )
+            else:
+                has_changed = has_node_changed(task=task, node=node, state=node_state)
+                if has_changed:
+                    needs_to_be_executed = True
+
+    # Update explanation on task if in explain mode
+    if session.config["explain"] and hasattr(task, "_explanation"):
+        task._explanation.would_execute = needs_to_be_executed  # type: ignore[attr-defined]
+        task._explanation.reasons = change_reasons  # type: ignore[attr-defined]
 
     if not needs_to_be_executed:
         collect_provisional_products(session, task)
@@ -188,7 +241,7 @@ def _safe_load(node: PNode | PProvisionalNode, task: PTask, *, is_product: bool)
 @hookimpl(trylast=True)
 def pytask_execute_task(session: Session, task: PTask) -> bool:
     """Execute task."""
-    if session.config["dry_run"]:
+    if session.config["dry_run"] or session.config["explain"]:
         raise WouldBeExecuted
 
     parameters = inspect.signature(task.function).parameters
@@ -255,6 +308,8 @@ def pytask_execute_task_process_report(
 
     """
     task = report.task
+    explain_mode = session.config.get("explain", False)
+
     if report.outcome == TaskOutcome.SUCCESS:
         update_states_in_database(session, task.signature)
     elif report.exc_info and isinstance(report.exc_info[1], WouldBeExecuted):
@@ -287,6 +342,10 @@ def pytask_execute_task_process_report(
         if report.exc_info and isinstance(report.exc_info[1], Exit):  # pragma: no cover
             session.should_stop = True
 
+    # Update explanation with outcome if in explain mode
+    if explain_mode and hasattr(task, "_explanation"):
+        task._explanation.outcome = report.outcome  # type: ignore[attr-defined]
+
     return True
 
 
 
@@ -0,0 +1,163 @@
+"""Contains logic for explaining why tasks need to be re-executed."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing import Any
+
+from attrs import define
+from attrs import field
+from rich.text import Text
+
+from _pytask.console import console
+from _pytask.outcomes import TaskOutcome
+from _pytask.pluginmanager import hookimpl
+
+if TYPE_CHECKING:
+    from _pytask.node_protocols import PNode
+    from _pytask.node_protocols import PTask
+    from _pytask.reports import ExecutionReport
+    from _pytask.session import Session
+
+
+@define
+class ChangeReason:
+    """Describe a single reason why a task is considered out of date.
+
+    Attributes
+    ----------
+    node_name
+        Name of the affected node as shown in the output.
+    node_type
+        Role of the node relative to the task.
+    reason
+        Keyword that selects the message produced by :meth:`format`. Unknown
+        keywords are printed verbatim after the node name.
+    details
+        Optional extra data; ``"old_hash"`` and ``"new_hash"`` are read for the
+        verbose output of changed nodes.
+
+    """
+
+    node_name: str
+    node_type: str  # "source", "dependency", "product", "task"
+    reason: str  # "changed", "missing", "not_in_db", "first_run", "forced"
+    details: dict[str, Any] = field(factory=dict)
+
+    def format(self, verbose: int = 1) -> str:  # noqa: PLR0911
+        """Format the change reason as a bullet point.
+
+        Hash values are only appended for changed nodes when ``verbose`` is at least
+        2 and both the previous and the current hash are available.
+
+        """
+        if self.reason == "missing":
+            return f"  • {self.node_name}: Missing"
+        if self.reason == "not_in_db":
+            return (
+                f"  • {self.node_name}: Not in database (first run or database cleared)"
+            )
+        if self.reason == "changed":
+            old_hash = self.details.get("old_hash")
+            new_hash = self.details.get("new_hash")
+            # Require both hashes so that partially filled details cannot raise.
+            show_hashes = old_hash is not None and new_hash is not None
+            if verbose >= 2 and show_hashes:  # noqa: PLR2004
+                return (
+                    f"  • {self.node_name}: Changed\n"
+                    f"    Previous hash: {old_hash[:8]}...\n"
+                    f"    Current hash:  {new_hash[:8]}..."
+                )
+            return f"  • {self.node_name}: Changed"
+        if self.reason == "first_run":
+            return "  • First execution"
+        if self.reason == "forced":
+            return "  • Forced execution (--force flag)"
+        return f"  • {self.node_name}: {self.reason}"
+
+
+@define
+class TaskExplanation:
+    """Collect why a task would or would not be executed.
+
+    Attributes
+    ----------
+    task_name
+        Name of the task, printed as the first line of the explanation.
+    would_execute
+        Whether the task was found to need execution.
+    outcome
+        Outcome of the task's report, or ``None`` while it is not known.
+    reasons
+        The individual change reasons collected for the task.
+
+    """
+
+    task_name: str
+    would_execute: bool
+    outcome: TaskOutcome | None = None
+    reasons: list[ChangeReason] = field(factory=list)
+
+    def format(self, verbose: int = 1) -> str:
+        """Format the task explanation as a string.
+
+        The task name is followed by either a message derived from the outcome, the
+        formatted change reasons, or a fallback line when no reasons were recorded.
+
+        """
+        lines = [f"{self.task_name}"]
+
+        if self.outcome == TaskOutcome.SKIP_UNCHANGED:
+            lines.append("  ✓ No changes detected")
+        elif self.outcome == TaskOutcome.PERSISTENCE:
+            lines.append("  • Persisted (products exist, changes ignored)")
+        elif self.outcome == TaskOutcome.SKIP:
+            lines.append("  • Skipped by marker")
+        elif self.reasons:
+            lines.extend(reason.format(verbose) for reason in self.reasons)
+        elif self.would_execute:
+            # A task can require execution without any recorded node change; do
+            # not claim that nothing changed in that case.
+            lines.append("  • Would be executed (no changed nodes recorded)")
+        else:
+            lines.append("  ✓ No changes detected")
+
+        return "\n".join(lines)
+
+
+def create_change_reason(
+    node: PNode | PTask,
+    node_type: str,
+    reason: str,
+    old_hash: str | None = None,
+    new_hash: str | None = None,
+) -> ChangeReason:
+    """Build a :class:`ChangeReason` for ``node``.
+
+    The node's name is used as ``node_name``. Hashes that are ``None`` are left out
+    of the ``details`` mapping.
+
+    """
+    hashes = {"old_hash": old_hash, "new_hash": new_hash}
+    return ChangeReason(
+        node_name=node.name,
+        node_type=node_type,
+        reason=reason,
+        details={key: value for key, value in hashes.items() if value is not None},
+    )
+
+
+def _print_explanation_group(
+    heading: str, color: str, group: list[TaskExplanation], verbose: int
+) -> None:
+    """Print a colored heading followed by every formatted explanation."""
+    console.print(Text(heading, style=f"bold {color}"), style=color)
+    console.print()
+    for explanation in group:
+        console.print(explanation.format(verbose))
+        console.print()
+
+
+@hookimpl(tryfirst=True)
+def pytask_execute_log_end(session: Session, reports: list[ExecutionReport]) -> None:
+    """Print the collected task explanations if the ``explain`` option is set.
+
+    Explanations are read from the ``_explanation`` attribute of each report's task
+    and printed in up to three groups: tasks that would be executed, skipped tasks,
+    and, for a verbosity of at least 2, tasks without changes.
+
+    """
+    if not session.config.get("explain"):
+        return
+
+    console.print()
+    console.rule("Explanation", style="bold blue")
+    console.print()
+
+    # Only tasks that carry an explanation take part in the summary.
+    explanations = []
+    for report in reports:
+        if hasattr(report.task, "_explanation"):
+            explanations.append(report.task._explanation)
+
+    if not explanations:
+        console.print("No tasks require execution - everything is up to date.")
+        return
+
+    # Sort the explanations into the three groups in a single pass.
+    to_execute = []
+    skipped = []
+    unchanged = []
+    for explanation in explanations:
+        is_unchanged = explanation.outcome == TaskOutcome.SKIP_UNCHANGED
+        if explanation.would_execute:
+            to_execute.append(explanation)
+        elif not is_unchanged:
+            skipped.append(explanation)
+        if is_unchanged:
+            unchanged.append(explanation)
+
+    verbose = session.config.get("verbose", 1)
+
+    if to_execute:
+        _print_explanation_group(
+            "Tasks that would be executed:", "yellow", to_execute, verbose
+        )
+
+    if skipped:
+        _print_explanation_group("Skipped tasks:", "blue", skipped, verbose)
+
+    if unchanged and verbose >= 2:  # noqa: PLR2004
+        _print_explanation_group("Tasks with no changes:", "green", unchanged, verbose)
@@ -95,6 +95,8 @@ class TaskOutcome(Enum):
         source files and products have not changed.
     SUCCESS
         Outcome for task which was executed successfully.
+    WOULD_BE_EXECUTED
+        Outcome for tasks which would be executed.
 
     """