Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit. Hold shift + click to select a range.
af045e7
fix: replace 'TBD merged' invariant description with real text
Essoz Mar 14, 2026
148e75f
docs: fix broken link and remove under-construction banner
Essoz Mar 14, 2026
4bbb451
feat: add to_display_name() for human-readable invariant labels
Essoz Mar 14, 2026
695fba0
feat: add violation summary with step numbers and recurrence
Essoz Mar 14, 2026
cfb035e
test: add semantic unit tests for display names and violation summary
Essoz Mar 14, 2026
6d1b71c
fix: clean up display names for _TRAINCHECK_ attrs and non_zero values
Essoz Mar 18, 2026
8ebe1a6
fix: silence noisy stdout/stderr during traincheck-check
Essoz Mar 18, 2026
4645aad
feat: replace multi-bar clutter with single live-stats progress bar
Essoz Mar 18, 2026
1c3f8ad
fix: suppress all inner progress bars during checking
Essoz Mar 18, 2026
3f90199
refactor: use to_display_name() as the canonical text_description at …
Essoz Mar 18, 2026
5e78187
fix: wire ANALYSIS_SKIP_FUNC_NAMES through all relation function filters
Essoz Mar 18, 2026
d6e68c9
feat: clean up inference stdout with structured per-phase output
Essoz Mar 18, 2026
242959c
feat: show per-relation progress bar during hypothesis generation
Essoz Mar 18, 2026
3c820a3
fix: demote instrumentor internal prints to logger.debug
Essoz Mar 19, 2026
09c9ea7
fix: prefix all TrainCheck log output with [TrainCheck]
Essoz Mar 19, 2026
e8ec62e
fix: silence instrumentor noise during traincheck-collect
Essoz Mar 19, 2026
a2be746
fix: suppress deprecation warnings during attribute probing in dumper
Essoz Mar 19, 2026
6ae7c8e
fix: suppress warnings during torch module instrumentation
Essoz Mar 19, 2026
db9bf45
fix: resolve online checker crashes and add dynamic trace file detection
Essoz Mar 19, 2026
93a3e9c
fix: miscellaneous online checker bug fixes
Essoz Mar 19, 2026
e8a7520
feat: rich online HTML report with step/stage annotations and checkin…
Essoz Mar 19, 2026
b4f266d
fix: KeyError on missing varid attribute in APIContainRelation online…
Essoz Mar 19, 2026
0a1d166
fix: use attr_map to scope varid iteration in APIContainRelation onli…
Essoz Mar 20, 2026
95fb115
fix: fail loudly on attr_map/varid_map inconsistency, explicit not-ye…
Essoz Mar 20, 2026
02a3a33
fix: strip _TRAINCHECK_ prefix when displaying internal tensor-tracki…
Essoz Mar 20, 2026
9d8d04c
fix: keep _ID suffix when stripping _TRAINCHECK_ prefix from attr dis…
Essoz Mar 20, 2026
9cf8064
feat: add step/stage/trace detail to offline report, W&B, and MLflow
Essoz Mar 20, 2026
6c4caf4
feat: log per-step violation counts to W&B and MLflow
Essoz Mar 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions docs/technical-doc.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# TrainCheck Documentation

🚜 This documentation is under construction. We welcome any feedback or questions through GitHub Issues or [our Discord server](https://discord.gg/DPEd7Xeg).


TrainCheck is a lightweight, invariant-based instrumentation and analysis tool for identifying silent correctness issues in PyTorch training pipelines. It infers behavioral invariants from correct reference runs (e.g., official examples or clean configurations), then checks other scripts for behavioral violations. TrainCheck is designed to be minimally intrusive—requiring no code modifications or rewrites of training logic.

## 🔧 System Overview
Expand Down
2 changes: 1 addition & 1 deletion docs/usage-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ TrainCheck helps detect and diagnose silent errors in deep learning training run

## 🚀 Quick Start

Check out the [5-minute guide](./docs/5-min.md) for a minimal working example.
Check out the [5-minute guide](5-min-tutorial.md) for a minimal working example.

## ✅ Common Use Cases

Expand Down
278 changes: 278 additions & 0 deletions tests/test_display_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
"""Semantic unit tests for Relation.to_display_name().

These tests verify that key *meaning* tokens appear in the output for each
relation type given a known params list. They do NOT test inference logic —
the params are constructed directly, so the tests remain stable even if the
inference algorithm changes.
"""

import pytest

from traincheck.invariant.base_cls import (
_NOT_SET,
APIParam,
InputOutputParam,
VarTypeParam,
)
from traincheck.invariant.consistency_relation import ConsistencyRelation
from traincheck.invariant.consistency_transient_vars import (
ConsistentInputOutputRelation,
ConsistentOutputRelation,
ThresholdRelation,
)
from traincheck.invariant.contain_relation import APIContainRelation
from traincheck.invariant.cover_relation import FunctionCoverRelation
from traincheck.invariant.DistinctArgumentRelation import DistinctArgumentRelation
from traincheck.invariant.lead_relation import FunctionLeadRelation


class TestAPIContainRelationDisplayName:
    """Display names for API-contains invariants (API call wraps a state change
    or another API call)."""

    def test_state_transition(self):
        given = [
            APIParam("torch.optim.optimizer.Optimizer.zero_grad"),
            VarTypeParam(
                "torch.nn.Parameter", "grad", pre_value="non_zero", post_value=None
            ),
        ]
        label = APIContainRelation.to_display_name(given)
        assert label is not None
        assert "zero_grad" in label
        assert "grad" in label
        # the "non_zero" pre-value must surface in some "non-zero"-ish form
        assert "non" in label.lower()

    def test_api_calls_api(self):
        given = [
            APIParam("torch.optim.optimizer.Optimizer.step"),
            APIParam("torch.optim.adadelta.adadelta"),
        ]
        label = APIContainRelation.to_display_name(given)
        assert label is not None
        assert "step" in label
        assert "adadelta" in label

    def test_const_value(self):
        given = [
            APIParam("torch.nn.modules.module.Module.forward"),
            VarTypeParam("torch.nn.Parameter", "requires_grad", const_value=True),
        ]
        label = APIContainRelation.to_display_name(given)
        assert label is not None
        assert "forward" in label
        assert "requires_grad" in label

    def test_post_value_non_zero_normalized(self):
        """The raw token 'non_zero' must be rendered as 'non-zero'."""
        given = [
            APIParam("torch.optim.sgd.SGD.step"),
            VarTypeParam(
                "torch.nn.Parameter",
                "data",
                pre_value="non_zero",
                post_value="non_zero",
            ),
        ]
        label = APIContainRelation.to_display_name(given)
        assert label is not None
        assert "non_zero" not in label
        assert "non-zero" in label

    def test_traincheck_internal_attr_hidden(self):
        """_TRAINCHECK_-prefixed attributes are internal proxy IDs; no name is produced."""
        given = [
            APIParam("torch.optim.sgd.SGD.step"),
            VarTypeParam(
                "torch.nn.Parameter",
                "_TRAINCHECK_grad_ID",
                pre_value="above_zero",
                post_value="above_zero",
            ),
        ]
        assert APIContainRelation.to_display_name(given) is None

    def test_returns_none_for_empty_params(self):
        assert APIContainRelation.to_display_name([]) is None

    def test_returns_none_for_single_param(self):
        lone = [APIParam("torch.foo")]
        assert APIContainRelation.to_display_name(lone) is None


class TestConsistencyRelationDisplayName:
    """Display names for per-step variable consistency invariants."""

    def test_basic(self):
        only_var = [VarTypeParam("torch.nn.Parameter", "grad")]
        label = ConsistencyRelation.to_display_name(only_var)
        assert label is not None
        assert "Parameter" in label
        assert "grad" in label
        lowered = label.lower()
        assert "consistent" in lowered or "stay" in lowered or "step" in lowered

    def test_returns_none_for_empty(self):
        assert ConsistencyRelation.to_display_name([]) is None

    def test_returns_none_for_non_vartype(self):
        only_api = [APIParam("torch.foo.bar")]
        assert ConsistencyRelation.to_display_name(only_api) is None


class TestFunctionCoverRelationDisplayName:
    """Display names for cover invariants (one call's occurrence implies another's)."""

    def test_cover_direction(self):
        pair = [
            APIParam("torch.distributed.is_initialized"),
            APIParam("torch.nn.modules.module.Module.eval"),
        ]
        label = FunctionCoverRelation.to_display_name(pair)
        assert label is not None
        assert "is_initialized" in label
        assert "eval" in label
        lowered = label.lower()
        assert "occurs" in lowered or "cover" in lowered or "when" in lowered

    def test_returns_none_for_insufficient_params(self):
        lone = [APIParam("torch.foo")]
        assert FunctionCoverRelation.to_display_name(lone) is None


class TestFunctionLeadRelationDisplayName:
    """Display names for lead invariants (temporal ordering between calls)."""

    def test_ordering(self):
        ordered = [
            APIParam("torch.Tensor.backward"),
            APIParam("torch.optim.optimizer.Optimizer.step"),
        ]
        label = FunctionLeadRelation.to_display_name(ordered)
        assert label is not None
        assert "backward" in label
        assert "step" in label
        lowered = label.lower()
        assert "precede" in lowered or "before" in lowered or "lead" in lowered

    def test_merged_three_params(self):
        """A merged lead invariant carries 3 APIParams; the name uses first and last."""
        ordered = [
            APIParam("torch.Tensor.backward"),
            APIParam("torch.optim.optimizer.Optimizer.zero_grad"),
            APIParam("torch.optim.optimizer.Optimizer.step"),
        ]
        label = FunctionLeadRelation.to_display_name(ordered)
        assert label is not None
        assert "backward" in label
        assert "step" in label

    def test_returns_none_for_single_param(self):
        lone = [APIParam("torch.foo")]
        assert FunctionLeadRelation.to_display_name(lone) is None


class TestDistinctArgumentRelationDisplayName:
    """Display names for distinct-argument invariants on a single API."""

    def test_basic(self):
        only_api = [APIParam("torch.nn.init.normal_")]
        label = DistinctArgumentRelation.to_display_name(only_api)
        assert label is not None
        assert "normal_" in label
        lowered = label.lower()
        assert "distinct" in lowered or "different" in lowered or "argument" in lowered

    def test_returns_none_for_empty(self):
        assert DistinctArgumentRelation.to_display_name([]) is None

    def test_returns_none_for_non_api_param(self):
        only_var = [VarTypeParam("torch.nn.Parameter", "grad")]
        assert DistinctArgumentRelation.to_display_name(only_var) is None


class TestConsistentOutputRelationDisplayName:
    """Display names for consistent-output invariants on an API's return value."""

    def test_with_const_value(self):
        given = [
            APIParam("torch.nn.functional.relu"),
            VarTypeParam("torch.Tensor", "dtype", const_value="float32"),
        ]
        label = ConsistentOutputRelation.to_display_name(given)
        assert label is not None
        for token in ("relu", "dtype", "float32"):
            assert token in label
        lowered = label.lower()
        assert "consistent" in lowered or "return" in lowered

    def test_without_const_value(self):
        given = [
            APIParam("torch.nn.functional.relu"),
            VarTypeParam("torch.Tensor", "ndim"),
        ]
        label = ConsistentOutputRelation.to_display_name(given)
        assert label is not None
        assert "relu" in label
        assert "ndim" in label

    def test_returns_none_for_insufficient_params(self):
        lone = [APIParam("torch.foo")]
        assert ConsistentOutputRelation.to_display_name(lone) is None


class TestConsistentInputOutputRelationDisplayName:
    """Display names for input/output consistency invariants across one API call."""

    def test_basic(self):
        arg_side = InputOutputParam(
            name="input",
            index=0,
            type="torch.Tensor",
            additional_path=("itemsize",),
            api_name="kaiming_uniform_",
            is_input=True,
        )
        ret_side = InputOutputParam(
            name="output",
            index=0,
            type="torch.Tensor",
            additional_path=("ndim",),
            api_name="kaiming_uniform_",
            is_input=False,
        )
        api = APIParam("torch.nn.init.kaiming_uniform_")
        label = ConsistentInputOutputRelation.to_display_name([arg_side, api, ret_side])
        assert label is not None
        assert "kaiming_uniform_" in label
        assert "itemsize" in label
        assert "ndim" in label
        lowered = label.lower()
        assert "input" in lowered
        assert "output" in lowered

    def test_returns_none_for_insufficient_params(self):
        api = APIParam("torch.foo")
        assert ConsistentInputOutputRelation.to_display_name([api]) is None


class TestThresholdRelationDisplayName:
    """Display names for threshold invariants; param order encodes the bound direction."""

    # Builds the output-side param (the quantity being bounded).
    def _build_output(self, api_name: str) -> InputOutputParam:
        return InputOutputParam(
            name="output_tensors",
            index=0,
            type="torch.Tensor",
            additional_path=("value",),
            api_name=api_name,
            is_input=False,
        )

    # Builds the scalar input param acting as the bound.
    def _build_threshold(self, name: str, api_name: str) -> InputOutputParam:
        return InputOutputParam(
            name=name,
            index=None,
            type="float",
            additional_path=None,
            api_name=api_name,
            is_input=True,
        )

    def test_min_threshold_gte(self):
        """[output, api, threshold] ordering means output >= threshold."""
        api = APIParam("torch.optim.optimizer.Optimizer.step")
        bounded = self._build_output("Optimizer.step")
        bound = self._build_threshold("lr", "Optimizer.step")
        label = ThresholdRelation.to_display_name([bounded, api, bound])
        assert label is not None
        for token in ("Optimizer.step", "lr", "≥"):
            assert token in label

    def test_max_threshold_lte(self):
        """[threshold, api, output] ordering means output <= threshold."""
        api = APIParam("torch.optim.optimizer.Optimizer.step")
        bounded = self._build_output("Optimizer.step")
        bound = self._build_threshold("lr", "Optimizer.step")
        label = ThresholdRelation.to_display_name([bound, api, bounded])
        assert label is not None
        for token in ("Optimizer.step", "lr", "≤"):
            assert token in label

    def test_returns_none_for_insufficient_params(self):
        lone = [APIParam("torch.foo")]
        assert ThresholdRelation.to_display_name(lone) is None
Loading
Loading