Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# ontrack

[![Tests](https://github.com/FertigLab/ontrack/actions/workflows/tests.yml/badge.svg)](https://github.com/FertigLab/ontrack/actions/workflows/tests.yml)

A command-line tool that scans directory trees and reports file statistics (file count, total size) for locations defined in a YAML configuration file. Supports Unix group-based filtering.

## Requirements
Expand All @@ -20,7 +22,7 @@ Create a YAML config file (see [`config.yaml`](config.yaml) for a template):

```yaml
# Top-level directories to scan
directories:
paths:
- /path/to/data

# Unix group whose members' subdirectories should be reported (optional)
Expand All @@ -34,7 +36,7 @@ ignore:

| Key | Description |
|---|---|
| `directories` | List of top-level paths to scan (required) |
| `paths` | List of top-level paths to scan (required) |
| `groups` | List of Unix group names; enables group mode (optional, overridden by `--group`) |
| `ignore` | Glob patterns matched against base names to exclude from all scans |

Expand Down
2 changes: 1 addition & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Specify the top-level directories to scan.
# For each directory, first-level subdirectories owned by members of `groups`
# will be reported.
directories:
paths:
- /path/to/your/data

# Unix groups whose members' subdirectories should be reported.
Expand Down
12 changes: 6 additions & 6 deletions ontrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,22 +393,22 @@ def main(
output: Write YAML report to this path instead of printing to stdout.
"""
config = load_config(config_path)
directories = config.get("directories", [])
paths: list[str] = config.get("paths", [])

# Allow groups to be specified in the config file; CLI takes precedence.
if groups is None:
groups = config.get("groups")

if not directories:
print("No directories specified in configuration.", file=sys.stderr)
if not paths:
print("No paths specified in configuration.", file=sys.stderr)
sys.exit(1)

# Read ignore patterns from the config file.
ignore_patterns: list[str] = config.get("ignore", [])
if ignore_patterns:
logger.info("Ignore patterns: %s", ignore_patterns)

logger.info("Directories supplied: %s", directories)
logger.info("Paths supplied: %s", paths)

if groups is not None:
members: set[str] = set()
Expand All @@ -418,7 +418,7 @@ def main(
members.update(group_members)

subdirs: list[str] = []
for parent_dir in directories:
for parent_dir in paths:
if not pathlib.Path(parent_dir).is_dir():
print(
f"WARNING: '{parent_dir}' is not a valid directory – skipping.",
Expand All @@ -429,7 +429,7 @@ def main(

paths_to_process: list[str] = subdirs
else:
paths_to_process = directories
paths_to_process = paths

iterator = (
tqdm(paths_to_process, desc="Processing directories", unit="dir", file=sys.stderr)
Expand Down
44 changes: 22 additions & 22 deletions tests/test_ontrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_get_username_returns_string():


def test_load_config():
config_data = {"directories": ["/tmp/test1", "/tmp/test2"]}
config_data = {"paths": ["/tmp/test1", "/tmp/test2"]}
with tempfile.NamedTemporaryFile(
mode="w", suffix=".yaml", delete=False
) as tmp:
Expand All @@ -141,7 +141,7 @@ def test_load_config():

try:
loaded = load_config(tmp_path)
assert loaded["directories"] == ["/tmp/test1", "/tmp/test2"]
assert loaded["paths"] == ["/tmp/test1", "/tmp/test2"]
finally:
os.unlink(tmp_path)

Expand Down Expand Up @@ -178,7 +178,7 @@ def test_report_directory_invalid(capsys):

def test_main_no_directories(tmp_path):
config_file = tmp_path / "config.yaml"
config_file.write_text("directories: []\n")
config_file.write_text("paths: []\n")

with pytest.raises(SystemExit):
main(str(config_file))
Expand All @@ -190,7 +190,7 @@ def test_main_with_valid_directory(tmp_path, capsys):
(data_dir / "file.txt").write_text("hello world")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

main(str(config_file))
captured = capsys.readouterr()
Expand Down Expand Up @@ -321,7 +321,7 @@ def test_main_with_group(tmp_path, capsys):
(user_subdir / "file.txt").write_text("hello world")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

main(str(config_file), groups=[group_name])
captured = capsys.readouterr()
Expand Down Expand Up @@ -351,7 +351,7 @@ def test_main_with_multiple_groups(tmp_path, capsys):
(user_subdir / "file.txt").write_text("hello world")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

main(str(config_file), groups=[group_name, second_group])
captured = capsys.readouterr()
Expand Down Expand Up @@ -382,7 +382,7 @@ def test_main_multiple_groups_from_config(tmp_path, capsys):

config_file = tmp_path / "config.yaml"
config_file.write_text(
f"directories:\n - {data_dir}\ngroups:\n - {group_name}\n - {second_group}\n"
f"paths:\n - {data_dir}\ngroups:\n - {group_name}\n - {second_group}\n"
)

main(str(config_file))
Expand All @@ -403,12 +403,12 @@ def test_main_logs_directories(tmp_path, caplog):
(data_dir / "file.txt").write_text("hello")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

with caplog.at_level(logging.INFO, logger="ontrack"):
main(str(config_file))

assert "Directories supplied" in caplog.text
assert "Paths supplied" in caplog.text
assert str(data_dir) in caplog.text


Expand All @@ -424,7 +424,7 @@ def test_main_logs_group_members(tmp_path, caplog):
(data_dir / "file.txt").write_text("hello")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

with caplog.at_level(logging.INFO, logger="ontrack"):
main(str(config_file), groups=[group_name])
Expand All @@ -441,7 +441,7 @@ def test_main_no_group_logging_skipped(tmp_path, caplog):
(data_dir / "file.txt").write_text("hello")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

with caplog.at_level(logging.INFO, logger="ontrack"):
main(str(config_file))
Expand Down Expand Up @@ -775,7 +775,7 @@ def test_main_group_from_config(tmp_path, capsys):
(user_subdir / "file.txt").write_text("content")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\ngroups:\n - {group_name}\n")
config_file.write_text(f"paths:\n - {data_dir}\ngroups:\n - {group_name}\n")

main(str(config_file)) # no groups kwarg; should come from config
captured = capsys.readouterr()
Expand All @@ -796,7 +796,7 @@ def test_main_cli_group_overrides_config(tmp_path, capsys):

config_file = tmp_path / "config.yaml"
# Config contains a bogus group; the CLI groups should win.
config_file.write_text(f"directories:\n - {data_dir}\ngroups:\n - __bogus_group__\n")
config_file.write_text(f"paths:\n - {data_dir}\ngroups:\n - __bogus_group__\n")

main(str(config_file), groups=[group_name])
captured = capsys.readouterr()
Expand All @@ -810,7 +810,7 @@ def test_main_with_group_invalid_parent_dir(tmp_path, capsys):
group_name = grp.getgrgid(current_gid).gr_name

config_file = tmp_path / "config.yaml"
config_file.write_text("directories:\n - /nonexistent/path/xyz\n")
config_file.write_text("paths:\n - /nonexistent/path/xyz\n")

main(str(config_file), groups=[group_name])
captured = capsys.readouterr()
Expand Down Expand Up @@ -858,7 +858,7 @@ def test_main_light_mode_omits_stats(tmp_path, capsys):
(data_dir / "file.txt").write_text("hello world")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

main(str(config_file), light=True)
captured = capsys.readouterr()
Expand All @@ -879,7 +879,7 @@ def test_main_output_writes_yaml(tmp_path):
(data_dir / "file.txt").write_text("hello world")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

output_file = str(tmp_path / "report.yaml")
main(str(config_file), output=output_file)
Expand All @@ -904,7 +904,7 @@ def test_main_output_does_not_print(tmp_path, capsys):
(data_dir / "file.txt").write_text("content")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

output_file = str(tmp_path / "report.yaml")
main(str(config_file), output=output_file)
Expand All @@ -919,7 +919,7 @@ def test_main_output_light_mode(tmp_path):
(data_dir / "file.txt").write_text("hello")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

output_file = str(tmp_path / "report.yaml")
main(str(config_file), light=True, output=output_file)
Expand Down Expand Up @@ -1154,7 +1154,7 @@ def test_main_config_ignore_excludes_hidden_files(tmp_path, capsys):
(data_dir / ".hidden").write_text("secret")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\nignore:\n - '.*'\n")
config_file.write_text(f"paths:\n - {data_dir}\nignore:\n - '.*'\n")

main(str(config_file))
captured = capsys.readouterr()
Expand All @@ -1172,7 +1172,7 @@ def test_main_config_ignore_excludes_dirs(tmp_path, capsys):
(data_dir / "readme.txt").write_text("hi")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\nignore:\n - '.*'\n")
config_file.write_text(f"paths:\n - {data_dir}\nignore:\n - '.*'\n")

main(str(config_file))
captured = capsys.readouterr()
Expand All @@ -1188,7 +1188,7 @@ def test_main_config_ignore_wildcard_extension(tmp_path, capsys):
(data_dir / "unwanted.tmp").write_text("junk")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\nignore:\n - '*.tmp'\n")
config_file.write_text(f"paths:\n - {data_dir}\nignore:\n - '*.tmp'\n")

main(str(config_file))
captured = capsys.readouterr()
Expand All @@ -1204,7 +1204,7 @@ def test_main_no_ignore_key_counts_all_files(tmp_path, capsys):
(data_dir / ".hidden").write_text("world!")

config_file = tmp_path / "config.yaml"
config_file.write_text(f"directories:\n - {data_dir}\n")
config_file.write_text(f"paths:\n - {data_dir}\n")

main(str(config_file))
captured = capsys.readouterr()
Expand Down
Loading