diff --git a/config.yaml b/config.yaml index cfe34a7..995ca7f 100644 --- a/config.yaml +++ b/config.yaml @@ -1,14 +1,15 @@ # ontrack configuration # Specify the top-level directories to scan. -# For each directory, first-level subdirectories owned by members of `group` +# For each directory, first-level subdirectories owned by members of `groups` # will be reported. directories: - /path/to/your/data -# Unix group whose members' subdirectories should be reported. -# Can also be supplied via the --group command-line argument (CLI takes +# Unix groups whose members' subdirectories should be reported. +# Can also be supplied via the --groups command-line argument (CLI takes # precedence over this setting). -group: your_group_name +groups: + - your_group_name # Shell-style glob patterns for files and directories to exclude from all # scans. Matching is done against the base name only (not the full path). diff --git a/ontrack.py b/ontrack.py index 95cb3b7..49f91ac 100644 --- a/ontrack.py +++ b/ontrack.py @@ -3,7 +3,7 @@ Two operating modes are supported: -* **Group mode** (``--group`` supplied or ``group:`` set in the config file): +* **Group mode** (``--groups`` supplied or ``groups:`` set in the config file): For each configured directory, the script finds subdirectories owned by users who belong to the specified Unix group and reports stats for each of those subdirectories. Only the immediate children are checked for @@ -205,23 +205,24 @@ def get_group_subdirectories( def get_directory_stats( path: str, - group: str | None = None, + groups: list[str] | None = None, show_progress: bool = False, ignore_patterns: list[str] | None = None, ) -> dict: """Return file count and total size (bytes) for a directory tree. - If *group* is given, only files owned by users belonging to that Unix - group are counted. If *show_progress* is ``True`` (default: ``False``), - a tqdm progress bar is displayed on stderr for each subdirectory visited - during the walk; set it to ``False`` to suppress all progress output. - Directories and files whose base names match any pattern in - *ignore_patterns* are excluded from the walk and the counts respectively. + If *groups* is given, only files owned by users belonging to any of those + Unix groups are counted. If *show_progress* is ``True`` (default: + ``False``), a tqdm progress bar is displayed on stderr for each + subdirectory visited during the walk; set it to ``False`` to suppress all + progress output. Directories and files whose base names match any pattern + in *ignore_patterns* are excluded from the walk and the counts + respectively. Args: path: Root of the directory tree to scan. - group: Optional Unix group name; when supplied only files owned by - members of this group are included in the counts. + groups: Optional list of Unix group names; when supplied only files + owned by members of these groups are included in the counts. show_progress: Display a tqdm progress bar on stderr while scanning. ignore_patterns: Shell-style glob patterns (see :func:`_is_ignored`). Matched directories are not descended into; matched files are not @@ -229,8 +230,10 @@ def get_directory_stats( """ patterns: list[str] = ignore_patterns or [] allowed_users: set[str] | None = None - if group is not None: - allowed_users = get_group_members(group) + if groups is not None: + allowed_users = set() + for group in groups: + allowed_users.update(get_group_members(group)) file_count = 0 total_size = 0 @@ -274,7 +277,7 @@ def format_size(size_bytes: int) -> str: def _build_directory_entry( path: str, - group: str | None = None, + groups: list[str] | None = None, light: bool = False, show_progress: bool = False, ignore_patterns: list[str] | None = None, @@ -287,7 +290,8 @@ def _build_directory_entry( Args: path: Directory to report on. - group: Optional Unix group name forwarded to :func:`get_directory_stats`. + groups: Optional list of Unix group names forwarded to + :func:`get_directory_stats`. light: When ``True``, skip file-count and size scanning. show_progress: Forward progress display flag to :func:`get_directory_stats`. ignore_patterns: Shell-style glob patterns forwarded to @@ -300,11 +304,11 @@ def _build_directory_entry( username = get_username(path) entry: dict = {"directory": path, "username": username} - if group is not None: - entry["group"] = group + if groups is not None: + entry["groups"] = groups if not light: stats = get_directory_stats( - path, group=group, show_progress=show_progress, ignore_patterns=ignore_patterns + path, groups=groups, show_progress=show_progress, ignore_patterns=ignore_patterns ) entry["file_count"] = stats["file_count"] entry["total_size"] = stats["total_size"] @@ -314,7 +318,7 @@ def _build_directory_entry( def report_directory( path: str, - group: str | None = None, + groups: list[str] | None = None, light: bool = False, show_progress: bool = False, ignore_patterns: list[str] | None = None, @@ -323,7 +327,7 @@ def report_directory( Args: path: Directory to report on. - group: Optional Unix group name. + groups: Optional list of Unix group names. light: When ``True``, skip file-count and size scanning. show_progress: Display progress bars while scanning. ignore_patterns: Shell-style glob patterns; matched files and @@ -331,7 +335,7 @@ def report_directory( """ entry = _build_directory_entry( path, - group=group, + groups=groups, light=light, show_progress=show_progress, ignore_patterns=ignore_patterns, @@ -341,8 +345,8 @@ def report_directory( print(f"Directory : {entry['directory']}") print(f"Username : {entry['username']}") - if "group" in entry: - print(f"Group : {entry['group']}") + if "groups" in entry: + print(f"Groups : {', '.join(entry['groups'])}") if "file_count" in entry: print(f"Files : {entry['file_count']}") print(f"Total size: {entry['total_size_human']}") @@ -373,7 +377,7 @@ def _is_ignored(name: str, patterns: list[str]) -> bool: def main( config_path: str = "config.yaml", - group: str | None = None, + groups: list[str] | None = None, light: bool = False, progress: bool = False, output: str | None = None, @@ -382,7 +386,8 @@ def main( Args: config_path: Path to the YAML configuration file. - group: Unix group name; overrides the ``group`` key in the config file. + groups: List of Unix group names; overrides the ``groups`` key in the + config file. light: When ``True``, skip file-count and size scanning. progress: Display tqdm progress bars while scanning. output: Write YAML report to this path instead of printing to stdout. @@ -390,9 +395,9 @@ def main( config = load_config(config_path) directories = config.get("directories", []) - # Allow the group to be specified in the config file; CLI takes precedence. - if group is None: - group = config.get("group") + # Allow groups to be specified in the config file; CLI takes precedence. + if groups is None: + groups = config.get("groups") if not directories: print("No directories specified in configuration.", file=sys.stderr) @@ -405,9 +410,12 @@ def main( logger.info("Directories supplied: %s", directories) - if group is not None: - members = get_group_members(group) - logger.info("Users found in group '%s': %s", group, sorted(members)) + if groups is not None: + members: set[str] = set() + for group in groups: + group_members = get_group_members(group) + logger.info("Users found in group '%s': %s", group, sorted(group_members)) + members.update(group_members) subdirs: list[str] = [] for parent_dir in directories: @@ -434,7 +442,7 @@ def main( for path in iterator: entry = _build_directory_entry( path, - group=group, + groups=groups, light=light, show_progress=progress, ignore_patterns=ignore_patterns, @@ -448,7 +456,7 @@ def main( for path in iterator: report_directory( path, - group=group, + groups=groups, light=light, show_progress=progress, ignore_patterns=ignore_patterns, @@ -466,13 +474,15 @@ def main( help="Path to the configuration YAML file (default: config.yaml)", ) parser.add_argument( - "--group", + "--groups", + nargs="+", default=None, + metavar="GROUP", help=( "For each configured directory, report subdirectories owned by users " - "belonging to this Unix group. Descent continues into directories that " - "contain only subdirectories; a directory with at least one file is used " - "as the reporting directory." + "belonging to any of these Unix groups. Accepts one or more group names. " + "Descent continues into directories that contain only subdirectories; a " + "directory with at least one file is used as the reporting directory." ), ) parser.add_argument( @@ -496,7 +506,7 @@ def main( args = parser.parse_args() main( args.config, - group=args.group, + groups=args.groups, light=args.light, progress=args.progress, output=args.output, diff --git a/tests/test_ontrack.py b/tests/test_ontrack.py index f1ae4e0..ed88086 100644 --- a/tests/test_ontrack.py +++ b/tests/test_ontrack.py @@ -237,7 +237,7 @@ def test_get_directory_stats_group_matches_current_user(): with open(path_a, "w") as f: f.write("hello") # 5 bytes - stats = get_directory_stats(tmpdir, group=group_name) + stats = get_directory_stats(tmpdir, groups=[group_name]) assert stats["file_count"] == 1 assert stats["total_size"] == 5 @@ -267,7 +267,7 @@ def test_get_directory_stats_group_excludes_files(): if other_group is None: pytest.skip("No group found that excludes the current user") - stats = get_directory_stats(tmpdir, group=other_group) + stats = get_directory_stats(tmpdir, groups=[other_group]) assert stats["file_count"] == 0 assert stats["total_size"] == 0 @@ -278,7 +278,7 @@ def test_get_directory_stats_group_excludes_files(): def test_report_directory_with_group(capsys): - """report_directory prints the Group line when a group is supplied.""" + """report_directory prints the Groups line when groups are supplied.""" current_gid = os.getgid() group_name = grp.getgrgid(current_gid).gr_name @@ -286,21 +286,21 @@ def test_report_directory_with_group(capsys): with open(os.path.join(tmpdir, "sample.txt"), "w") as f: f.write("data") - report_directory(tmpdir, group=group_name) + report_directory(tmpdir, groups=[group_name]) captured = capsys.readouterr() - assert "Group" in captured.out + assert "Groups" in captured.out assert group_name in captured.out def test_report_directory_without_group_no_group_line(capsys): - """report_directory does not print a Group line when no group is given.""" + """report_directory does not print a Groups line when no groups are given.""" with tempfile.TemporaryDirectory() as tmpdir: with open(os.path.join(tmpdir, "sample.txt"), "w") as f: f.write("data") report_directory(tmpdir) captured = capsys.readouterr() - assert "Group" not in captured.out + assert "Groups" not in captured.out # --------------------------------------------------------------------------- @@ -323,10 +323,71 @@ def test_main_with_group(tmp_path, capsys): config_file = tmp_path / "config.yaml" config_file.write_text(f"directories:\n - {data_dir}\n") - main(str(config_file), group=group_name) + main(str(config_file), groups=[group_name]) + captured = capsys.readouterr() + assert str(user_subdir) in captured.out + assert "Groups" in captured.out + assert group_name in captured.out + + +def test_main_with_multiple_groups(tmp_path, capsys): + """main accepts multiple groups and reports subdirectories owned by members of any.""" + current_uid = os.getuid() + current_gid = os.getgid() + current_user = pwd.getpwuid(current_uid).pw_name + group_name = grp.getgrgid(current_gid).gr_name + + # Find a second group the current user belongs to, if one exists. + second_group = group_name + for g in grp.getgrall(): + if g.gr_name != group_name and current_user in g.gr_mem: + second_group = g.gr_name + break + + data_dir = tmp_path / "data" + data_dir.mkdir() + user_subdir = data_dir / "user_dir" + user_subdir.mkdir() + (user_subdir / "file.txt").write_text("hello world") + + config_file = tmp_path / "config.yaml" + config_file.write_text(f"directories:\n - {data_dir}\n") + + main(str(config_file), groups=[group_name, second_group]) + captured = capsys.readouterr() + assert str(user_subdir) in captured.out + assert "Groups" in captured.out + assert group_name in captured.out + + +def test_main_multiple_groups_from_config(tmp_path, capsys): + """main reads multiple groups from the config file's groups list.""" + current_uid = os.getuid() + current_gid = os.getgid() + current_user = pwd.getpwuid(current_uid).pw_name + group_name = grp.getgrgid(current_gid).gr_name + + # Find a second group the current user belongs to, if one exists. + second_group = group_name + for g in grp.getgrall(): + if g.gr_name != group_name and current_user in g.gr_mem: + second_group = g.gr_name + break + + data_dir = tmp_path / "data" + data_dir.mkdir() + user_subdir = data_dir / "user_dir" + user_subdir.mkdir() + (user_subdir / "file.txt").write_text("content") + + config_file = tmp_path / "config.yaml" + config_file.write_text( + f"directories:\n - {data_dir}\ngroups:\n - {group_name}\n - {second_group}\n" + ) + + main(str(config_file)) captured = capsys.readouterr() assert str(user_subdir) in captured.out - assert "Group" in captured.out assert group_name in captured.out @@ -366,7 +427,7 @@ def test_main_logs_group_members(tmp_path, caplog): config_file.write_text(f"directories:\n - {data_dir}\n") with caplog.at_level(logging.INFO, logger="ontrack"): - main(str(config_file), group=group_name) + main(str(config_file), groups=[group_name]) assert "Users found in group" in caplog.text assert group_name in caplog.text @@ -703,7 +764,7 @@ def test_find_reporting_directories_files_and_subdir(tmp_path): def test_main_group_from_config(tmp_path, capsys): - """main reads the group from the config file when --group is not supplied.""" + """main reads the groups from the config file when --groups is not supplied.""" current_gid = os.getgid() group_name = grp.getgrgid(current_gid).gr_name @@ -714,16 +775,16 @@ def test_main_group_from_config(tmp_path, capsys): (user_subdir / "file.txt").write_text("content") config_file = tmp_path / "config.yaml" - config_file.write_text(f"directories:\n - {data_dir}\ngroup: {group_name}\n") + config_file.write_text(f"directories:\n - {data_dir}\ngroups:\n - {group_name}\n") - main(str(config_file)) # no group kwarg; should come from config + main(str(config_file)) # no groups kwarg; should come from config captured = capsys.readouterr() assert str(user_subdir) in captured.out assert group_name in captured.out def test_main_cli_group_overrides_config(tmp_path, capsys): - """CLI --group takes precedence over the group key in the config file.""" + """CLI --groups takes precedence over the groups key in the config file.""" current_gid = os.getgid() group_name = grp.getgrgid(current_gid).gr_name @@ -734,10 +795,10 @@ def test_main_cli_group_overrides_config(tmp_path, capsys): (user_subdir / "file.txt").write_text("content") config_file = tmp_path / "config.yaml" - # Config contains a bogus group; the CLI group should win. - config_file.write_text(f"directories:\n - {data_dir}\ngroup: __bogus_group__\n") + # Config contains a bogus group; the CLI groups should win. + config_file.write_text(f"directories:\n - {data_dir}\ngroups:\n - __bogus_group__\n") - main(str(config_file), group=group_name) + main(str(config_file), groups=[group_name]) captured = capsys.readouterr() assert str(user_subdir) in captured.out assert group_name in captured.out @@ -751,7 +812,7 @@ def test_main_with_group_invalid_parent_dir(tmp_path, capsys): config_file = tmp_path / "config.yaml" config_file.write_text("directories:\n - /nonexistent/path/xyz\n") - main(str(config_file), group=group_name) + main(str(config_file), groups=[group_name]) captured = capsys.readouterr() assert "WARNING" in captured.err @@ -925,13 +986,13 @@ def test_build_directory_entry_invalid(capsys): def test_build_directory_entry_with_group(tmp_path): - """_build_directory_entry includes the group key when group is supplied.""" + """_build_directory_entry includes the groups key when groups are supplied.""" current_gid = os.getgid() group_name = grp.getgrgid(current_gid).gr_name (tmp_path / "f.txt").write_text("data") - entry = _build_directory_entry(str(tmp_path), group=group_name) + entry = _build_directory_entry(str(tmp_path), groups=[group_name]) assert entry is not None - assert entry["group"] == group_name + assert entry["groups"] == [group_name] # ---------------------------------------------------------------------------