From 92fcf8a824c94d33fee6926c5563981abc7bfec4 Mon Sep 17 00:00:00 2001 From: Martin Helm Date: Fri, 27 Feb 2026 12:11:39 +0100 Subject: [PATCH 1/3] Change parsing defaults! Remove auto parsing style option entirely. By default, all tif files inside a folder and all its subdirectories are parsed into a single Image element inside a SpatialData object. Only if the user specifically requests parsing subfolders does the first level underneath the specified directory define the image elements. All tifs in these directories, and their subdirectories, will be parsed into separate image elements, with corresponding tables and coordinate systems. --- src/spatialdata_io/readers/macsima.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py index 2b4493f4..3fb0a1dd 100644 --- a/src/spatialdata_io/readers/macsima.py +++ b/src/spatialdata_io/readers/macsima.py @@ -47,7 +47,6 @@ class MACSimaParsingStyle(ModeEnum): PROCESSED_SINGLE_FOLDER = "processed_single_folder" PROCESSED_MULTIPLE_FOLDERS = "processed_multiple_folders" RAW = "raw" - AUTO = "auto" @dataclass @@ -223,7 +222,7 @@ def get_stack(self) -> da.Array: def macsima( path: str | Path, - parsing_style: MACSimaParsingStyle | str = MACSimaParsingStyle.AUTO, + parsing_style: MACSimaParsingStyle | str = MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER, filter_folder_names: list[str] | None = None, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), subset: int | None = None, @@ -294,18 +293,6 @@ def macsima( if not isinstance(parsing_style, MACSimaParsingStyle): parsing_style = MACSimaParsingStyle(parsing_style) - if parsing_style == MACSimaParsingStyle.AUTO: - assert path.is_dir(), f"Path {path} is not a directory." 
- - if any(p.suffix in [".tif", ".tiff"] for p in path.iterdir()): - # if path contains tifs, do parse_processed_folder on path - parsing_style = MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER - elif all(p.is_dir() for p in path.iterdir() if not p.name.startswith(".")): - # if path contains only folders or hidden files, do parse_processed_folder on each folder - parsing_style = MACSimaParsingStyle.PROCESSED_MULTIPLE_FOLDERS - else: - raise ValueError(f"Cannot determine parsing style for path {path}. Please specify the parsing style.") - if parsing_style == MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER: return parse_processed_folder( path=path, @@ -624,7 +611,7 @@ def parse_processed_folder( nuclei_channel_name: str = "DAPI", split_threshold_nuclei_channel: int | None = 2, skip_rounds: list[int] | None = None, - file_pattern: str = "*.tif*", + file_pattern: str = "**/*.tif*", include_cycle_in_channel_name: bool = False, ) -> SpatialData: """Parse a single folder containing images from a cyclical imaging platform.""" From 861155aba3e995d885be8ee896f199c9766515eb Mon Sep 17 00:00:00 2001 From: Martin Helm Date: Fri, 27 Feb 2026 12:27:07 +0100 Subject: [PATCH 2/3] Skip empty folders when parsing multiple folders --- src/spatialdata_io/readers/macsima.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py index 3fb0a1dd..3e234b73 100644 --- a/src/spatialdata_io/readers/macsima.py +++ b/src/spatialdata_io/readers/macsima.py @@ -318,6 +318,9 @@ def macsima( for p in path.iterdir() if p.is_dir() and (not filter_folder_names or not any(f in p.name for f in filter_folder_names)) ]: + if not len(list(p.glob("*.tif*"))): + warnings.warn(f"No tif files found in {p}, skipping it!", UserWarning, stacklevel=2) + continue sdatas[p.stem] = parse_processed_folder( path=p, imread_kwargs=imread_kwargs, From ca6cd7cae3d968d36b210ab809b4c43d178b730c Mon Sep 17 00:00:00 2001 From: Martin Helm Date: Tue, 5 May 2026 
12:54:24 +0200 Subject: [PATCH 3/3] Clarify parsing_style docstring and add tests --- .gitignore | 5 ++- src/spatialdata_io/readers/macsima.py | 9 +++- tests/test_macsima.py | 64 +++++++++++++++++++++------ 3 files changed, 62 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 39d7dd00..bb5e5537 100644 --- a/.gitignore +++ b/.gitignore @@ -50,5 +50,6 @@ data # data folder data/ tests/data -.venv -.uv.lock +uv.lock +.asv/ +.venv/ diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py index e31bbb08..1d0650bf 100644 --- a/src/spatialdata_io/readers/macsima.py +++ b/src/spatialdata_io/readers/macsima.py @@ -254,7 +254,8 @@ def macsima( path Path to the directory containing the data. parsing_style - Parsing style to use. If ``auto``, the parsing style is determined based on the contents of the path. + Parsing style to use. If ``processed_single_folder``, all subfolders of ``path`` are combined into a stack. + If ``processed_multiple_folders``, a stack is created for each folder directly beneath ``path``. filter_folder_names List of folder names to filter out when parsing multiple folders. imread_kwargs @@ -295,6 +296,12 @@ def macsima( parsing_style = MACSimaParsingStyle(parsing_style) if parsing_style == MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER: + if filter_folder_names: + warnings.warn( + "single_processed_folder was requested but filter_folder_names was specified. 
Note that it is ignored here, filtering only happens for processed_multi_folders!", + UserWarning, + stacklevel=2, + ) return parse_processed_folder( path=path, imread_kwargs=imread_kwargs, diff --git a/tests/test_macsima.py b/tests/test_macsima.py index 72b33fbe..44f29744 100644 --- a/tests/test_macsima.py +++ b/tests/test_macsima.py @@ -35,7 +35,6 @@ _parse_v1_ome_metadata, macsima, ) -from tests._utils import skip_if_below_python_version RNG = da.random.default_rng(seed=0) @@ -101,7 +100,6 @@ def test_exception_on_no_valid_files(tmp_path: Path) -> None: macsima(tmp_path) -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected", [ @@ -123,7 +121,6 @@ def test_image_size(dataset: str, expected: dict[str, Any]) -> None: assert extent == expected -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected", [("OMAP10_small", 4), ("OMAP23_small", 5)], @@ -139,7 +136,6 @@ def test_total_channels(dataset: str, expected: int) -> None: assert channels == expected -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected", [ @@ -161,7 +157,6 @@ def test_channel_names_with_cycle_in_name(dataset: str, expected: list[str]) -> assert list(channels) == expected -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected", [ @@ -178,7 +173,6 @@ def test_total_rounds(dataset: str, expected: list[int]) -> None: assert max_cycle == expected -@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,skip_rounds,expected", [ @@ -201,6 +195,57 @@ def test_skip_rounds(dataset: str, skip_rounds: list[int], expected: list[str]) assert list(channels) == expected, f"Expected {expected}, got {list(channels)}" +def test_unsupported_parsing_styles() -> None: + with pytest.raises(ValueError): + macsima(Path(), parsing_style="not_a_parsing_style") + + +def test_processed_single_folder_parsing_returns_a_single_image_stack(tmp_path: Path) -> None: + omap10_path = Path("./data/OMAP10_small") + 
shutil.copytree(omap10_path, tmp_path / "OMAP10_small_1") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_2") + + sdata = macsima(tmp_path, parsing_style="processed_single_folder") + + assert len(sdata.images) == 1 + # omap10_small has 4 channels, so we expect 8 here + el = sdata[list(sdata.images.keys())[0]] + assert len(get_channel_names(el)) == 8 + assert len(sdata.tables) == 1 + + +def test_processed_single_folder_parsing_warns_when_specifying_filtered_folders(tmp_path: Path) -> None: + omap10_path = Path("./data/OMAP10_small") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_1") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_2") + with pytest.warns(UserWarning): + macsima(tmp_path, parsing_style="processed_single_folder", filter_folder_names=["OMAP10_small_2"]) + + +def test_processed_multiple_folders_returns_an_image_stack_per_subfolder(tmp_path: Path) -> None: + omap10_path = Path("./data/OMAP10_small") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_1") + shutil.copytree(omap10_path, tmp_path / "OMAP10_small_2") + + sdata = macsima(tmp_path, parsing_style="processed_multiple_folders") + + assert len(sdata.images) == 2 + for el in sdata.images.keys(): + assert len(get_channel_names(sdata[el])) == 4 + assert len(sdata.tables) == 2 + + +def test_processed_multiple_folders_skips_filtered_folder_names(tmp_path: Path) -> None: + shutil.copytree(Path("./data/OMAP10_small"), tmp_path / "OMAP10_small") + shutil.copytree(Path("./data/OMAP23_small"), tmp_path / "OMAP23_small") + + sdata = macsima(tmp_path, parsing_style="processed_multiple_folders", filter_folder_names=["OMAP10_small"]) + assert len(sdata.images) == 1 + assert list(sdata.images.keys()) == ["OMAP23_small_image"] + assert len(sdata.tables) == 1 + assert list(sdata.tables.keys()) == ["OMAP23_small_table"] + + METADATA_COLUMN_ORDER = [ "cycle", "imagetype", @@ -242,7 +287,6 @@ def test_skip_rounds(dataset: str, skip_rounds: list[int], expected: list[str]) ) 
-@skip_if_below_python_version() @pytest.mark.parametrize( "dataset,expected_df", [ @@ -262,11 +306,6 @@ def test_metadata_table(dataset: str, expected_df: pd.DataFrame) -> None: pd.testing.assert_frame_equal(actual, expected_df) -def test_parsing_style() -> None: - with pytest.raises(ValueError): - macsima(Path(), parsing_style="not_a_parsing_style") - - def test_mci_sort_by_channel() -> None: sizes = [100, 200, 300] c_names = ["test11", "test3", "test2"] @@ -315,7 +354,6 @@ def test_mci_array_reference() -> None: assert da.all(mci.data[0] == orig_arr1) -@skip_if_below_python_version() @pytest.mark.parametrize("dataset", ["OMAP10_small", "OMAP23_small"]) def test_cli_macsima(runner: CliRunner, dataset: str) -> None: f = Path("./data") / dataset