Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,6 @@ data
# data folder
data/
tests/data
.venv
.uv.lock
uv.lock
.asv/
.venv/
29 changes: 13 additions & 16 deletions src/spatialdata_io/readers/macsima.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ class MACSimaParsingStyle(ModeEnum):
PROCESSED_SINGLE_FOLDER = "processed_single_folder"
PROCESSED_MULTIPLE_FOLDERS = "processed_multiple_folders"
RAW = "raw"
AUTO = "auto"


@dataclass
Expand Down Expand Up @@ -224,7 +223,7 @@ def get_stack(self) -> da.Array:

def macsima(
path: str | Path,
parsing_style: MACSimaParsingStyle | str = MACSimaParsingStyle.AUTO,
parsing_style: MACSimaParsingStyle | str = MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER,
filter_folder_names: list[str] | None = None,
imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
subset: int | None = None,
Expand Down Expand Up @@ -255,7 +254,8 @@ def macsima(
path
Path to the directory containing the data.
parsing_style
Parsing style to use. If ``auto``, the parsing style is determined based on the contents of the path.
Parsing style to use. If ``processed_single_folder``, all subfolders of ``path`` are combined into a stack.
If ``processed_multiple_folders``, a stack is created for each folder directly beneath ``path``.
filter_folder_names
List of folder names to filter out when parsing multiple folders.
imread_kwargs
Expand Down Expand Up @@ -295,19 +295,13 @@ def macsima(
if not isinstance(parsing_style, MACSimaParsingStyle):
parsing_style = MACSimaParsingStyle(parsing_style)

if parsing_style == MACSimaParsingStyle.AUTO:
assert path.is_dir(), f"Path {path} is not a directory."

if any(p.suffix in [".tif", ".tiff"] for p in path.iterdir()):
# if path contains tifs, do parse_processed_folder on path
parsing_style = MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER
elif all(p.is_dir() for p in path.iterdir() if not p.name.startswith(".")):
# if path contains only folders or hidden files, do parse_processed_folder on each folder
parsing_style = MACSimaParsingStyle.PROCESSED_MULTIPLE_FOLDERS
else:
raise ValueError(f"Cannot determine parsing style for path {path}. Please specify the parsing style.")

if parsing_style == MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER:
if filter_folder_names:
warnings.warn(
"single_processed_folder was requested but filter_folder_names was specified. Note that it is ignored here, filtering only happens for processed_multi_folders!",
UserWarning,
stacklevel=2,
)
return parse_processed_folder(
path=path,
imread_kwargs=imread_kwargs,
Expand All @@ -332,6 +326,9 @@ def macsima(
for p in path.iterdir()
if p.is_dir() and (not filter_folder_names or not any(f in p.name for f in filter_folder_names))
]:
if not len(list(p.glob("*.tif*"))):
warnings.warn(f"No tif files found in {p}, skipping it!", UserWarning, stacklevel=2)
continue
sdatas[p.stem] = parse_processed_folder(
path=p,
imread_kwargs=imread_kwargs,
Expand Down Expand Up @@ -625,7 +622,7 @@ def parse_processed_folder(
nuclei_channel_name: str = "DAPI",
split_threshold_nuclei_channel: int | None = 2,
skip_rounds: list[int] | None = None,
file_pattern: str = "*.tif*",
file_pattern: str = "**/*.tif*",
include_cycle_in_channel_name: bool = False,
) -> SpatialData:
"""Parse a single folder containing images from a cyclical imaging platform."""
Expand Down
64 changes: 51 additions & 13 deletions tests/test_macsima.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
_parse_v1_ome_metadata,
macsima,
)
from tests._utils import skip_if_below_python_version

RNG = da.random.default_rng(seed=0)

Expand Down Expand Up @@ -101,7 +100,6 @@ def test_exception_on_no_valid_files(tmp_path: Path) -> None:
macsima(tmp_path)


@skip_if_below_python_version()
@pytest.mark.parametrize(
"dataset,expected",
[
Expand All @@ -123,7 +121,6 @@ def test_image_size(dataset: str, expected: dict[str, Any]) -> None:
assert extent == expected


@skip_if_below_python_version()
@pytest.mark.parametrize(
"dataset,expected",
[("OMAP10_small", 4), ("OMAP23_small", 5)],
Expand All @@ -139,7 +136,6 @@ def test_total_channels(dataset: str, expected: int) -> None:
assert channels == expected


@skip_if_below_python_version()
@pytest.mark.parametrize(
"dataset,expected",
[
Expand All @@ -161,7 +157,6 @@ def test_channel_names_with_cycle_in_name(dataset: str, expected: list[str]) ->
assert list(channels) == expected


@skip_if_below_python_version()
@pytest.mark.parametrize(
"dataset,expected",
[
Expand All @@ -178,7 +173,6 @@ def test_total_rounds(dataset: str, expected: list[int]) -> None:
assert max_cycle == expected


@skip_if_below_python_version()
@pytest.mark.parametrize(
"dataset,skip_rounds,expected",
[
Expand All @@ -201,6 +195,57 @@ def test_skip_rounds(dataset: str, skip_rounds: list[int], expected: list[str])
assert list(channels) == expected, f"Expected {expected}, got {list(channels)}"


def test_unsupported_parsing_styles() -> None:
    """An unrecognised ``parsing_style`` string must make ``macsima`` raise ``ValueError``."""
    bogus_style = "not_a_parsing_style"
    with pytest.raises(ValueError):
        macsima(Path(), parsing_style=bogus_style)


def test_processed_single_folder_parsing_returns_a_single_image_stack(tmp_path: Path) -> None:
    """Reading two dataset copies as ``processed_single_folder`` yields one merged image and one table."""
    source_dir = Path("./data/OMAP10_small")
    for copy_name in ("OMAP10_small_1", "OMAP10_small_2"):
        shutil.copytree(source_dir, tmp_path / copy_name)

    sdata = macsima(tmp_path, parsing_style="processed_single_folder")

    assert len(sdata.images) == 1
    # OMAP10_small has 4 channels and was copied twice, so the single stack must carry 8.
    only_image_key = next(iter(sdata.images.keys()))
    merged_image = sdata[only_image_key]
    assert len(get_channel_names(merged_image)) == 8
    assert len(sdata.tables) == 1


def test_processed_single_folder_parsing_warns_when_specifying_filtered_folders(tmp_path: Path) -> None:
    """``macsima`` must warn that ``filter_folder_names`` is ignored for ``processed_single_folder``."""
    omap10_path = Path("./data/OMAP10_small")
    shutil.copytree(omap10_path, tmp_path / "OMAP10_small_1")
    shutil.copytree(omap10_path, tmp_path / "OMAP10_small_2")
    # Pin the warning text: a bare ``pytest.warns(UserWarning)`` would also pass on any
    # unrelated UserWarning raised while parsing, hiding a regression in the check under test.
    with pytest.warns(UserWarning, match="filter_folder_names"):
        macsima(tmp_path, parsing_style="processed_single_folder", filter_folder_names=["OMAP10_small_2"])


def test_processed_multiple_folders_returns_an_image_stack_per_subfolder(tmp_path: Path) -> None:
    """Reading two dataset copies as ``processed_multiple_folders`` yields one image and table per folder."""
    source_dir = Path("./data/OMAP10_small")
    for copy_name in ("OMAP10_small_1", "OMAP10_small_2"):
        shutil.copytree(source_dir, tmp_path / copy_name)

    sdata = macsima(tmp_path, parsing_style="processed_multiple_folders")

    assert len(sdata.images) == 2
    # Each subfolder is parsed on its own, so every image keeps the 4 channels of OMAP10_small.
    for image_key in sdata.images.keys():
        channel_count = len(get_channel_names(sdata[image_key]))
        assert channel_count == 4
    assert len(sdata.tables) == 2


def test_processed_multiple_folders_skips_filtered_folder_names(tmp_path: Path) -> None:
    """Folders named in ``filter_folder_names`` are left out of a ``processed_multiple_folders`` read."""
    omap10_source = Path("./data/OMAP10_small")
    omap23_source = Path("./data/OMAP23_small")
    shutil.copytree(omap10_source, tmp_path / "OMAP10_small")
    shutil.copytree(omap23_source, tmp_path / "OMAP23_small")

    excluded = ["OMAP10_small"]
    sdata = macsima(tmp_path, parsing_style="processed_multiple_folders", filter_folder_names=excluded)

    # Only the folder that was not filtered out may contribute an image and a table.
    assert len(sdata.images) == 1
    image_keys = list(sdata.images.keys())
    assert image_keys == ["OMAP23_small_image"]
    assert len(sdata.tables) == 1
    table_keys = list(sdata.tables.keys())
    assert table_keys == ["OMAP23_small_table"]


METADATA_COLUMN_ORDER = [
"cycle",
"imagetype",
Expand Down Expand Up @@ -242,7 +287,6 @@ def test_skip_rounds(dataset: str, skip_rounds: list[int], expected: list[str])
)


@skip_if_below_python_version()
@pytest.mark.parametrize(
"dataset,expected_df",
[
Expand All @@ -262,11 +306,6 @@ def test_metadata_table(dataset: str, expected_df: pd.DataFrame) -> None:
pd.testing.assert_frame_equal(actual, expected_df)


def test_parsing_style() -> None:
    """Passing a string that is not a known parsing style must make ``macsima`` raise ``ValueError``."""
    invalid_style = "not_a_parsing_style"
    with pytest.raises(ValueError):
        macsima(Path(), parsing_style=invalid_style)


def test_mci_sort_by_channel() -> None:
sizes = [100, 200, 300]
c_names = ["test11", "test3", "test2"]
Expand Down Expand Up @@ -315,7 +354,6 @@ def test_mci_array_reference() -> None:
assert da.all(mci.data[0] == orig_arr1)


@skip_if_below_python_version()
@pytest.mark.parametrize("dataset", ["OMAP10_small", "OMAP23_small"])
def test_cli_macsima(runner: CliRunner, dataset: str) -> None:
f = Path("./data") / dataset
Expand Down
Loading