From 8dc6b446bb0b2e9c12ab4a703573a8b6f94920f7 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 27 Apr 2026 11:02:20 +0100 Subject: [PATCH 1/7] Return shape of section output written to store --- httomo/runner/dataset_store_backing.py | 27 +++++++++++----------- tests/runner/test_dataset_store_backing.py | 14 +++++++---- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py index b150fe553..561a628ce 100644 --- a/httomo/runner/dataset_store_backing.py +++ b/httomo/runner/dataset_store_backing.py @@ -26,29 +26,30 @@ def calculate_section_chunk_shape( return make_3d_shape_from_shape(shape) -def calculate_section_chunk_bytes( +def calculate_section_output_chunk_shape( chunk_shape: Tuple[int, int, int], - dtype: DTypeLike, section: Section, -) -> int: +) -> Tuple[int, int, int]: """ - Calculate the number of bytes in the section output chunk that is written to the store. Ths - accounts for data's non-slicing dims changing during processing, which changes the chunk - shape for the section and thus affects the number of bytes in the chunk. + Calculate the shape of the section output chunk that is written to the store. This + accounts for the data's non-slicing dims changing during processing, which changes the + chunk shape for the section. """ slicing_dim = _get_slicing_dim(section.pattern) - 1 non_slice_dims_list = list(chunk_shape) non_slice_dims_list.pop(slicing_dim) - non_slice_dims = (non_slice_dims_list[0], non_slice_dims_list[1]) + input_non_slice_dims = (non_slice_dims_list[0], non_slice_dims_list[1]) + output_non_slice_dims = input_non_slice_dims for method in section.methods: if method.memory_gpu is None: continue - non_slice_dims = method.calculate_output_dims(non_slice_dims) + output_non_slice_dims = method.calculate_output_dims(input_non_slice_dims) - return int( - np.prod(non_slice_dims) * chunk_shape[slicing_dim] * np.dtype(dtype).itemsize - ) + output_chunk_shape = list(output_non_slice_dims) + output_chunk_shape.insert(slicing_dim, chunk_shape[slicing_dim]) + + return make_3d_shape_from_shape(output_chunk_shape) class DataSetStoreBacking(Enum): @@ -106,11 +107,11 @@ def determine_store_backing( # Get the number of bytes in the input chunk to the section w/ potential modifications to # the non-slicing dims, to then determine the number of bytes in the output chunk written # by the current section - output_chunk_bytes = calculate_section_chunk_bytes( + output_chunk_shape = calculate_section_output_chunk_shape( chunk_shape=input_chunk_shape, - dtype=dtype, section=sections[section_idx], ) + output_chunk_bytes = int(np.prod(output_chunk_shape) * np.dtype(dtype).itemsize) send_buffer = np.zeros(1, dtype=bool) recv_buffer = np.zeros(1, dtype=bool) diff --git a/tests/runner/test_dataset_store_backing.py b/tests/runner/test_dataset_store_backing.py index 354545f09..7e70f961a 100644 --- a/tests/runner/test_dataset_store_backing.py +++ b/tests/runner/test_dataset_store_backing.py @@ -8,7 +8,7 @@ from httomo.runner.dataset_store_backing import ( DataSetStoreBacking, calculate_section_chunk_shape, - calculate_section_chunk_bytes, + calculate_section_output_chunk_shape, determine_store_backing, ) from httomo.runner.pipeline import Pipeline @@ -108,11 +108,13 @@ def test_calculate_section_chunk_bytes_output_dims_change(mocker: MockerFixture) # Check that the number of bytes in the chunk accounts for the non-slicing dims change by # the method in the section - section_output_chunk_bytes = calculate_section_chunk_bytes( + section_output_chunk_shape = calculate_section_output_chunk_shape( chunk_shape=SECTION_INPUT_CHUNK_SHAPE, - dtype=DTYPE, section=sections[0], ) + section_output_chunk_bytes = ( + np.prod(section_output_chunk_shape) * np.dtype(DTYPE).itemsize + ) assert section_output_chunk_bytes == EXPECTED_SECTION_OUTPUT_CHUNK_BYTES @@ -170,11 +172,13 @@ def test_calculate_section_chunk_bytes_output_dims_change_and_swap( # Check that the number of bytes in the chunk accounts for the non-slicing dims change by # the method in the section - section_output_chunk_bytes = calculate_section_chunk_bytes( + section_output_chunk_shape = calculate_section_output_chunk_shape( chunk_shape=SECTION_INPUT_CHUNK_SHAPE, - dtype=DTYPE, section=sections[0], ) + section_output_chunk_bytes = ( + np.prod(section_output_chunk_shape) * np.dtype(DTYPE).itemsize + ) assert section_output_chunk_bytes == EXPECTED_SECTION_OUTPUT_CHUNK_BYTES From 563d1b691590f077f7d802a91b6023115635b87d Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 27 Apr 2026 11:12:17 +0100 Subject: [PATCH 2/7] Rename function to specify input chunk shape calculation --- httomo/runner/dataset_store_backing.py | 8 ++++---- tests/runner/test_dataset_store_backing.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py index 561a628ce..62b3bb650 100644 --- a/httomo/runner/dataset_store_backing.py +++ b/httomo/runner/dataset_store_backing.py @@ -9,14 +9,14 @@ from httomo.utils import _get_slicing_dim, make_3d_shape_from_shape -def calculate_section_chunk_shape( +def calculate_section_input_chunk_shape( comm: MPI.Comm, global_shape: Tuple[int, int, int], slicing_dim: int, padding: Tuple[int, int], ) -> Tuple[int, int, int]: """ - Calculate chunk shape (w/ or w/o padding) for a section. + Calculate the shape of the section input chunk w/ or w/o padding. """ start = round((global_shape[slicing_dim] / comm.size) * comm.rank) stop = round((global_shape[slicing_dim] / comm.size) * (comm.rank + 1)) @@ -85,7 +85,7 @@ def determine_store_backing( # Get chunk shape created by reader of section `n` (the current section) that will account # for padding. This chunk shape is based on the chunk shape written by the writer of # section `n - 1` (the previous section) - padded_input_chunk_shape = calculate_section_chunk_shape( + padded_input_chunk_shape = calculate_section_input_chunk_shape( comm=comm, global_shape=global_shape, slicing_dim=_get_slicing_dim(sections[section_idx].pattern) - 1, @@ -97,7 +97,7 @@ def determine_store_backing( # Get unpadded chunk shape input to current section (for calculation of bytes in output # chunk for the current section) - input_chunk_shape = calculate_section_chunk_shape( + input_chunk_shape = calculate_section_input_chunk_shape( comm=comm, global_shape=global_shape, slicing_dim=_get_slicing_dim(sections[section_idx].pattern) - 1, diff --git a/tests/runner/test_dataset_store_backing.py b/tests/runner/test_dataset_store_backing.py index 7e70f961a..4eee5013c 100644 --- a/tests/runner/test_dataset_store_backing.py +++ b/tests/runner/test_dataset_store_backing.py @@ -7,7 +7,7 @@ from httomo.runner.dataset_store_backing import ( DataSetStoreBacking, - calculate_section_chunk_shape, + calculate_section_input_chunk_shape, calculate_section_output_chunk_shape, determine_store_backing, ) @@ -63,7 +63,7 @@ def test_calculate_section_chunk_shape( expected_chunk_shape[section_slicing_dim] = ( slicing_dim_len + section_padding[0] + section_padding[1] ) - section_chunk_shape = calculate_section_chunk_shape( + section_chunk_shape = calculate_section_input_chunk_shape( comm=mock_global_comm, global_shape=GLOBAL_SHAPE, slicing_dim=section_slicing_dim, From 3c7b5497d3d79d297b7d71faaee0fdd5079092b9 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 27 Apr 2026 11:28:55 +0100 Subject: [PATCH 3/7] Add reslice memory estimation to store backing calculator --- httomo/runner/dataset_store_backing.py | 50 ++++++++++++++------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py index 62b3bb650..a479f2869 100644 --- a/httomo/runner/dataset_store_backing.py +++ b/httomo/runner/dataset_store_backing.py @@ -5,6 +5,7 @@ from numpy.typing import DTypeLike from mpi4py import MPI +from httomo.data.hdf._utils.reslice import reslice_memory_estimator from httomo.runner.section import Section, determine_section_padding from httomo.utils import _get_slicing_dim, make_3d_shape_from_shape @@ -57,23 +58,6 @@ class DataSetStoreBacking(Enum): File = 2 -def _non_last_section_in_pipeline( - memory_limit_bytes: int, - write_chunk_bytes: int, - read_chunk_bytes: int, -) -> DataSetStoreBacking: - """ - Calculate backing of dataset store for non-last sections in pipeline - """ - if ( - memory_limit_bytes > 0 - and write_chunk_bytes + read_chunk_bytes >= memory_limit_bytes - ): - return DataSetStoreBacking.File - - return DataSetStoreBacking.RAM - - def determine_store_backing( comm: MPI.Comm, sections: List[Section], @@ -113,13 +97,35 @@ def determine_store_backing( ) output_chunk_bytes = int(np.prod(output_chunk_shape) * np.dtype(dtype).itemsize) + # If a reslice operation would occur in moving from the current section to the next + # section, then calculate the number of bytes the reslice operation would take, given the + # input to it (which would be the output chunk of the current section) + reslice_bytes = 0 + if ( + section_idx < len(sections) - 1 + and sections[section_idx].pattern != sections[section_idx + 1].pattern + ): + ring_algorithm_bytes, reslice_output_bytes = reslice_memory_estimator( + output_chunk_shape, + dtype, + _get_slicing_dim(sections[section_idx].pattern), + _get_slicing_dim(sections[section_idx + 1].pattern), + comm, + ) + reslice_bytes += ring_algorithm_bytes + reslice_output_bytes + send_buffer = np.zeros(1, dtype=bool) recv_buffer = np.zeros(1, dtype=bool) - store_backing = _non_last_section_in_pipeline( - memory_limit_bytes=memory_limit_bytes, - read_chunk_bytes=padded_input_chunk_bytes, - write_chunk_bytes=output_chunk_bytes, - ) + + store_backing: DataSetStoreBacking + if ( + memory_limit_bytes > 0 + and padded_input_chunk_bytes + output_chunk_bytes + reslice_bytes + >= memory_limit_bytes + ): + store_backing = DataSetStoreBacking.File + else: + store_backing = DataSetStoreBacking.RAM if store_backing is DataSetStoreBacking.File: send_buffer[0] = True From 19008dfcd81c508c1edcf24c9b7880ce11d20d34 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 27 Apr 2026 11:31:08 +0100 Subject: [PATCH 4/7] Remove unnecessary intermediate variable --- httomo/runner/dataset_store_backing.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py index a479f2869..65a5fe345 100644 --- a/httomo/runner/dataset_store_backing.py +++ b/httomo/runner/dataset_store_backing.py @@ -117,17 +117,11 @@ def determine_store_backing( send_buffer = np.zeros(1, dtype=bool) recv_buffer = np.zeros(1, dtype=bool) - store_backing: DataSetStoreBacking if ( memory_limit_bytes > 0 and padded_input_chunk_bytes + output_chunk_bytes + reslice_bytes >= memory_limit_bytes ): - store_backing = DataSetStoreBacking.File - else: - store_backing = DataSetStoreBacking.RAM - - if store_backing is DataSetStoreBacking.File: send_buffer[0] = True # do a logical OR of all the enum variants across the processes From b1198ac047ff22f7166c6680fe6432633dd212cf Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 27 Apr 2026 12:05:13 +0100 Subject: [PATCH 5/7] Test store backing calculation with section break not via reslice Namely, a section break due to a method requiring the side output of a previous method. --- tests/runner/test_dataset_store_backing.py | 37 +++++++++++++++++----- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/tests/runner/test_dataset_store_backing.py b/tests/runner/test_dataset_store_backing.py index 4eee5013c..db0b87180 100644 --- a/tests/runner/test_dataset_store_backing.py +++ b/tests/runner/test_dataset_store_backing.py @@ -11,6 +11,7 @@ calculate_section_output_chunk_shape, determine_store_backing, ) +from httomo.runner.output_ref import OutputRef from httomo.runner.pipeline import Pipeline from httomo.runner.section import sectionize from httomo.utils import make_3d_shape_from_shape @@ -190,7 +191,7 @@ def test_calculate_section_chunk_bytes_output_dims_change_and_swap( ], ids=["6MB-limit-file-backing", "7MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_single_proc( +def test_determine_store_backing_non_last_section_pipeline_no_reslice_single_proc( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -208,7 +209,12 @@ def test_determine_store_backing_non_last_section_pipeline_single_proc( # Define dummy loader and method wrapper objects loader = make_test_loader(mocker=mocker) m1 = make_test_method(mocker=mocker, method_name="m1", pattern=Pattern.projection) - m2 = make_test_method(mocker=mocker, method_name="m2", pattern=Pattern.sinogram) + m2 = make_test_method( + mocker=mocker, + method_name="m2", + pattern=Pattern.projection, + some_param=(OutputRef(m1, "some-param")), + ) # Get list of section objects that represent pipeline pipeline = Pipeline( @@ -247,7 +253,7 @@ def test_determine_store_backing_non_last_section_pipeline_single_proc( ], ids=["3MB-limit-file-backing", "4MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_two_procs( +def test_determine_store_backing_non_last_section_pipeline_no_reslice_two_procs( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -265,7 +271,12 @@ def test_determine_store_backing_non_last_section_pipeline_two_procs( # Define dummy loader and method wrapper objects loader = make_test_loader(mocker=mocker) m1 = make_test_method(mocker=mocker, method_name="m1", pattern=Pattern.projection) - m2 = make_test_method(mocker=mocker, method_name="m2", pattern=Pattern.sinogram) + m2 = make_test_method( + mocker=mocker, + method_name="m2", + pattern=Pattern.projection, + some_param=(OutputRef(m1, "some-param")), + ) # Get list of section objects that represent pipeline pipeline = Pipeline( @@ -300,7 +311,7 @@ def test_determine_store_backing_non_last_section_pipeline_two_procs( ], ids=["41MB-limit-file-backing", "42MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_large_padding_single_proc( +def test_determine_store_backing_non_last_section_pipeline_large_padding_no_reslice_single_proc( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -322,7 +333,12 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_single_ m1 = make_test_method( mocker=mocker, method_name="m1", pattern=Pattern.projection, padding=True ) - m2 = make_test_method(mocker=mocker, method_name="m2", pattern=Pattern.sinogram) + m2 = make_test_method( + mocker=mocker, + method_name="m2", + pattern=Pattern.projection, + some_param=(OutputRef(m1, "some-param")), + ) mocker.patch.object( target=m1, attribute="calculate_padding", @@ -367,7 +383,7 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_single_ ], ids=["37MB-limit-file-backing", "38MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_large_padding_two_procs( +def test_determine_store_backing_non_last_section_pipeline_large_padding_no_reslice_two_procs( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -389,7 +405,12 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_two_pro m1 = make_test_method( mocker=mocker, method_name="m1", pattern=Pattern.projection, padding=True ) - m2 = make_test_method(mocker=mocker, method_name="m2", pattern=Pattern.sinogram) + m2 = make_test_method( + mocker=mocker, + method_name="m2", + pattern=Pattern.projection, + some_param=(OutputRef(m1, "some-param")), + ) mocker.patch.object( target=m1, attribute="calculate_padding", From 1eaa3f118575bcaa0f7a99ba9342f5aab6cc7507 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 27 Apr 2026 12:49:40 +0100 Subject: [PATCH 6/7] Add dataset store backing calculation tests involving reslice --- httomo/runner/dataset_store_backing.py | 3 +- tests/runner/test_dataset_store_backing.py | 242 +++++++++++++++++++++ 2 files changed, 244 insertions(+), 1 deletion(-) diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py index 65a5fe345..7c69701ad 100644 --- a/httomo/runner/dataset_store_backing.py +++ b/httomo/runner/dataset_store_backing.py @@ -102,7 +102,8 @@ def determine_store_backing( # input to it (which would be the output chunk of the current section) reslice_bytes = 0 if ( - section_idx < len(sections) - 1 + comm.size > 1 + and section_idx < len(sections) - 1 and sections[section_idx].pattern != sections[section_idx + 1].pattern ): ring_algorithm_bytes, reslice_output_bytes = reslice_memory_estimator( diff --git a/tests/runner/test_dataset_store_backing.py b/tests/runner/test_dataset_store_backing.py index db0b87180..8fe097a08 100644 --- a/tests/runner/test_dataset_store_backing.py +++ b/tests/runner/test_dataset_store_backing.py @@ -183,6 +183,60 @@ def test_calculate_section_chunk_bytes_output_dims_change_and_swap( assert section_output_chunk_bytes == EXPECTED_SECTION_OUTPUT_CHUNK_BYTES +@pytest.mark.parametrize( + "memory_limit, expected_store_backing", + [ + (6 * 1024**2, DataSetStoreBacking.File), + (7 * 1024**2, DataSetStoreBacking.RAM), + ], + ids=["6MB-limit-file-backing", "7MB-limit-ram-backing"], +) +def test_determine_store_backing_non_last_section_pipeline_reslice_single_proc( + mocker: MockerFixture, + memory_limit: int, + expected_store_backing: DataSetStoreBacking, +): + COMM = MPI.COMM_WORLD + + # For a single process, chunk shape = global shape + # + # The dtype and shape combined makes: + # - the write chunk ~3.4MB + # - the read chunk also ~3.4MB + # - reslice shouldn't occur due to running with a single process + DTYPE = np.float32 + GLOBAL_SHAPE = (10, 300, 300) + + # Define dummy loader and method wrapper objects + loader = make_test_loader(mocker=mocker) + m1 = make_test_method(mocker=mocker, method_name="m1", pattern=Pattern.projection) + m2 = make_test_method(mocker=mocker, method_name="m2", pattern=Pattern.sinogram) + + # Get list of section objects that represent pipeline + pipeline = Pipeline( + loader=loader, + methods=[m1, m2], + ) + sections = sectionize(pipeline) + + # For execution of non-last sections in pipelines, the writer must take into account that a + # copy of the chunk is made by the reader of the following section. Therefore, two copies + # of the chunk must be taken into account when deciding the backing of the store. + # + # Note that section 0 is only the section that is "not the last section", so it's the only + # one that will need to account for two copies of the chunk, and thus the main target of + # the test. Hence, why `section_idx=0` is given. + store_backing = determine_store_backing( + comm=COMM, + sections=sections, + memory_limit_bytes=memory_limit, + dtype=DTYPE, + global_shape=GLOBAL_SHAPE, + section_idx=0, + ) + assert store_backing is expected_store_backing + + @pytest.mark.parametrize( "memory_limit, expected_store_backing", [ @@ -241,6 +295,65 @@ def test_determine_store_backing_non_last_section_pipeline_no_reslice_single_pro assert store_backing is expected_store_backing +@pytest.mark.mpi +@pytest.mark.skipif( + MPI.COMM_WORLD.size != 2, reason="Only rank-2 MPI is supported with this test" +) +@pytest.mark.parametrize( + "memory_limit, expected_store_backing", + [ + (6 * 1024**2, DataSetStoreBacking.File), + (7 * 1024**2, DataSetStoreBacking.RAM), + ], + ids=["6MB-limit-file-backing", "7MB-limit-ram-backing"], +) +def test_determine_store_backing_non_last_section_pipeline_reslice_two_procs( + mocker: MockerFixture, + memory_limit: int, + expected_store_backing: DataSetStoreBacking, +): + COMM = MPI.COMM_WORLD + + # For two processes, chunk shape = half of global shape + # + # The dtype and shape combined makes: + # - the write chunk ~1.7MB + # - the read chunk also ~1.7MB + # - the output of reslice also ~1.7MB + # - the intermediate data created by reslice algorithm ~1.7MB + DTYPE = np.float32 + GLOBAL_SHAPE = (10, 300, 300) + + # Define dummy loader and method wrapper objects + loader = make_test_loader(mocker=mocker) + m1 = make_test_method(mocker=mocker, method_name="m1", pattern=Pattern.projection) + m2 = make_test_method(mocker=mocker, method_name="m2", pattern=Pattern.sinogram) + + # Get list of section objects that represent pipeline + pipeline = Pipeline( + loader=loader, + methods=[m1, m2], + ) + sections = sectionize(pipeline) + + # For exeuction of non-last sections in pipelines, the writer must take into account that a + # copy of the chunk is made by the reader of the following section. Therefore, two copies + # of the chunk must be taken into account when deciding the backing of the store. + # + # Note that section 0 is only the section that is "not the last section", so it's the only + # one that will need to account for two copies of the chunk, and thus the main target of + # the test. Hence, why `section_idx=0` is given. + store_backing = determine_store_backing( + comm=COMM, + sections=sections, + memory_limit_bytes=memory_limit, + dtype=DTYPE, + global_shape=GLOBAL_SHAPE, + section_idx=0, + ) + assert store_backing is expected_store_backing + + @pytest.mark.mpi @pytest.mark.skipif( MPI.COMM_WORLD.size != 2, reason="Only rank-2 MPI is supported with this test" @@ -303,6 +416,68 @@ def test_determine_store_backing_non_last_section_pipeline_no_reslice_two_procs( assert store_backing is expected_store_backing +@pytest.mark.parametrize( + "memory_limit, expected_store_backing", + [ + (41 * 1024**2, DataSetStoreBacking.File), + (42 * 1024**2, DataSetStoreBacking.RAM), + ], + ids=["41MB-limit-file-backing", "42MB-limit-ram-backing"], +) +def test_determine_store_backing_non_last_section_pipeline_large_padding_reslice_single_proc( + mocker: MockerFixture, + memory_limit: int, + expected_store_backing: DataSetStoreBacking, +): + COMM = MPI.COMM_WORLD + + # For a single process, chunk shape = global shape + # + # The dtype and shape combined makes: + # - the write chunk ~3.4MB + # - the padded input chunk ~37.7MB (110 * 300 * 300 * 4 / (1024 ** 2)) + # - reslice shouldn't occur due to running with a single process + DTYPE = np.float32 + GLOBAL_SHAPE = (10, 300, 300) + PADDING = (50, 50) + + # Define dummy loader and method wrapper objects + loader = make_test_loader(mocker=mocker) + m1 = make_test_method( + mocker=mocker, method_name="m1", pattern=Pattern.projection, padding=True + ) + m2 = make_test_method(mocker=mocker, method_name="m2", pattern=Pattern.sinogram) + mocker.patch.object( + target=m1, + attribute="calculate_padding", + return_value=PADDING, + ) + + # Get list of section objects that represent pipeline + pipeline = Pipeline( + loader=loader, + methods=[m1, m2], + ) + sections = sectionize(pipeline) + + # For execution of non-last sections in pipelines, the writer must take into account that a + # copy of the chunk is made by the reader of the following section. Therefore, two copies + # of the chunk must be taken into account when deciding the backing of the store. + # + # Note that section 0 is only the section that is "not the last section", so it's the only + # one that will need to account for two copies of the chunk, and thus the main target of + # the test. Hence, why `section_idx=0` is given. + store_backing = determine_store_backing( + comm=COMM, + sections=sections, + memory_limit_bytes=memory_limit, + dtype=DTYPE, + global_shape=GLOBAL_SHAPE, + section_idx=0, + ) + assert store_backing is expected_store_backing + + @pytest.mark.parametrize( "memory_limit, expected_store_backing", [ @@ -371,6 +546,73 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_no_resl assert store_backing is expected_store_backing +@pytest.mark.mpi +@pytest.mark.skipif( + MPI.COMM_WORLD.size != 2, reason="Only rank-2 MPI is supported with this test" +) +@pytest.mark.parametrize( + "memory_limit, expected_store_backing", + [ + (41 * 1024**2, DataSetStoreBacking.File), + (42 * 1024**2, DataSetStoreBacking.RAM), + ], + ids=["41MB-limit-file-backing", "42MB-limit-ram-backing"], +) +def test_determine_store_backing_non_last_section_pipeline_large_padding_reslice_two_procs( + mocker: MockerFixture, + memory_limit: int, + expected_store_backing: DataSetStoreBacking, +): + COMM = MPI.COMM_WORLD + + # For two processes, chunk shape = half of global shape + # + # The dtype and shape combined makes: + # - the write chunk ~1.7MB + # - the padded input chunk ~36.0MB (105 * 300 * 300 * 4 / (1024 ** 2)) + # - the output of reslice also ~1.7MB + # - the intermediate data created by reslice algorithm ~1.7MB + DTYPE = np.float32 + GLOBAL_SHAPE = (10, 300, 300) + PADDING = (50, 50) + + # Define dummy loader and method wrapper objects + loader = make_test_loader(mocker=mocker) + m1 = make_test_method( + mocker=mocker, method_name="m1", pattern=Pattern.projection, padding=True + ) + m2 = make_test_method(mocker=mocker, method_name="m2", pattern=Pattern.sinogram) + mocker.patch.object( + target=m1, + attribute="calculate_padding", + return_value=PADDING, + ) + + # Get list of section objects that represent pipeline + pipeline = Pipeline( + loader=loader, + methods=[m1, m2], + ) + sections = sectionize(pipeline) + + # For exeuction of non-last sections in pipelines, the writer must take into account that a + # copy of the chunk is made by the reader of the following section. Therefore, two copies + # of the chunk must be taken into account when deciding the backing of the store. + # + # Note that section 0 is only the section that is "not the last section", so it's the only + # one that will need to account for two copies of the chunk, and thus the main target of + # the test. Hence, why `section_idx=0` is given. + store_backing = determine_store_backing( + comm=COMM, + sections=sections, + memory_limit_bytes=memory_limit, + dtype=DTYPE, + global_shape=GLOBAL_SHAPE, + section_idx=0, + ) + assert store_backing is expected_store_backing + + @pytest.mark.mpi @pytest.mark.skipif( MPI.COMM_WORLD.size != 2, reason="Only rank-2 MPI is supported with this test" From e350974e26cfcdb9131d067ec53cbdfb875f4e8c Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Mon, 27 Apr 2026 12:55:00 +0100 Subject: [PATCH 7/7] Remove unnecessary mentions of "non-last section" in store backing calculation tests --- tests/runner/test_dataset_store_backing.py | 72 +++------------------- 1 file changed, 8 insertions(+), 64 deletions(-) diff --git a/tests/runner/test_dataset_store_backing.py b/tests/runner/test_dataset_store_backing.py index 8fe097a08..bb4123585 100644 --- a/tests/runner/test_dataset_store_backing.py +++ b/tests/runner/test_dataset_store_backing.py @@ -191,7 +191,7 @@ def test_calculate_section_chunk_bytes_output_dims_change_and_swap( ], ids=["6MB-limit-file-backing", "7MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_reslice_single_proc( +def test_determine_store_backing_reslice_single_proc( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -219,13 +219,6 @@ def test_determine_store_backing_non_last_section_pipeline_reslice_single_proc( ) sections = sectionize(pipeline) - # For execution of non-last sections in pipelines, the writer must take into account that a - # copy of the chunk is made by the reader of the following section. Therefore, two copies - # of the chunk must be taken into account when deciding the backing of the store. - # - # Note that section 0 is only the section that is "not the last section", so it's the only - # one that will need to account for two copies of the chunk, and thus the main target of - # the test. Hence, why `section_idx=0` is given. store_backing = determine_store_backing( comm=COMM, sections=sections, @@ -245,7 +238,7 @@ def test_determine_store_backing_non_last_section_pipeline_reslice_single_proc( ], ids=["6MB-limit-file-backing", "7MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_no_reslice_single_proc( +def test_determine_store_backing_no_reslice_single_proc( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -277,13 +270,6 @@ def test_determine_store_backing_non_last_section_pipeline_no_reslice_single_pro ) sections = sectionize(pipeline) - # For execution of non-last sections in pipelines, the writer must take into account that a - # copy of the chunk is made by the reader of the following section. Therefore, two copies - # of the chunk must be taken into account when deciding the backing of the store. - # - # Note that section 0 is only the section that is "not the last section", so it's the only - # one that will need to account for two copies of the chunk, and thus the main target of - # the test. Hence, why `section_idx=0` is given. store_backing = determine_store_backing( comm=COMM, sections=sections, @@ -307,7 +293,7 @@ def test_determine_store_backing_non_last_section_pipeline_no_reslice_single_pro ], ids=["6MB-limit-file-backing", "7MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_reslice_two_procs( +def test_determine_store_backing_reslice_two_procs( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -336,13 +322,6 @@ def test_determine_store_backing_non_last_section_pipeline_reslice_two_procs( ) sections = sectionize(pipeline) - # For exeuction of non-last sections in pipelines, the writer must take into account that a - # copy of the chunk is made by the reader of the following section. Therefore, two copies - # of the chunk must be taken into account when deciding the backing of the store. - # - # Note that section 0 is only the section that is "not the last section", so it's the only - # one that will need to account for two copies of the chunk, and thus the main target of - # the test. Hence, why `section_idx=0` is given. store_backing = determine_store_backing( comm=COMM, sections=sections, @@ -366,7 +345,7 @@ def test_determine_store_backing_non_last_section_pipeline_reslice_two_procs( ], ids=["3MB-limit-file-backing", "4MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_no_reslice_two_procs( +def test_determine_store_backing_no_reslice_two_procs( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -398,13 +377,6 @@ def test_determine_store_backing_non_last_section_pipeline_no_reslice_two_procs( ) sections = sectionize(pipeline) - # For exeuction of non-last sections in pipelines, the writer must take into account that a - # copy of the chunk is made by the reader of the following section. Therefore, two copies - # of the chunk must be taken into account when deciding the backing of the store. - # - # Note that section 0 is only the section that is "not the last section", so it's the only - # one that will need to account for two copies of the chunk, and thus the main target of - # the test. Hence, why `section_idx=0` is given. store_backing = determine_store_backing( comm=COMM, sections=sections, @@ -424,7 +396,7 @@ def test_determine_store_backing_non_last_section_pipeline_no_reslice_two_procs( ], ids=["41MB-limit-file-backing", "42MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_large_padding_reslice_single_proc( +def test_determine_store_backing_large_padding_reslice_single_proc( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -460,13 +432,6 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_reslice ) sections = sectionize(pipeline) - # For execution of non-last sections in pipelines, the writer must take into account that a - # copy of the chunk is made by the reader of the following section. Therefore, two copies - # of the chunk must be taken into account when deciding the backing of the store. - # - # Note that section 0 is only the section that is "not the last section", so it's the only - # one that will need to account for two copies of the chunk, and thus the main target of - # the test. Hence, why `section_idx=0` is given. store_backing = determine_store_backing( comm=COMM, sections=sections, @@ -486,7 +451,7 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_reslice ], ids=["41MB-limit-file-backing", "42MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_large_padding_no_reslice_single_proc( +def test_determine_store_backing_large_padding_no_reslice_single_proc( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -528,13 +493,6 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_no_resl sections = sectionize(pipeline) print(sections) - # For execution of non-last sections which have non-zero padding, the reader creates a - # padded copy of the chunk that is made. Therefore, two copies of the chunk must be taken - # into account when deciding the backing of the store. - # - # Note that section 0 is the only section of the two produced that is "not the last - # section", so it's the only one that will need to account for two copies of the chunk, and - # thus the main target of the test. Hence, why `section_idx=0` is given. store_backing = determine_store_backing( comm=COMM, sections=sections, @@ -558,7 +516,7 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_no_resl ], ids=["41MB-limit-file-backing", "42MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_large_padding_reslice_two_procs( +def test_determine_store_backing_large_padding_reslice_two_procs( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -595,13 +553,6 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_reslice ) sections = sectionize(pipeline) - # For exeuction of non-last sections in pipelines, the writer must take into account that a - # copy of the chunk is made by the reader of the following section. Therefore, two copies - # of the chunk must be taken into account when deciding the backing of the store. - # - # Note that section 0 is only the section that is "not the last section", so it's the only - # one that will need to account for two copies of the chunk, and thus the main target of - # the test. Hence, why `section_idx=0` is given. store_backing = determine_store_backing( comm=COMM, sections=sections, @@ -625,7 +576,7 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_reslice ], ids=["37MB-limit-file-backing", "38MB-limit-ram-backing"], ) -def test_determine_store_backing_non_last_section_pipeline_large_padding_no_reslice_two_procs( +def test_determine_store_backing_large_padding_no_reslice_two_procs( mocker: MockerFixture, memory_limit: int, expected_store_backing: DataSetStoreBacking, @@ -666,13 +617,6 @@ def test_determine_store_backing_non_last_section_pipeline_large_padding_no_resl ) sections = sectionize(pipeline) - # For execution of non-last sections which have non-zero padding, the reader creates a - # padded copy of the chunk that is made. Therefore, two copies of the chunk must be taken - # into account when deciding the backing of the store. - # - # Note that section 0 is the only section of the two produced that is "not the last - # section", so it's the only one that will need to account for two copies of the chunk, and - # thus the main target of the test. Hence, why `section_idx=0` is given. store_backing = determine_store_backing( comm=COMM, sections=sections,