From 955c1adbdf7eb955965385130f004e5724991b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20T=C3=BCk=C3=B6r?= Date: Tue, 31 Mar 2026 11:33:36 +0200 Subject: [PATCH 1/6] Add reslice memory estimator and log result --- httomo/data/dataset_store.py | 6 ++- httomo/data/hdf/_utils/reslice.py | 74 +++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/httomo/data/dataset_store.py b/httomo/data/dataset_store.py index 139789373..c94e8ea50 100644 --- a/httomo/data/dataset_store.py +++ b/httomo/data/dataset_store.py @@ -25,7 +25,7 @@ import time import h5py from typing import List, Literal, Optional, Tuple, Union -from httomo.data.hdf._utils.reslice import reslice +from httomo.data.hdf._utils.reslice import reslice, reslice_memory_estimator from httomo.data.padding import extrapolate_after, extrapolate_before from httomo.runner.auxiliary_data import AuxiliaryData from httomo.runner.dataset import DataSetBlock @@ -286,6 +286,10 @@ def __init__( start = time.perf_counter() self._data = self._reslice(source.slicing_dim, slicing_dim, source_data) end = time.perf_counter() + log_once( + f"reslice_memory_estimator: {reslice_memory_estimator(source_data.shape, source_data.dtype, source.slicing_dim, slicing_dim, self._comm)}", + level=logging.ERROR, + ) if slicing_dim == 1: log_once( f"Slicing axis change (reslice) from projection to sinogram took {(end - start):.9f}s.", diff --git a/httomo/data/hdf/_utils/reslice.py b/httomo/data/hdf/_utils/reslice.py index 6da32c7a0..a05c9dedc 100644 --- a/httomo/data/hdf/_utils/reslice.py +++ b/httomo/data/hdf/_utils/reslice.py @@ -68,3 +68,77 @@ def reslice( start_idx = 0 if comm.rank == 0 else split_indices[comm.rank - 1] return new_data, next_slice_dim, start_idx + + +def reslice_memory_estimator( + data_shape: Tuple[int, int, int], + dtype: numpy.dtype, + current_slice_dim: int, + next_slice_dim: int, + comm: Comm, +) -> dict: + rank = comm.rank + nprocs = comm.size + itemsize = numpy.dtype(dtype).itemsize + input_size = numpy.prod(data_shape) * itemsize + + split_sizes = [] + length = data_shape[next_slice_dim] + split_indices = [round((length / nprocs) * r) for r in range(1, nprocs)] + + prev_idx = 0 + for i in range(nprocs): + next_idx = split_indices[i] if i < len(split_indices) else length + split_shape = list(data_shape) + split_shape[next_slice_dim] = next_idx - prev_idx + split_sizes.append(numpy.prod(split_shape) * itemsize) + prev_idx = next_idx + + total_split_size = sum(split_sizes) + + all_split_sizes = comm.allgather(split_sizes) + recv_sizes = [all_split_sizes[p][rank] for p in range(nprocs)] + + output_shape = list(data_shape) + output_shape[current_slice_dim] = sum( + recv_sizes[p] + // ( + itemsize + * numpy.prod([data_shape[d] for d in range(3) if d != next_slice_dim]) + ) + for p in range(nprocs) + ) + output_size = numpy.prod(output_shape) * itemsize + + max_send_buffer = max(split_sizes) + max_recv_buffer = max(recv_sizes) + + from httomo.data.mpiutil import _mpi_max_elements + + max_elements = _mpi_max_elements - 1 + max_transfer_elements = max( + max(split_sizes) // itemsize, max(recv_sizes) // itemsize + ) + + needs_chunking = max_transfer_elements > max_elements + + if needs_chunking: + chunk_overhead_send = max_send_buffer + chunk_overhead_recv = max_recv_buffer + else: + chunk_overhead_send = 0 + chunk_overhead_recv = 0 + + peak_before_ring = input_size + total_split_size + output_size + + peak_during_ring = ( + peak_before_ring + + max_send_buffer # Temporary send buffer + + max_recv_buffer # Temporary recv buffer + + chunk_overhead_send # Flattened send array (if chunking) + + chunk_overhead_recv # Flattened recv array (if chunking) + ) + + peak_after_ring = output_size + + return max(peak_before_ring, peak_during_ring, peak_after_ring) From 331ef554e39ec8a604f75640863b7b98e7139a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20T=C3=BCk=C3=B6r?= Date: Tue, 31 Mar 2026 11:53:21 +0200 Subject: [PATCH 2/6] Avoid underestimation by adding 1% --- httomo/data/hdf/_utils/reslice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httomo/data/hdf/_utils/reslice.py b/httomo/data/hdf/_utils/reslice.py index a05c9dedc..85bbfb790 100644 --- a/httomo/data/hdf/_utils/reslice.py +++ b/httomo/data/hdf/_utils/reslice.py @@ -141,4 +141,4 @@ def reslice_memory_estimator( peak_after_ring = output_size - return max(peak_before_ring, peak_during_ring, peak_after_ring) + return max(peak_before_ring, peak_during_ring, peak_after_ring) * 1.01 From 3a850b1194696577f78b32d12116a453a974731a Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Wed, 22 Apr 2026 16:35:21 +0100 Subject: [PATCH 3/6] Remove split array sizes from peak allocation size before ring algorithm The `numpy.split()` function only creates views of the original array (see https://numpy.org/doc/2.4/reference/generated/numpy.split.html), which means that it doesn't cause any allocations to occur, so there's no need to include the size of the split arrays in the value of the peak allocation prior to the ring algorithm running. --- httomo/data/hdf/_utils/reslice.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/httomo/data/hdf/_utils/reslice.py b/httomo/data/hdf/_utils/reslice.py index 85bbfb790..2f250b830 100644 --- a/httomo/data/hdf/_utils/reslice.py +++ b/httomo/data/hdf/_utils/reslice.py @@ -94,8 +94,6 @@ def reslice_memory_estimator( split_sizes.append(numpy.prod(split_shape) * itemsize) prev_idx = next_idx - total_split_size = sum(split_sizes) - all_split_sizes = comm.allgather(split_sizes) recv_sizes = [all_split_sizes[p][rank] for p in range(nprocs)] @@ -129,7 +127,7 @@ def reslice_memory_estimator( chunk_overhead_send = 0 chunk_overhead_recv = 0 - peak_before_ring = input_size + total_split_size + output_size + peak_before_ring = input_size + output_size peak_during_ring = ( peak_before_ring From 0319a76914b14ae1ca2ccb40e13c26f54ba98445 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Wed, 22 Apr 2026 16:39:56 +0100 Subject: [PATCH 4/6] Include input size in peak allocation size after ring algorithm The split arrays given as input to the `alltoall_ring()` function will not be deallocated during the execution of the function, because there exists references to them (and thus to the underlying data) during the entireity of `alltoall_ring()`. Due to how reference counting and function calls work in python, only when `alltoall_ring()` returns to its caller `reslice()` could there be the possibility of the array that was initially split by `reslice()` being deallocated. (And even then, the `reslice()` function gets its input `data` from its caller, so I think the original array that was split can't be deallocated in `reslice()` either, it must happen further up in the call stack). Therefore, even after the ring algorithm has completed in `alltoall_ring()`, the input is still allocated and thus its size must be accounted for in the peak allocation size after the ring algorithm. --- httomo/data/hdf/_utils/reslice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httomo/data/hdf/_utils/reslice.py b/httomo/data/hdf/_utils/reslice.py index 2f250b830..734be1e3d 100644 --- a/httomo/data/hdf/_utils/reslice.py +++ b/httomo/data/hdf/_utils/reslice.py @@ -137,6 +137,6 @@ def reslice_memory_estimator( + chunk_overhead_recv # Flattened recv array (if chunking) ) - peak_after_ring = output_size + peak_after_ring = input_size + output_size return max(peak_before_ring, peak_during_ring, peak_after_ring) * 1.01 From 29ffb41ca743bf58f4e49bc639912c51998e11c7 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Wed, 22 Apr 2026 17:03:29 +0100 Subject: [PATCH 5/6] Return ring algorithm allocation size and output size allocated --- httomo/data/hdf/_utils/reslice.py | 35 +++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/httomo/data/hdf/_utils/reslice.py b/httomo/data/hdf/_utils/reslice.py index 734be1e3d..b7d88c4cb 100644 --- a/httomo/data/hdf/_utils/reslice.py +++ b/httomo/data/hdf/_utils/reslice.py @@ -76,11 +76,10 @@ def reslice_memory_estimator( current_slice_dim: int, next_slice_dim: int, comm: Comm, -) -> dict: +) -> Tuple[int, int]: rank = comm.rank nprocs = comm.size itemsize = numpy.dtype(dtype).itemsize - input_size = numpy.prod(data_shape) * itemsize split_sizes = [] length = data_shape[next_slice_dim] @@ -127,16 +126,26 @@ def reslice_memory_estimator( chunk_overhead_send = 0 chunk_overhead_recv = 0 - peak_before_ring = input_size + output_size - - peak_during_ring = ( - peak_before_ring - + max_send_buffer # Temporary send buffer - + max_recv_buffer # Temporary recv buffer - + chunk_overhead_send # Flattened send array (if chunking) - + chunk_overhead_recv # Flattened recv array (if chunking) + # The final values for the peak allocation sizes before, during, and after the ring + # algorithm have been kept in for the sake of completeness. However, the values that matter + # most are the allocations that the reslice algorithm require, namely: + # - what the ring algorithm allocates + # - what the output size allocated is + # + # peak_before_ring = input_size + output_size + # + # peak_during_ring = ( + # peak_before_ring + # + max_send_buffer # Temporary send buffer + # + max_recv_buffer # Temporary recv buffer + # + chunk_overhead_send # Flattened send array (if chunking) + # + chunk_overhead_recv # Flattened recv array (if chunking) + # ) + # + # peak_after_ring = input_size + output_size + + ring_algorithm_allocations = ( + max_send_buffer + max_recv_buffer + chunk_overhead_send + chunk_overhead_recv ) - peak_after_ring = input_size + output_size - - return max(peak_before_ring, peak_during_ring, peak_after_ring) * 1.01 + return (ring_algorithm_allocations, output_size) From 7d3ac236943d27ed43a8733557f5a7d3b2238ed5 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Wed, 22 Apr 2026 17:04:39 +0100 Subject: [PATCH 6/6] Change reslice memory estimation log level to debug --- httomo/data/dataset_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httomo/data/dataset_store.py b/httomo/data/dataset_store.py index c94e8ea50..f8a65917c 100644 --- a/httomo/data/dataset_store.py +++ b/httomo/data/dataset_store.py @@ -288,7 +288,7 @@ def __init__( end = time.perf_counter() log_once( f"reslice_memory_estimator: {reslice_memory_estimator(source_data.shape, source_data.dtype, source.slicing_dim, slicing_dim, self._comm)}", - level=logging.ERROR, + level=logging.DEBUG, ) if slicing_dim == 1: log_once(