From 5b331f8aac6a8fcde84eb3469357a21e751bea26 Mon Sep 17 00:00:00 2001 From: carhc Date: Mon, 30 Dec 2024 13:20:27 +0100 Subject: [PATCH 01/25] Add functions for map computation --- invisible_cities/reco/krmap_functions.py | 384 ++++++++++++++++++++++- 1 file changed, 382 insertions(+), 2 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index a967041fd..2841b0d65 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -1,9 +1,18 @@ -import numpy as np -import pandas as pd +import copy +import warnings +import itertools +import numpy as np +import pandas as pd +import scipy.stats as stats + + +from typing import Tuple, Optional from .. types.symbols import KrFitFunction from .. evm.ic_containers import FitFunction from .. core.fit_functions import polynom, expo +from ..core.core_functions import in_range, shift_to_bin_centers +from ..core.fit_functions import fit, get_chi2_and_pvalue def lin_seed(x : np.array, y : np.array): @@ -149,3 +158,374 @@ def transform_parameters(fit_output : FitFunction): return par, err, cov + +def create_df_kr_map(bins : Tuple[np.array, np.array], + counts : np.array, + n_min : int, + r_max : float)->pd.DataFrame: + ''' + This function creates the dataframe in which the map parameters are stored. + + Parameters + ---------- + + fittype : KrFitFunction + Chosen fit function for map computation + bins : Tuple[np.array, np.array] + Tuple containing bins in both axis + counts : np.array + Number of events falling into each bin + n_min : int + Min number of events per bin required to perform fits + r_max : float + Radius defining the active area of the detector + + Returns + ------- + + kr_map : pd.DataFrame + Kr map dataframe with all the info prior to the fits: bin label, + events per bin, bin in/outside the active volume, bin position + (X, Y, R), etc. + ''' + + columns = ['bin', 'counts', 'e0', 'ue0', 'lt', 'ult', 'covariance', 'res_std', 'chi2', + 'pval', 'in_active', 'has_min_counts', 'fit_success', 'valid', 'R', 'X', 'Y'] + + kr_map = pd.DataFrame(columns = columns) + + n_xbins = len(bins[0])-1 + n_ybins = len(bins[1])-1 + b_center = [shift_to_bin_centers(axis) for axis in bins] + + bin_index = range(n_xbins*n_ybins) + geom_comb = itertools.product(b_center[1], b_center[0]) + r_values = np.array([np.sqrt(x**2+y**2)for x, y in itertools.product(b_center[1], b_center[0])]) + + kr_map['bin'] = bin_index + kr_map['counts'] = counts + kr_map['R'] = r_values + kr_map[['Y', 'X']] = pd.DataFrame(geom_comb) + kr_map['in_active'] = kr_map['R'] <= r_max + kr_map['has_min_counts'] = kr_map['counts'] >= n_min + kr_map['fit_success'] = False + kr_map['valid'] = False + + return kr_map + + +def get_number_of_bins(nevents : Optional[int] = None, + thr : Optional[int] = 1e6, + n_bins : Optional[np.array] = None)->np.array: + ''' + Computes the number of XY bins to be used in the creation + of correction map regarding the number of selected events. + Parameters + --------- + nevents: int (optional) + Total number of provided events for the map computation. + thr: int (optional) + Event threshold to use 50x50 or 100x100 binning. + n_bins: int (optional) + The number of bins to use can be chosen a priori. If given, + the returned number of bins is the one provided by the user. + However, if no number of bins is given in advance, this will + automatically select a value depending on the amount of events + contained in the dst and the threshold. + Returns + --------- + n_bins: int + Number of bins in each direction (X,Y) (square map). + ''' + if n_bins is not None: return n_bins; + elif nevents < thr: return np.array([50, 50]); + else: return np.array([100, 100]); + + +def get_XY_bins(n_bins : np.array, + XYrange : Tuple[float, float]): + ''' + Returns the bins that will be used to make the map. + + Parameters + --------- + b_nins: np.array + array of len = 2 containing the number of bins in X and Y + XYrange: Tuple[float, float] + Limits (mm) of X and Y for the map computation + + Returns + --------- + bins: Tuple[np.array, np.array] + Bins in each direction (X,Y) (square map). + ''' + bins_x = np.linspace(*XYrange, n_bins[0]+1) + bins_y = np.linspace(*XYrange, n_bins[1]+1) + return bins_x, bins_y + + +def get_bin_counts_and_event_bin_id(dst : pd.DataFrame, + bins : Tuple[np.array, np.array]): + ''' + This function distributes all the events in the DST into the selected + bins. Given a certain binning, it computes which events fall into each + square of the grid formed by the bins arrays. binned_statistic_2d returns + counts (matrix here each matrix element is the number of events falling + into that specific bin) and bin_indexes (a 2-D array labeling each event + with the bin it belongs). Then counts is flattened into 1-D, bin_indexes + is transformed into 1-D using the number of bins on each axis. + + Parameters + -------------- + dst : pd.DataFrame + Krypton dataframe. + bins : Tuple[np.array, np.array] + Bins used to compute the map. + + Returns + ------------- + counts : np.array + Total number of events falling into each bin + bin_labels : np.array + 1D bin label for each event + + Further info: + ----------------- + Why set expand_binnumbers to True (2D binning) if then we transform it to 1D? + Because even though expand_binnumbers = False returns 1-D labels, it also adds + two additional bins (per axis), taking into account out-of-range events which + dont fall into the the binning passed to the binned_statistic_2d function. But + since the dst is going to be previously selected and filtered with the desired + binning, it's not convenient to use that. Maybe a visual example is more useful: + + 2x2 binning (4 bins), natural index values shown both as 1D and 2D: + + || 0 | 1 || || (0, 0) | (0, 1) || + || 2 | 3 || (1D) = || (1, 0) | (1, 1) || (2D) + + Using expand_binnumbers = False, the 1D index values instead of (0, ..., 3) + would be (0, ..., 15): + + || 0 | 1 | 2 | 3 || + || 4 | 5 | 6 | 7 || The bins that we "care about" (inner part) have indexes + || 8 | 9 |10 |11 || 5, 6, 9, 10 which I believe is not convenient at all. + ||12 |13 |14 |15 || This creates (nx+2)*(ny+2) bins. + ''' + n_xbins = len(bins[0])-1 + n_ybins = len(bins[1])-1 + + counts, _, _, bin_indexes = stats.binned_statistic_2d(x=dst.X, y=dst.Y, values=None, + bins=bins, statistic='count', + expand_binnumbers=True) + + counts = counts.flatten() + bin_indexes -= 1 + bin_labels = np.ravel_multi_index(bin_indexes, dims=(n_xbins, n_ybins), + mode='clip', order = 'F') + + return counts, bin_labels + + +def calculate_residuals(x : pd.Series, + y : pd.Series, + fit_output : FitFunction): + + ''' + Calculate residuals and their standard deviation for the fitted data. + + Parameters + ---------- + x : pd.Series + Independent variable + y : pd.Series + Dependent variable + fit_output : FitFunction + Container for IC's fit function result + + Returns + ------- + res : np.array + Residuals. + std : float + Standard deviation of residuals. + ''' + + function = fit_output.fn + + res = y - function(x) + std = res.std() + + return res, std + + +def calculate_pval(residuals : np.array): + + ''' + Calculate the p-value for the Shapiro-Wilk normality test of residuals. + + Parameters + ---------- + residuals : np.array + Residuals from the fitted model. + + Returns + ------- + pval : float + p-value for the Shapiro-Wilk normality test. + ''' + + pval = stats.shapiro(residuals)[1] if (len(residuals) > 10) else 0. + + return pval + + +def valid_bin_counter(map_df : pd.DataFrame, + validity_parameter : Optional[float] = 0.9): + ''' + Count the number of valid bins in the map DataFrame and issue a warning + if the validity threshold is not met. + + Parameters + ---------- + map_df : pd.DataFrame + DataFrame containing map data. + validity_parameter : float + Threshold for the ratio of valid bins (default set to 0.9). + + Returns + ------- + valid_per : float + Percentage of valid bins. + ''' + + inside = len(map_df[map_df['in_active'] == True]) + valid = len(map_df[map_df['valid'] == True]) + + valid_per = valid / inside * 100 + + if valid_per <= validity_parameter: + # If the percentage of valid bins is below the threshold, issue a warning + warnings.warn(f"{inside-valid} inner bins are not valid. {valid_per:.1f}% are successful.", UserWarning) + else: + # If the percentage of valid bins meets the threshold, print a message + print(f"{inside-valid} inner bins are not valid. {valid_per:.1f}% are successful.") + + return valid_per + + +def fit_and_fill_map(map_bin : pd.DataFrame, + dst : pd.DataFrame, + fittype : KrFitFunction): + ''' + This function is the core of the map computation. It's the one in charge of looping + over all the bins, performing the calculations of the lifetime fits and filling the + krypton map dataframe with all the obtained parameters. + + Basically, it's applied to the dataframe, and bin by bin, it checks if the conditions + for the map computation are met. If it's the case, it will call the corresponding fit + function specified in the fittype parameter, calculate all the different map values + and fill the dataframe with them. + + Parameters + -------------- + map_bin : pd.DataFrame + KrMap dataframe + dst : pd.DataFrame + kdst dataframe. + fittype : str + Type of fit to perform. + + Returns + ------------- + kr_map : pd.DataFrame + DataFrame containing map parameters. + ''' + if not map_bin['in_active'] or not map_bin['has_min_counts']: return map_bin + + k = map_bin.bin + + dst_bin = dst.query(f'bin_index == {k}') + + fit_func, seed = get_function_and_seed_lt(fittype = fittype) + x, y = select_fit_variables (fittype = fittype, + dst = dst_bin) + + fit_output, _, _, ier = fit(func = fit_func, + x = x, + y = y, + seed = seed(x, y), + full_output = True) + + par, err, cov = transform_parameters(fittype = fittype, + fit_output = fit_output) + + res, std = calculate_residuals(x, y, fit_output) # Still considering this + chi2, _ = get_chi2_and_pvalue(y, fit_output.fn(x), len(x)-len(par), std) + pval = calculate_pval(res) + + map_bin['e0'] = par[0] + map_bin['ue0'] = err[0] + map_bin['lt'] = par[1] + map_bin['ult'] = err[1] + map_bin['covariance'] = cov + map_bin['res_std'] = std + map_bin['chi2'] = chi2 + map_bin['pval'] = pval + map_bin['fit_success'] = True if ier in [1, 2, 3, 4] else False + map_bin['valid'] = map_bin['fit_success'] & map_bin['has_min_counts'] & map_bin['in_active'] + + return map_bin + + +def find_outliers(maps : pd.DataFrame, + x2range : Tuple[float, float] = (0, 2)): + ''' + For a given maps and deserved range, it returns a mask where values are + within the interval. + + Parameters + --------- + maps: pd.DataFrame + Map to check the outliers + x2range : Tuple[float, float] + Range for chi2 + + Returns + --------- + mask: pd.Series + Mask. + ''' + mask = in_range(maps.chi2, *x2range) + return mask + + +def regularize_map(maps : pd.DataFrame, + x2range : Tuple[float, float]): + ''' + Given a certain chi2 range, this function checks which bins are outside that + range and substitutes the (probably wrong) values of the map (e0, lt, etc) for + the mean value of the whole map. + + Parameters + --------- + maps: pd.DataFrame + Map to check the outliers + x2range : Tuple[float, float] + Range for chi2 + + Returns + --------- + new_map: pd.DataFrame + Regularized map.''' + + new_map = copy.deepcopy(maps) + + outliers = maps.in_active + outliers &= np.logical_not(find_outliers(new_map, x2range)) + + new_map['e0'] [outliers] = np.nanmean(maps['e0']) + new_map['lt'] [outliers] = np.nanmean(maps['lt']) + new_map['ue0'][outliers] = np.nanmean(maps['ue0']) + new_map['ult'][outliers] = np.nanmean(maps['ult']) + + return new_map From aedc73ead219b699d0afabbb58e06281c171a074 Mon Sep 17 00:00:00 2001 From: carhc Date: Mon, 30 Dec 2024 13:21:16 +0100 Subject: [PATCH 02/25] Add tests for map computation --- invisible_cities/reco/krmap_functions_test.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index fcb5cbd34..c3633e332 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -7,6 +7,7 @@ from hypothesis import given, settings from hypothesis.strategies import floats, integers +from hypothesis.extra.numpy import arrays from .. reco import krmap_functions as krf from ..types.symbols import KrFitFunction @@ -104,3 +105,81 @@ def test_transform_parameters(a, b): npt.assert_allclose(transformed_par, [ E0_expected, lt_expected]) npt.assert_allclose(transformed_err, [s_E0_expected, s_lt_expected]) assert np.isclose(transformed_cov, cov_expected) + + +@given(integers(min_value = 0, max_value = 1e10), + integers(min_value = 0, max_value = 1e10), + arrays (dtype = np.int64, shape = (2,), + elements = integers(min_value = 1, + max_value = 1e4))) +def test_get_number_of_bins_performance_default_value(nevents, thr, n_bins): + bins = krf.get_number_of_bins(nevents, thr, n_bins) + + npt.assert_array_equal(bins, n_bins) + + +@given(integers(min_value = 0, max_value = 1e10), + integers(min_value = 0, max_value = 1e10)) +def test_get_number_of_bins_with_thresholds(nevents, thr): + bins = krf.get_number_of_bins(nevents, thr) + + if nevents < thr: + npt.assert_array_equal(bins, np.array([50, 50])) + if nevents >= thr: + npt.assert_array_equal(bins, np.array([100, 100])) + + +@given(integers(min_value = 0, max_value = 1e10), + integers(min_value = 0, max_value = 1e10), + arrays (dtype = np.int64, shape = (2,), + elements = integers(min_value = 1, + max_value = 1e4))) +def test_get_number_of_bins_returns_type(nevents, thr, n_bins): + + assert type(krf.get_number_of_bins(nevents, thr) ) == np.ndarray + assert type(krf.get_number_of_bins(n_bins=n_bins)) == np.ndarray + + +@given(arrays(dtype = int, shape = (2,), + elements = integers(min_value = 1, + max_value = 1e4)), + arrays(dtype = np.float64, + shape = (2,), + elements = floats(min_value = -1e4, + max_value = 1e4))) +def test_get_XY_bins_n(n_bins, XYrange): + bins_x, bins_y = krf.get_XY_bins(n_bins, XYrange) + + assert len(bins_x) == int(n_bins[0]) + 1 + assert len(bins_y) == int(n_bins[1]) + 1 + + +@given(n_bins=arrays(dtype = np.int, shape = (2,), + elements = integers(min_value = 2, + max_value = 1000)), + XYrange=arrays(dtype = np.float64, shape = (2,), + elements = floats(min_value = -1e4, + max_value = 1e4)), + n_min=integers(min_value=1, max_value=100), + r_max=floats (min_value=50, max_value=450)) +def test_create_df_kr_map_check_columns(n_bins, XYrange, n_min, r_max): + + n_bins_x = n_bins[0] + n_bins_y = n_bins[1] + + bins = krf.get_XY_bins(n_bins, XYrange) + counts = arrays(dtype=np.int64, shape=(n_bins_x * n_bins_y,), elements=integers(min_value=0, max_value=100)) + + df = krf.create_df_kr_map(bins, counts, n_min, r_max) + + columns = ['bin', 'counts', 'e0', 'ue0', 'lt', 'ult', 'covariance', 'res_std', 'chi2', + 'pval', 'in_active', 'has_min_counts', 'fit_success', 'valid', 'R', 'X', 'Y'] + + for element in df.columns.values: + + assert element in columns + + +def test_get_bin_counts_and_event_id_correct_output(): + + return From 7593cd96acf89c03c21b51de905ecba5b765e1ef Mon Sep 17 00:00:00 2001 From: carhc Date: Fri, 3 Jan 2025 12:08:45 +0100 Subject: [PATCH 03/25] Update valid_bin_counter function --- invisible_cities/reco/krmap_functions.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 2841b0d65..133295b83 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -398,17 +398,14 @@ def valid_bin_counter(map_df : pd.DataFrame, Percentage of valid bins. ''' - inside = len(map_df[map_df['in_active'] == True]) - valid = len(map_df[map_df['valid'] == True]) + inside = np.count_nonzero(map_df.in_active) + valid = np.count_nonzero(map_df.valid) - valid_per = valid / inside * 100 + valid_per = valid / inside if valid_per <= validity_parameter: - # If the percentage of valid bins is below the threshold, issue a warning - warnings.warn(f"{inside-valid} inner bins are not valid. {valid_per:.1f}% are successful.", UserWarning) - else: - # If the percentage of valid bins meets the threshold, print a message - print(f"{inside-valid} inner bins are not valid. {valid_per:.1f}% are successful.") + warnings.warn(f"{inside-valid} inner bins are not valid. {100*valid_per:.1f}% are successful.", UserWarning) + else: print(f"{inside-valid} inner bins are not valid. {100*valid_per:.1f}% are successful.") return valid_per From e943e9c66386f0ec115914f34bf2dd12b62f06ef Mon Sep 17 00:00:00 2001 From: carhc Date: Fri, 3 Jan 2025 12:12:46 +0100 Subject: [PATCH 04/25] Update fit_and_fill_map function --- invisible_cities/reco/krmap_functions.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 133295b83..dd1f86029 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -437,11 +437,9 @@ def fit_and_fill_map(map_bin : pd.DataFrame, kr_map : pd.DataFrame DataFrame containing map parameters. ''' - if not map_bin['in_active'] or not map_bin['has_min_counts']: return map_bin + if not map_bin.in_active or not map_bin.has_min_counts: return map_bin - k = map_bin.bin - - dst_bin = dst.query(f'bin_index == {k}') + dst_bin = dst.query(f'bin_index == {map_bin.bin}') fit_func, seed = get_function_and_seed_lt(fittype = fittype) x, y = select_fit_variables (fittype = fittype, @@ -464,12 +462,12 @@ def fit_and_fill_map(map_bin : pd.DataFrame, map_bin['ue0'] = err[0] map_bin['lt'] = par[1] map_bin['ult'] = err[1] - map_bin['covariance'] = cov + map_bin['cov'] = cov map_bin['res_std'] = std map_bin['chi2'] = chi2 map_bin['pval'] = pval - map_bin['fit_success'] = True if ier in [1, 2, 3, 4] else False - map_bin['valid'] = map_bin['fit_success'] & map_bin['has_min_counts'] & map_bin['in_active'] + map_bin['fit_success'] = ier in range(1, 5) + map_bin['valid'] = map_bin['fit_success'] and map_bin['has_min_counts'] and map_bin['in_active'] return map_bin From bd79755f861626dab834c14df870fcce965cec58 Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 7 Jan 2025 17:52:06 +0100 Subject: [PATCH 05/25] Update create_df_kr_map function --- invisible_cities/reco/krmap_functions.py | 40 ++++++++++++++---------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index dd1f86029..6f6af7a81 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -188,28 +188,34 @@ def create_df_kr_map(bins : Tuple[np.array, np.array], events per bin, bin in/outside the active volume, bin position (X, Y, R), etc. ''' - - columns = ['bin', 'counts', 'e0', 'ue0', 'lt', 'ult', 'covariance', 'res_std', 'chi2', - 'pval', 'in_active', 'has_min_counts', 'fit_success', 'valid', 'R', 'X', 'Y'] - - kr_map = pd.DataFrame(columns = columns) - n_xbins = len(bins[0])-1 n_ybins = len(bins[1])-1 b_center = [shift_to_bin_centers(axis) for axis in bins] bin_index = range(n_xbins*n_ybins) - geom_comb = itertools.product(b_center[1], b_center[0]) - r_values = np.array([np.sqrt(x**2+y**2)for x, y in itertools.product(b_center[1], b_center[0])]) - - kr_map['bin'] = bin_index - kr_map['counts'] = counts - kr_map['R'] = r_values - kr_map[['Y', 'X']] = pd.DataFrame(geom_comb) - kr_map['in_active'] = kr_map['R'] <= r_max - kr_map['has_min_counts'] = kr_map['counts'] >= n_min - kr_map['fit_success'] = False - kr_map['valid'] = False + geom_comb = list(itertools.product(b_center[0], b_center[1])) + r_values = np.array([np.sqrt(x**2+y**2)for x, y in geom_comb]) + + kr_map = pd.DataFrame(dict(bin = bin_index, + counts = counts, + X = list(zip(*geom_comb))[0], + Y = list(zip(*geom_comb))[1], + R = r_values, + in_active = False, + has_min_counts = False, + fit_success = False, + valid = False, + e0 = np.nan, + ue0 = np.nan, + lt = np.nan, + ult = np.nan, + cov = np.nan, + res_std = np.nan, + chi2 = np.nan, + pval = np.nan)) + + kr_map.in_active = kr_map.R <= r_max + kr_map.has_min_counts = kr_map.counts >= n_min return kr_map From a27dd3462e83389330787595c13fe36e510a0585 Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 7 Jan 2025 18:02:05 +0100 Subject: [PATCH 06/25] Update calculate_residuals function --- invisible_cities/reco/krmap_functions.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 6f6af7a81..bd9c700d7 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -332,18 +332,17 @@ def get_bin_counts_and_event_bin_id(dst : pd.DataFrame, return counts, bin_labels -def calculate_residuals(x : pd.Series, - y : pd.Series, +def calculate_residuals(x : np.array, + y : np.array, fit_output : FitFunction): - ''' Calculate residuals and their standard deviation for the fitted data. Parameters ---------- - x : pd.Series + x : pd.array Independent variable - y : pd.Series + y : pd.array Dependent variable fit_output : FitFunction Container for IC's fit function result @@ -356,10 +355,7 @@ def calculate_residuals(x : pd.Series, Standard deviation of residuals. ''' - function = fit_output.fn - - res = y - function(x) - std = res.std() + res = y - fit_output.fn(x); std = res.std() return res, std From f480e1ecd32b12143039a9855fe8d60616034c09 Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 9 Jan 2025 09:43:27 +0100 Subject: [PATCH 07/25] Remove find_outliers function --- invisible_cities/reco/krmap_functions.py | 25 +----------------------- 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index bd9c700d7..8ed82ff09 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -474,28 +474,6 @@ def fit_and_fill_map(map_bin : pd.DataFrame, return map_bin -def find_outliers(maps : pd.DataFrame, - x2range : Tuple[float, float] = (0, 2)): - ''' - For a given maps and deserved range, it returns a mask where values are - within the interval. - - Parameters - --------- - maps: pd.DataFrame - Map to check the outliers - x2range : Tuple[float, float] - Range for chi2 - - Returns - --------- - mask: pd.Series - Mask. - ''' - mask = in_range(maps.chi2, *x2range) - return mask - - def regularize_map(maps : pd.DataFrame, x2range : Tuple[float, float]): ''' @@ -517,8 +495,7 @@ def regularize_map(maps : pd.DataFrame, new_map = copy.deepcopy(maps) - outliers = maps.in_active - outliers &= np.logical_not(find_outliers(new_map, x2range)) + outliers = maps.in_active & ~in_range(new_map.chi2, *x2range) new_map['e0'] [outliers] = np.nanmean(maps['e0']) new_map['lt'] [outliers] = np.nanmean(maps['lt']) From 58b282445ed5ec86d8340a28b0d33c0ab8f4db9a Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 9 Jan 2025 09:50:56 +0100 Subject: [PATCH 08/25] Remove get_XY_bins function --- invisible_cities/reco/krmap_functions.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 8ed82ff09..9f5ff158e 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -248,28 +248,6 @@ def get_number_of_bins(nevents : Optional[int] = None, else: return np.array([100, 100]); -def get_XY_bins(n_bins : np.array, - XYrange : Tuple[float, float]): - ''' - Returns the bins that will be used to make the map. - - Parameters - --------- - b_nins: np.array - array of len = 2 containing the number of bins in X and Y - XYrange: Tuple[float, float] - Limits (mm) of X and Y for the map computation - - Returns - --------- - bins: Tuple[np.array, np.array] - Bins in each direction (X,Y) (square map). - ''' - bins_x = np.linspace(*XYrange, n_bins[0]+1) - bins_y = np.linspace(*XYrange, n_bins[1]+1) - return bins_x, bins_y - - def get_bin_counts_and_event_bin_id(dst : pd.DataFrame, bins : Tuple[np.array, np.array]): ''' @@ -361,7 +339,6 @@ def calculate_residuals(x : np.array, def calculate_pval(residuals : np.array): - ''' Calculate the p-value for the Shapiro-Wilk normality test of residuals. From 7928ec4291bc1061e9e2da3a7c59f1348bb72537 Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 9 Jan 2025 09:51:55 +0100 Subject: [PATCH 09/25] Remove get_XY_bins tests --- invisible_cities/reco/krmap_functions_test.py | 35 +++++-------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index c3633e332..000f24192 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -140,44 +140,25 @@ def test_get_number_of_bins_returns_type(nevents, thr, n_bins): assert type(krf.get_number_of_bins(n_bins=n_bins)) == np.ndarray -@given(arrays(dtype = int, shape = (2,), - elements = integers(min_value = 1, - max_value = 1e4)), - arrays(dtype = np.float64, - shape = (2,), - elements = floats(min_value = -1e4, - max_value = 1e4))) -def test_get_XY_bins_n(n_bins, XYrange): - bins_x, bins_y = krf.get_XY_bins(n_bins, XYrange) - - assert len(bins_x) == int(n_bins[0]) + 1 - assert len(bins_y) == int(n_bins[1]) + 1 - - -@given(n_bins=arrays(dtype = np.int, shape = (2,), +@given(n_bins=arrays(dtype = int, shape = (2,), elements = integers(min_value = 2, - max_value = 1000)), - XYrange=arrays(dtype = np.float64, shape = (2,), - elements = floats(min_value = -1e4, - max_value = 1e4)), + max_value = 100)), n_min=integers(min_value=1, max_value=100), r_max=floats (min_value=50, max_value=450)) -def test_create_df_kr_map_check_columns(n_bins, XYrange, n_min, r_max): +def test_create_df_kr_map_check_columns(n_bins, n_min, r_max): + XYrange = (500, 500) n_bins_x = n_bins[0] n_bins_y = n_bins[1] - bins = krf.get_XY_bins(n_bins, XYrange) - counts = arrays(dtype=np.int64, shape=(n_bins_x * n_bins_y,), elements=integers(min_value=0, max_value=100)) - + bins = [np.linspace(*XYrange, n_bins_x+1), np.linspace(*XYrange, n_bins_y+1)] + counts = np.full(shape=(n_bins_x, n_bins_y), fill_value = 100, dtype=int).flatten() df = krf.create_df_kr_map(bins, counts, n_min, r_max) - columns = ['bin', 'counts', 'e0', 'ue0', 'lt', 'ult', 'covariance', 'res_std', 'chi2', + columns = ['bin', 'counts', 'e0', 'ue0', 'lt', 'ult', 'cov', 'res_std', 'chi2', 'pval', 'in_active', 'has_min_counts', 'fit_success', 'valid', 'R', 'X', 'Y'] - for element in df.columns.values: - - assert element in columns + assert all(element in columns for element in df.columns.values) def test_get_bin_counts_and_event_id_correct_output(): From a8de173306077537ce42054e247f157d381a029a Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 9 Jan 2025 10:08:00 +0100 Subject: [PATCH 10/25] Update calculate_pval function --- invisible_cities/reco/krmap_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 9f5ff158e..6f60f628b 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -353,7 +353,7 @@ def calculate_pval(residuals : np.array): p-value for the Shapiro-Wilk normality test. ''' - pval = stats.shapiro(residuals)[1] if (len(residuals) > 10) else 0. + pval = stats.shapiro(residuals)[1] if (len(residuals) > 3) else 0. return pval From 24419d844446f679be7052b5f85144e1eef99479 Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 9 Jan 2025 10:08:48 +0100 Subject: [PATCH 11/25] Update regularize_maps function --- invisible_cities/reco/krmap_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 6f60f628b..7afa43b10 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -472,7 +472,7 @@ def regularize_map(maps : pd.DataFrame, new_map = copy.deepcopy(maps) - outliers = maps.in_active & ~in_range(new_map.chi2, *x2range) + outliers = new_map.in_active & ~in_range(new_map.chi2, *x2range) new_map['e0'] [outliers] = np.nanmean(maps['e0']) new_map['lt'] [outliers] = np.nanmean(maps['lt']) From cd4e1706da071b654e5ece853124f030206376dd Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 28 Jan 2025 18:32:52 +0100 Subject: [PATCH 12/25] Update create_df_kr_map function --- invisible_cities/reco/krmap_functions.py | 34 +++++++++++------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 7afa43b10..e3deb5961 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -159,7 +159,8 @@ def transform_parameters(fit_output : FitFunction): return par, err, cov -def create_df_kr_map(bins : Tuple[np.array, np.array], +def create_df_kr_map(bins_x : np.array, + bins_y : np.array, counts : np.array, n_min : int, r_max : float)->pd.DataFrame: @@ -168,11 +169,10 @@ def create_df_kr_map(bins : Tuple[np.array, np.array], Parameters ---------- - - fittype : KrFitFunction - Chosen fit function for map computation - bins : Tuple[np.array, np.array] - Tuple containing bins in both axis + bins_x : np.array + Bins in x axis + bins_y : np.array + Bins in y axis counts : np.array Number of events falling into each bin n_min : int @@ -182,27 +182,26 @@ def create_df_kr_map(bins : Tuple[np.array, np.array], Returns ------- - kr_map : pd.DataFrame Kr map dataframe with all the info prior to the fits: bin label, events per bin, bin in/outside the active volume, bin position (X, Y, R), etc. ''' - n_xbins = len(bins[0])-1 - n_ybins = len(bins[1])-1 - b_center = [shift_to_bin_centers(axis) for axis in bins] + n_xbins = len(bins_x)-1 + n_ybins = len(bins_y)-1 + b_center = [shift_to_bin_centers(axis) for axis in [bins_x, bins_y]] bin_index = range(n_xbins*n_ybins) - geom_comb = list(itertools.product(b_center[0], b_center[1])) - r_values = np.array([np.sqrt(x**2+y**2)for x, y in geom_comb]) + geom_comb = np.array(list(itertools.product(*b_center))) + r_values = np.sum(geom_comb**2, axis=1)**0.5 kr_map = pd.DataFrame(dict(bin = bin_index, counts = counts, - X = list(zip(*geom_comb))[0], - Y = list(zip(*geom_comb))[1], + X = geom_comb.T[0], + Y = geom_comb.T[1], R = r_values, - in_active = False, - has_min_counts = False, + in_active = r_values <= r_max, + has_min_counts = counts >= n_min, fit_success = False, valid = False, e0 = np.nan, @@ -214,9 +213,6 @@ def create_df_kr_map(bins : Tuple[np.array, np.array], chi2 = np.nan, pval = np.nan)) - kr_map.in_active = kr_map.R <= r_max - kr_map.has_min_counts = kr_map.counts >= n_min - return kr_map From 6cb42ebda052e281e5084486d1d666a0a8e93333 Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 28 Jan 2025 18:35:24 +0100 Subject: [PATCH 13/25] Update get_bin_counts_and_event_bin_id function --- invisible_cities/reco/krmap_functions.py | 31 +++++++++++++----------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index e3deb5961..25a042cdf 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -244,8 +244,9 @@ def get_number_of_bins(nevents : Optional[int] = None, else: return np.array([100, 100]); -def get_bin_counts_and_event_bin_id(dst : pd.DataFrame, - bins : Tuple[np.array, np.array]): +def get_bin_counts_and_event_bin_id(dst : pd.DataFrame, + bins_x : np.array, + bins_y : np.array): ''' This function distributes all the events in the DST into the selected bins. Given a certain binning, it computes which events fall into each @@ -255,22 +256,24 @@ def get_bin_counts_and_event_bin_id(dst : pd.DataFrame, with the bin it belongs). Then counts is flattened into 1-D, bin_indexes is transformed into 1-D using the number of bins on each axis. - Parameters - -------------- - dst : pd.DataFrame + Parameters + ---------- + dst : pd.DataFrame Krypton dataframe. - bins : Tuple[np.array, np.array] - Bins used to compute the map. + bins_x : np.array + Binning in x axis + bins_y : np.array + Binning in y axis - Returns - ------------- + Returns + ------- counts : np.array Total number of events falling into each bin bin_labels : np.array 1D bin label for each event - Further info: - ----------------- + Further info: + ------------- Why set expand_binnumbers to True (2D binning) if then we transform it to 1D? Because even though expand_binnumbers = False returns 1-D labels, it also adds two additional bins (per axis), taking into account out-of-range events which @@ -291,11 +294,11 @@ def get_bin_counts_and_event_bin_id(dst : pd.DataFrame, || 8 | 9 |10 |11 || 5, 6, 9, 10 which I believe is not convenient at all. ||12 |13 |14 |15 || This creates (nx+2)*(ny+2) bins. ''' - n_xbins = len(bins[0])-1 - n_ybins = len(bins[1])-1 + n_xbins = len(bins_x)-1 + n_ybins = len(bins_y)-1 counts, _, _, bin_indexes = stats.binned_statistic_2d(x=dst.X, y=dst.Y, values=None, - bins=bins, statistic='count', + bins=[bins_x, bins_y], statistic='count', expand_binnumbers=True) counts = counts.flatten() From 2ee3482f399f88ebec39aaf2fecd8ac18e5d648c Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 28 Jan 2025 18:36:34 +0100 Subject: [PATCH 14/25] Update fit_and_fill_map function --- invisible_cities/reco/krmap_functions.py | 31 ++++++++++++------------ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 25a042cdf..531968462 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -401,8 +401,8 @@ def fit_and_fill_map(map_bin : pd.DataFrame, function specified in the fittype parameter, calculate all the different map values and fill the dataframe with them. - Parameters - -------------- + Parameters + ---------- map_bin : pd.DataFrame KrMap dataframe dst : pd.DataFrame @@ -410,31 +410,30 @@ def fit_and_fill_map(map_bin : pd.DataFrame, fittype : str Type of fit to perform. - Returns - ------------- + Returns + ------- kr_map : pd.DataFrame DataFrame containing map parameters. ''' if not map_bin.in_active or not map_bin.has_min_counts: return map_bin - dst_bin = dst.query(f'bin_index == {map_bin.bin}') + dst_bin = dst.loc[dst.bin_index == map_bin.bin] fit_func, seed = get_function_and_seed_lt(fittype = fittype) x, y = select_fit_variables (fittype = fittype, dst = dst_bin) - fit_output, _, _, ier = fit(func = fit_func, - x = x, - y = y, - seed = seed(x, y), - full_output = True) + fit_output = fit(func = fit_func, + x = x, + y = y, + seed = seed(x, y)) - par, err, cov = transform_parameters(fittype = fittype, - fit_output = fit_output) + par, err, cov = transform_parameters(fit_output = fit_output) - res, std = calculate_residuals(x, y, fit_output) # Still considering this - chi2, _ = get_chi2_and_pvalue(y, fit_output.fn(x), len(x)-len(par), std) - pval = calculate_pval(res) + res = y - fit_output.fn(x) + std = res.std() + chi2 = get_chi2_and_pvalue(y, fit_output.fn(x), len(x)-len(par), std)[0] + pval = stats.shapiro(res)[1] if (len(res) > 3) else 0. map_bin['e0'] = par[0] map_bin['ue0'] = err[0] @@ -444,7 +443,7 @@ def fit_and_fill_map(map_bin : pd.DataFrame, map_bin['res_std'] = std map_bin['chi2'] = chi2 map_bin['pval'] = pval - map_bin['fit_success'] = ier in range(1, 5) + map_bin['fit_success'] = fit_output.ier in range(1, 5) map_bin['valid'] = map_bin['fit_success'] and map_bin['has_min_counts'] and map_bin['in_active'] return map_bin From 11cb1e53e1538d4f47f8e1676edbd661a2273630 Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 28 Jan 2025 18:37:23 +0100 Subject: [PATCH 15/25] Update regularize_map function --- invisible_cities/reco/krmap_functions.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 531968462..7e52a8264 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -1,4 +1,3 @@ -import copy import warnings import itertools @@ -457,7 +456,7 @@ def regularize_map(maps : pd.DataFrame, the mean value of the whole map. Parameters - --------- + ---------- maps: pd.DataFrame Map to check the outliers x2range : Tuple[float, float] @@ -467,9 +466,7 @@ def regularize_map(maps : pd.DataFrame, --------- new_map: pd.DataFrame Regularized map.''' - - new_map = copy.deepcopy(maps) - + new_map = maps.copy() outliers = new_map.in_active & ~in_range(new_map.chi2, *x2range) new_map['e0'] [outliers] = np.nanmean(maps['e0']) From ddfc3eadde08f21dc03d40054ebea994d73b501c Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 28 Jan 2025 18:38:21 +0100 Subject: [PATCH 16/25] Cosmetics --- invisible_cities/reco/krmap_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 7e52a8264..73c06bee2 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -221,6 +221,7 @@ def get_number_of_bins(nevents : Optional[int] = None, ''' Computes the number of XY bins to be used in the creation of correction map regarding the number of selected events. + Parameters --------- nevents: int (optional) @@ -233,8 +234,9 @@ def get_number_of_bins(nevents : Optional[int] = None, However, if no number of bins is given in advance, this will automatically select a value depending on the amount of events contained in the dst and the threshold. + Returns - --------- + ------- n_bins: int Number of bins in each direction (X,Y) (square map). ''' From 887eac64fd06e17d4030f483715c485c73a75a7f Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 28 Jan 2025 18:46:17 +0100 Subject: [PATCH 17/25] Update test for get_number_on_bins --- invisible_cities/reco/krmap_functions_test.py | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index 000f24192..01be248a3 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -107,13 +107,11 @@ def test_transform_parameters(a, b): assert np.isclose(transformed_cov, cov_expected) -@given(integers(min_value = 0, max_value = 1e10), - integers(min_value = 0, max_value = 1e10), - arrays (dtype = np.int64, shape = (2,), - elements = integers(min_value = 1, - max_value = 1e4))) -def test_get_number_of_bins_performance_default_value(nevents, thr, n_bins): - bins = krf.get_number_of_bins(nevents, thr, n_bins) +@mark.parametrize('n_bins', [78, 125]) +def test_get_number_of_bins_performance_default_value(n_bins): + nevents = 1e7 + thr = 1e6 + bins = krf.get_number_of_bins(nevents, thr, n_bins) npt.assert_array_equal(bins, n_bins) @@ -129,15 +127,6 @@ def test_get_number_of_bins_with_thresholds(nevents, thr): npt.assert_array_equal(bins, np.array([100, 100])) -@given(integers(min_value = 0, max_value = 1e10), - integers(min_value = 0, max_value = 1e10), - arrays (dtype = np.int64, shape = (2,), - elements = integers(min_value = 1, - max_value = 1e4))) -def test_get_number_of_bins_returns_type(nevents, thr, n_bins): - - assert type(krf.get_number_of_bins(nevents, thr) ) == np.ndarray - assert type(krf.get_number_of_bins(n_bins=n_bins)) == np.ndarray @given(n_bins=arrays(dtype = int, shape = (2,), From c4ca12941e097a26d18b901e5ae7d4e6331c4ae0 Mon Sep 17 00:00:00 2001 From: carhc Date: Tue, 28 Jan 2025 18:48:50 +0100 Subject: [PATCH 18/25] Update test for create_df_kr_map function --- invisible_cities/reco/krmap_functions_test.py | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index 01be248a3..1a6e970ab 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -5,6 +5,7 @@ import pandas as pd import scipy.optimize as so +from pytest import mark from hypothesis import given, settings from hypothesis.strategies import floats, integers from hypothesis.extra.numpy import arrays @@ -128,26 +129,28 @@ def test_get_number_of_bins_with_thresholds(nevents, thr): +@mark.parametrize('n_bins n_min r_max'.split(), + (((10, 10), 50, 200), + ( (50, 50), 10, 350), + ((100, 50), 1, 500),)) +def test_create_df_kr_map_shape(n_bins, n_min, r_max): -@given(n_bins=arrays(dtype = int, shape = (2,), - elements = integers(min_value = 2, - max_value = 100)), - n_min=integers(min_value=1, max_value=100), - r_max=floats (min_value=50, max_value=450)) -def test_create_df_kr_map_check_columns(n_bins, n_min, r_max): - - XYrange = (500, 500) + XYrange = (500, 500) n_bins_x = n_bins[0] n_bins_y = n_bins[1] - bins = [np.linspace(*XYrange, n_bins_x+1), np.linspace(*XYrange, n_bins_y+1)] - counts = np.full(shape=(n_bins_x, n_bins_y), fill_value = 100, dtype=int).flatten() - df = krf.create_df_kr_map(bins, counts, n_min, r_max) + bins_x = np.linspace(*XYrange, n_bins_x+1) + bins_y = np.linspace(*XYrange, n_bins_y+1) + counts = np.full(shape=(n_bins_x, n_bins_y), fill_value = 100, dtype=int).flatten() + + df = krf.create_df_kr_map(bins_x, bins_y, counts, n_min, r_max) columns = ['bin', 'counts', 'e0', 'ue0', 'lt', 'ult', 'cov', 'res_std', 'chi2', 'pval', 'in_active', 'has_min_counts', 'fit_success', 'valid', 'R', 'X', 'Y'] assert all(element in columns for element in df.columns.values) + assert df.bin.nunique() == n_bins_x*n_bins_y + def test_get_bin_counts_and_event_id_correct_output(): From 46955a9a822563d217526bf10af20996fb1eed33 Mon Sep 17 00:00:00 2001 From: carhc Date: Wed, 29 Jan 2025 13:52:58 +0100 Subject: [PATCH 19/25] Add test for get_bin_counts_and_event_bin_id function --- invisible_cities/reco/krmap_functions_test.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index 1a6e970ab..573af259f 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -5,16 +5,16 @@ import pandas as pd import scipy.optimize as so -from pytest import mark +from pytest import mark, fixture from hypothesis import given, settings from hypothesis.strategies import floats, integers -from hypothesis.extra.numpy import arrays from .. reco import krmap_functions as krf from ..types.symbols import KrFitFunction from .. evm.ic_containers import FitFunction from .. core.fit_functions import expo - +from .. core.core_functions import in_range +from .. conftest import KrMC_kdst @given(floats(min_value = 0, max_value = 10), floats(min_value = 10, max_value = 20), @@ -152,7 +152,17 @@ def test_create_df_kr_map_shape(n_bins, n_min, r_max): assert df.bin.nunique() == n_bins_x*n_bins_y +@mark.parametrize('bins_x bins_y'.split(), + ((np.linspace(-100, +100, 5), np.linspace(-100, +100, 10)), + ( np.linspace(-400, +400, 10), np.linspace(-400, +400, 10)), + )) +def test_get_bin_counts_and_event_bin_id(KrMC_kdst, bins_x, bins_y): + + kr_df = KrMC_kdst[0].read + inside = in_range(kr_df.X, min(bins_x), max(bins_x)) & in_range(kr_df.Y, min(bins_y), max(bins_y)) + + counts, bin_labels = krf.get_bin_counts_and_event_bin_id(dst = kr_df, bins_x = bins_x, bins_y = bins_y) -def test_get_bin_counts_and_event_id_correct_output(): + assert counts.sum() == kr_df[inside].event.nunique() + assert len(bin_labels) == kr_df.event.nunique() - return From b30b7b891cde3efc5e882570fbc9bbd7a7338c43 Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 30 Jan 2025 15:52:31 +0100 Subject: [PATCH 20/25] Update valid_bin_counter function --- invisible_cities/reco/krmap_functions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 73c06bee2..1ed358ba1 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -380,11 +380,10 @@ def valid_bin_counter(map_df : pd.DataFrame, inside = np.count_nonzero(map_df.in_active) valid = np.count_nonzero(map_df.valid) - valid_per = valid / inside + valid_per = valid / inside if valid <= inside else 0 if valid_per <= validity_parameter: warnings.warn(f"{inside-valid} inner bins are not valid. {100*valid_per:.1f}% are successful.", UserWarning) - else: print(f"{inside-valid} inner bins are not valid. {100*valid_per:.1f}% are successful.") return valid_per From e6d0256ffeb1dd9ee5df2cb11c46748ff85e87a3 Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 30 Jan 2025 15:54:27 +0100 Subject: [PATCH 21/25] Add tests for valid_bin_counter function --- invisible_cities/reco/krmap_functions_test.py | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index 573af259f..d4fd73139 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -5,7 +5,7 @@ import pandas as pd import scipy.optimize as so -from pytest import mark, fixture +from pytest import mark, warns from hypothesis import given, settings from hypothesis.strategies import floats, integers @@ -166,3 +166,35 @@ def test_get_bin_counts_and_event_bin_id(KrMC_kdst, bins_x, bins_y): assert counts.sum() == kr_df[inside].event.nunique() assert len(bin_labels) == kr_df.event.nunique() + +@mark.parametrize('n_bins rmax'.split(), + ((( 5, 5), 200), + (( 3, 3), 100),)) +def test_valid_bin_counter(n_bins, rmax): + counts = np.array(range(n_bins[0]*n_bins[1])) + krmap = krf.create_df_kr_map(bins_x = np.linspace(-rmax, +rmax, n_bins[0]+1), + bins_y = np.linspace(-rmax, +rmax, n_bins[1]+1), + counts = counts, + n_min = 0, + r_max = np.nextafter(np.sqrt(2)*rmax, np.inf)) + + krmap.valid.iloc[0 : 9] = True + + assert krf.valid_bin_counter(krmap) == 9 / (n_bins[0]*n_bins[1]) + +@mark.parametrize('n_bins rmax validity_parameter'.split(), + ((( 5, 5), 200, 1.0), + (( 3, 3), 100, 0.2))) +def test_valid_bin_counter_warning(n_bins, rmax, validity_parameter): + counts = np.array(range(n_bins[0]*n_bins[1])) + krmap = krf.create_df_kr_map(bins_x = np.linspace(-rmax, +rmax, n_bins[0]+1), + bins_y = np.linspace(-rmax, +rmax, n_bins[1]+1), + counts = counts, + n_min = 0, + r_max = np.nextafter(np.sqrt(2)*rmax, np.inf)) + + krmap.valid.iloc[0 : 9] = True + + if validity_parameter == 1: + with warns(UserWarning, match = "inner bins are not valid."): + krf.valid_bin_counter(map_df = krmap, validity_parameter = validity_parameter) From bae44b82552d6ef9547d07edcb34f65f243cd3d2 Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 30 Jan 2025 15:54:47 +0100 Subject: [PATCH 22/25] Cosmetics --- invisible_cities/reco/krmap_functions_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index d4fd73139..45dbd85c9 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -128,7 +128,6 @@ def test_get_number_of_bins_with_thresholds(nevents, thr): npt.assert_array_equal(bins, np.array([100, 100])) - @mark.parametrize('n_bins n_min r_max'.split(), (((10, 10), 50, 200), ( (50, 50), 10, 350), @@ -182,6 +181,7 @@ def test_valid_bin_counter(n_bins, rmax): assert krf.valid_bin_counter(krmap) == 9 / (n_bins[0]*n_bins[1]) + @mark.parametrize('n_bins rmax validity_parameter'.split(), ((( 5, 5), 200, 1.0), (( 3, 3), 100, 0.2))) From 77757929af03a03ecd32ab244fb1a76fc35fbba7 Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 30 Jan 2025 18:12:10 +0100 Subject: [PATCH 23/25] Update valid_bin_counter function --- invisible_cities/reco/krmap_functions.py | 4 +- invisible_cities/reco/krmap_functions_test.py | 51 +++++++++++++------ 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index 1ed358ba1..e090b9537 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -378,9 +378,9 @@ def valid_bin_counter(map_df : pd.DataFrame, ''' inside = np.count_nonzero(map_df.in_active) - valid = np.count_nonzero(map_df.valid) + valid = np.count_nonzero(map_df.in_active & map_df.valid) - valid_per = valid / inside if valid <= inside else 0 + valid_per = valid / inside if inside else 0 if valid_per <= validity_parameter: warnings.warn(f"{inside-valid} inner bins are not valid. {100*valid_per:.1f}% are successful.", UserWarning) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index 45dbd85c9..7a5024bd1 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -165,22 +165,41 @@ def test_get_bin_counts_and_event_bin_id(KrMC_kdst, bins_x, bins_y): assert counts.sum() == kr_df[inside].event.nunique() assert len(bin_labels) == kr_df.event.nunique() - -@mark.parametrize('n_bins rmax'.split(), - ((( 5, 5), 200), - (( 3, 3), 100),)) -def test_valid_bin_counter(n_bins, rmax): - counts = np.array(range(n_bins[0]*n_bins[1])) - krmap = krf.create_df_kr_map(bins_x = np.linspace(-rmax, +rmax, n_bins[0]+1), - bins_y = np.linspace(-rmax, +rmax, n_bins[1]+1), - counts = counts, - n_min = 0, - r_max = np.nextafter(np.sqrt(2)*rmax, np.inf)) - - krmap.valid.iloc[0 : 9] = True - - assert krf.valid_bin_counter(krmap) == 9 / (n_bins[0]*n_bins[1]) - +@mark.parametrize('n', (0, 8, 50)) +def test_valid_bin_counter_all_in_active(n): + inside = np.ones(50, dtype = bool) + valid = inside.copy() + valid[:n//2] = False + valid[len(valid)-n//2:] = False + krmap = pd.DataFrame(dict(in_active = inside, + valid = valid)) + + assert krf.valid_bin_counter(krmap) == ((50-n) / 50) + + +def test_valid_bin_counter_valid_subset_of_active(): + inside = np.ones(50, dtype = bool) + inside[:3] = False + inside[-3:] = False + valid = inside.copy() + valid[:4] = False + valid[-4:] = False + krmap = pd.DataFrame(dict(in_active = inside, + valid = valid)) + + assert krf.valid_bin_counter(krmap) == ((50-6-2) / (50-6)) + +@mark.parametrize('ni', (0, 4, 25)) +@mark.parametrize('nv', (0, 3, 25)) +def test_valid_bin_counter_valid_outside_active(ni, nv): + inside = np.ones(50, dtype = bool) + inside[len(inside)-ni:] = False + valid = np.ones(50, dtype = bool) + valid[:nv] = False + krmap = pd.DataFrame(dict(in_active = inside, + valid = valid)) + + assert krf.valid_bin_counter(krmap) == ((50-ni-nv) / (50-ni)) @mark.parametrize('n_bins rmax validity_parameter'.split(), ((( 5, 5), 200, 1.0), From 0a48e25764b8d1b25da22a4f650ca4644b8e3a3a Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 30 Jan 2025 18:13:29 +0100 Subject: [PATCH 24/25] Update regularize_map_function --- invisible_cities/reco/krmap_functions.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py index e090b9537..8d0d57fec 100644 --- a/invisible_cities/reco/krmap_functions.py +++ b/invisible_cities/reco/krmap_functions.py @@ -450,7 +450,7 @@ def fit_and_fill_map(map_bin : pd.DataFrame, def regularize_map(maps : pd.DataFrame, - x2range : Tuple[float, float]): + x2range : Tuple[float, float] = None): ''' Given a certain chi2 range, this function checks which bins are outside that range and substitutes the (probably wrong) values of the map (e0, lt, etc) for @@ -468,7 +468,11 @@ def regularize_map(maps : pd.DataFrame, new_map: pd.DataFrame Regularized map.''' new_map = maps.copy() - outliers = new_map.in_active & ~in_range(new_map.chi2, *x2range) + outliers = new_map.in_active & ~new_map.valid + + if isinstance(x2range, Tuple): + outliers &= ~in_range(new_map.chi2, *x2range) + else: outliers = outliers new_map['e0'] [outliers] = np.nanmean(maps['e0']) new_map['lt'] [outliers] = np.nanmean(maps['lt']) From 9cb91179adf541e7f1d06ea740e3fcaa1b403428 Mon Sep 17 00:00:00 2001 From: carhc Date: Thu, 30 Jan 2025 18:39:19 +0100 Subject: [PATCH 25/25] Add test for regularize_map function --- invisible_cities/reco/krmap_functions_test.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py index 7a5024bd1..f7b5708db 100644 --- a/invisible_cities/reco/krmap_functions_test.py +++ b/invisible_cities/reco/krmap_functions_test.py @@ -217,3 +217,22 @@ def test_valid_bin_counter_warning(n_bins, rmax, validity_parameter): if validity_parameter == 1: with warns(UserWarning, match = "inner bins are not valid."): krf.valid_bin_counter(map_df = krmap, validity_parameter = validity_parameter) + + +def test_regularize_map_no_chi2range(): + + inside = np.ones(50, dtype = bool) + valid = inside.copy() + valid[20:30] = False + maps = pd.DataFrame(dict(in_active = inside, + valid = valid, + e0 = np.full(50, 10), + lt = np.full(50, 1000), + ue0 = np.full(50, 2), + ult = np.full(50, 100))) + + new_map = krf.regularize_map(maps = maps, x2range = None) + + for col in ['e0', 'lt', 'ue0', 'ult']: + mean_value = np.nanmean(maps[col]) + assert np.allclose(new_map[col][~valid], mean_value, rtol=0.01)