diff --git a/.gitignore b/.gitignore index 7c146c8..31f244b 100644 --- a/.gitignore +++ b/.gitignore @@ -165,4 +165,5 @@ cython_debug/ # ignore the new .txt files generated in test /packs/tests/data/repetitive_data/test_*.txt # temporary h5 files get ignored -*tmp.h5 \ No newline at end of file +*tmp.h5 +*tmp*.h5 diff --git a/packs/configs/process_WD2_3channel.conf b/packs/configs/process_WD2_3channel.conf index 07190a6..c3ab45a 100644 --- a/packs/configs/process_WD2_3channel.conf +++ b/packs/configs/process_WD2_3channel.conf @@ -8,4 +8,4 @@ save_path = '/path/to/file.h5' [optional] overwrite = True -counts = -1 \ No newline at end of file +print_mod = -1 diff --git a/packs/proc/proc.py b/packs/proc/proc.py index 863e24d..3a507e5 100644 --- a/packs/proc/proc.py +++ b/packs/proc/proc.py @@ -4,7 +4,7 @@ from packs.core.io import read_config_file from packs.proc.processing_utils import process_csv_lecroy -from packs.proc.processing_utils import process_bin_WD2 +from packs.proc.processing_utils import process_bin_WD2_lazy from packs.proc.processing_utils import process_bin_WD1 from packs.proc.calibration_utils import calibrate from packs.core.core_utils import check_test @@ -36,7 +36,7 @@ def proc(config_file): case 1: process_bin_WD1(**conf_dict) case 2: - process_bin_WD2(**conf_dict) + process_bin_WD2_lazy(**conf_dict) case other: raise RuntimeError(f"wavedump edition {other} decoding isn't currently implemented.") else: @@ -48,4 +48,4 @@ def proc(config_file): except KeyError as e: print(f"\nError in the configuration file, incorrect or missing argument: {e} \n") traceback.print_exc() - sys.exit(2) \ No newline at end of file + sys.exit(2) diff --git a/packs/proc/processing_utils.py b/packs/proc/processing_utils.py index c7ec2ed..469ec31 100644 --- a/packs/proc/processing_utils.py +++ b/packs/proc/processing_utils.py @@ -13,8 +13,11 @@ from typing import BinaryIO from typing import Generic from typing import Optional -from datetime import datetime from typing import 
List +from typing import Generator + +from datetime import datetime + # imports start from MULE/ from packs.core.core_utils import flatten @@ -196,9 +199,9 @@ def process_header(file_path : str, # open file if not os.path.exists(file_path): - raise FileNotFoundError(2, 'Path or file not found', file_path) + raise FileNotFoundError(2, 'Path or file not found', file_path) - with open(file_path, 'rb') as file: + with open(file_path, 'rb') as file: event_number, timestamp, samples, sampling_period = read_defaults_WD2(file, byte_order) # attempt to read channels @@ -231,6 +234,41 @@ def process_header(file_path : str, return wdtype, samples, sampling_period, channels +def read_binary_lazy(file : BinaryIO, + wdtype : np.dtype) -> Generator: + ''' + Reads the binary in with the expected format/offset, lazily, + depending on counts to break the data up. + + NOTE: + The counts are hardset to 1, making this function relatively inefficient. + In the future, the logic should be revised to allow `np.fromfile`'s count + value to be set based on optimal read-in speed. The logic of the WD2 function + will have to accommodate this when indexing the files. 
+ + Parameters + ---------- + + file (BufferedReader) : Opened file + wdtype (ndtype) : Custom data type for extracting information from + binary files + + Returns + ------- + data (ndarray) : Unformatted data from binary file + + ''' + # initialise data to start the loop + data = (np.fromfile(file, dtype=wdtype, count = 1)) + while len(data) != 0: + yield (True, data) + # ensure data is loaded in after the yield, so the while check is done + data = (np.fromfile(file, dtype=wdtype, count = 1)) + # yield (False, placeholder array) as a sentinel once the file is exhausted + print('Processing Finished!') + yield (False, np.zeros(shape = (1,))) + + def read_binary(file : BinaryIO, wdtype : np.dtype, counts : Optional[int] = -1, @@ -258,6 +296,18 @@ def read_binary(file : BinaryIO, return data + +def number_of_events_WD2(file_path : str, + samples : int, + channels : int, + header_size : int) -> int: + file_size = os.path.getsize(file_path) + waveform_size = ((samples * channels * 4 ) + header_size) # assumes 4 bytes per sample — TODO confirm sample width against the WD2 format + num_of_events = int(file_size / waveform_size) + + return num_of_events + + def format_wfs(data : np.ndarray, wdtype : np.dtype, samples : int, @@ -414,7 +464,6 @@ def process_event_lazy_WD1(file_object : BinaryIO): # header to check against sanity_header = header.copy() - # continue only if data exists while len(header) > 0: @@ -424,10 +473,8 @@ def process_event_lazy_WD1(file_object : BinaryIO): # collect waveform, no of samples and timestamp yield (np.fromfile(file_object, dtype = np.dtype(' List: + batch_size : int) -> List: ''' Outputs a list of all the second elements of a row for each batch then goes to the next row @@ -681,14 +786,14 @@ def process_event_lazy_lecroy(file_obj : io.TextIOWrapper): # time since first sample recorded evt_info_times[evt_info_line_idx] = evt_info_line[2] # end of header - + # start of data data_heading = next(file_obj).split(',') reader = csv.reader(file_obj) wf_num = 0 while batch := get_batch(reader, segment_size): - yield (batch, 
evt_info_times[wf_num]) + yield (batch, evt_info_times[wf_num]) wf_num += 1 # end of data @@ -697,7 +802,7 @@ def process_event_lazy_lecroy(file_obj : io.TextIOWrapper): def process_csv_lecroy(file_path : str, save_path : str, overwrite : Optional[bool] = False, - print_mod : Optional[int] = -1): + print_mod : Optional[int] = -1): """ Process a Lecroy CSV waveform file and write the parsed events to a structured output file. This only works for individual channels at the moment, as Lecroy oscilloscopes save one file per channel. @@ -737,4 +842,4 @@ def process_csv_lecroy(file_path : str, # add data to df write('event_info', event_info, (True, num_of_events, i)) - write('rwf', waveforms, (True, num_of_events, i)) \ No newline at end of file + write('rwf', waveforms, (True, num_of_events, i)) diff --git a/packs/tests/data/10000bytes.bin b/packs/tests/data/10000bytes.bin new file mode 100644 index 0000000..e64c723 Binary files /dev/null and b/packs/tests/data/10000bytes.bin differ diff --git a/packs/tests/data/100bytes.bin b/packs/tests/data/100bytes.bin new file mode 100644 index 0000000..eeb5760 Binary files /dev/null and b/packs/tests/data/100bytes.bin differ diff --git a/packs/tests/data/configs/process_WD2_1channel.conf b/packs/tests/data/configs/process_WD2_1channel.conf index 82dfe2a..b7b3404 100644 --- a/packs/tests/data/configs/process_WD2_1channel.conf +++ b/packs/tests/data/configs/process_WD2_1channel.conf @@ -6,4 +6,4 @@ save_path = '/home/e78368jw/Documents/MULE/packs/tests/data/one_channel_tmp.h5' [optional] overwrite = True - +print_mod = -1 diff --git a/packs/tests/data/configs/process_WD2_3channel.conf b/packs/tests/data/configs/process_WD2_3channel.conf index e82645e..205576e 100644 --- a/packs/tests/data/configs/process_WD2_3channel.conf +++ b/packs/tests/data/configs/process_WD2_3channel.conf @@ -6,5 +6,5 @@ save_path = '/home/e78368jw/Documents/MULE/packs/tests/data/three_channels_tmp.h [optional] overwrite = True -counts = 10 +print_mod = -1 
diff --git a/packs/tests/processing_test.py b/packs/tests/processing_test.py index 11912ae..05ef170 100644 --- a/packs/tests/processing_test.py +++ b/packs/tests/processing_test.py @@ -19,9 +19,11 @@ from packs.proc.processing_utils import read_defaults_WD2 from packs.proc.processing_utils import process_header from packs.proc.processing_utils import read_binary +from packs.proc.processing_utils import read_binary_lazy from packs.proc.processing_utils import format_wfs from packs.proc.processing_utils import check_save_path from packs.proc.processing_utils import save_data +from packs.proc.processing_utils import number_of_events_WD2 from packs.types.types import generate_wfdtype from packs.types.types import rwf_type @@ -64,7 +66,7 @@ def test_header_components_read_as_expected(wd2_3ch_bin): def test_nonexistent_file_raises_error(): - + fake_path = '/this/path/does/not/exist.bin' with raises(FileNotFoundError): @@ -152,7 +154,7 @@ def test_formatting_works(data_dir, wd2_3ch_bin): def test_save_path_exists(): data_path = 'some/fake/path/three_channels_WD2.h5' - + with raises(FileNotFoundError): check_save_path(data_path, overwrite = False) @@ -276,4 +278,43 @@ def test_lazy_loading_short_header_WD1(MULE_dir): a = process_event_lazy_WD1(file) next(a) +@mark.parametrize("file, samples, channels, header_size, output", [('100bytes.bin', 1, 1, 0, 25), ('100bytes.bin', 1, 1, 46, 2), ('100bytes.bin', 2, 10, 20, 1), ('10000bytes.bin', 4, 8, 72, 50)]) +def test_number_of_events_correct(data_dir, file, samples, channels, header_size, output): + ''' + Simple test to ensure the logic returns the number of events we expect. 
+ ''' + file_path = data_dir + file + + assert output == number_of_events_WD2(file_path, samples, channels, header_size) + + +@mark.parametrize("inpt", [("one_channel_WD2.bin"),("three_channels_WD2.bin")]) +def test_lazy_eager_WD2_match(data_dir, inpt): + ''' + test to ensure that lazy and eager WD2 + provide the same result + ''' + + # how many events are we looking at? + counts = 30 + + # extract directory + file_path = data_dir + inpt + + # collect header info + wdtype, samples, sampling_period, channels = process_header(file_path) + + # collect lazy data + lazy_data = [] + with open(file_path, 'rb') as f: + binary_lazy_readout = read_binary_lazy(f, wdtype) + for i in range(0,counts): + _, lazy_wf = next(binary_lazy_readout) + lazy_data.append(lazy_wf) + + # open eager data + with open(file_path, 'rb') as f: + data = read_binary (f, wdtype, counts) + for i in range(0,counts): + assert data[i] == lazy_data[i]