From a067c2894598eedd7d585961ac4e17a1c1e22863 Mon Sep 17 00:00:00 2001 From: Tuomas Siipola Date: Fri, 6 Mar 2026 16:21:16 +0200 Subject: [PATCH 1/7] Add experimental disdrometer readers --- cloudnetpy/disdronator/__init__.py | 3 + cloudnetpy/disdronator/lpm.py | 132 ++++++++ cloudnetpy/disdronator/parsivel.py | 527 +++++++++++++++++++++++++++++ cloudnetpy/disdronator/rd80.py | 31 ++ cloudnetpy/disdronator/utils.py | 27 ++ pyproject.toml | 2 +- 6 files changed, 721 insertions(+), 1 deletion(-) create mode 100644 cloudnetpy/disdronator/__init__.py create mode 100644 cloudnetpy/disdronator/lpm.py create mode 100644 cloudnetpy/disdronator/parsivel.py create mode 100644 cloudnetpy/disdronator/rd80.py create mode 100644 cloudnetpy/disdronator/utils.py diff --git a/cloudnetpy/disdronator/__init__.py b/cloudnetpy/disdronator/__init__.py new file mode 100644 index 00000000..dfb09a77 --- /dev/null +++ b/cloudnetpy/disdronator/__init__.py @@ -0,0 +1,3 @@ +from cloudnetpy.disdronator.lpm import read_lpm +from cloudnetpy.disdronator.parsivel import read_parsivel +from cloudnetpy.disdronator.rd80 import read_rd80 diff --git a/cloudnetpy/disdronator/lpm.py b/cloudnetpy/disdronator/lpm.py new file mode 100644 index 00000000..6724e0fd --- /dev/null +++ b/cloudnetpy/disdronator/lpm.py @@ -0,0 +1,132 @@ +import datetime +from os import PathLike +from typing import TypeAlias + +import numpy as np +import numpy.typing as npt + +from cloudnetpy.disdronator.utils import convert_to_numpy + +LpmOutput: TypeAlias = tuple[list, dict[int, list]] + +# fmt: off +INT_KEYS = { + 2, 3, 7, 8, 11, 12, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 524, +} +FLOAT_KEYS = {10, 14, 15, 16, 17, 19, 21, 46, 521, 522, 523} +FILL_VALUES = { + 18: 99999, + 19: -9.9, + 46: 99999, + 47: 999, + 48: 9999, + 49: 9999, + 50: 9999, + 521: 99999, + 522: 99999, + 523: 9999, + 524: 999, +} +Dlow = np.array([ + 0.125, 0.250, 0.375, 0.500, 
0.750, 1.000, 1.250, 1.500, 1.750, 2.000, 2.500, + 3.000, 3.500, 4.000, 4.500, 5.000, 5.500, 6.000, 6.500, 7.000, 7.500, + 8.000, +]) +Dspr = np.array([ + 0.125, 0.125, 0.125, 0.250, 0.250, 0.250, 0.250, 0.250, 0.250, 0.500, 0.500, + 0.500, 0.500, 0.500, 0.500, 0.500, 0.500, 0.500, 0.500, 0.500, 0.500, + 0.500, +]) +Vlow = np.array([ + 0.000, 0.200, 0.400, 0.600, 0.800, 1.000, 1.400, 1.800, 2.200, 2.600, 3.000, + 3.400, 4.200, 5.000, 5.800, 6.600, 7.400, 8.200, 9.000, 10.000, +]) +Vspr = np.array([ + 0.200, 0.200, 0.200, 0.200, 0.200, 0.400, 0.400, 0.400, 0.400, 0.400, 0.400, + 0.800, 0.800, 0.800, 0.800, 0.800, 0.800, 0.800, 1.000, 10.000, +]) +# fmt: on + +Dmid = Dlow + Dspr / 2 +Vmid = Vlow + Vspr / 2 +A = (20 / 1000) * (228 / 1000) # TODO: AU + + +def _read_telegram(telegram: str, data: dict[int, list]) -> None: + telegram = telegram.strip().rstrip(";") + values = telegram.split(";") + if len(values) not in (520, 524): + msg = "Invalid telegram length" + raise ValueError(msg) + for i, value in enumerate(values[:-1]): + no = i + 2 + parsed: datetime.date | datetime.time | int | float | str + if no == 5: + parsed = datetime.datetime.strptime(value, "%d.%m.%y").date() + elif no == 6: + parsed = datetime.datetime.strptime(value, "%H:%M:%S").time() + elif no in INT_KEYS or 81 <= no <= 520: + parsed = int(value) + elif no in FLOAT_KEYS: + parsed = float(value) + else: + parsed = value + if no not in data: + data[no] = [] + data[no].append(parsed) + + +def _read_pyatmoslogger(filename: str | PathLike) -> LpmOutput: + time = [] + data: dict = {} + with open(filename, errors="ignore") as f: + f.readline() + for line in f: + timestamp, telegram = line.split(";", maxsplit=1) + try: + _read_telegram(telegram, data) + time.append(datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")) + except ValueError: + pass + return time, data + + +def _read_lampedusa(filename: str | PathLike) -> LpmOutput: + time = [] + data: dict = {} + with open(filename) as f: + _, _, _, _ = 
f.readline(), f.readline(), f.readline(), f.readline() + for line in f: + cols = [col.strip('"') for col in line.strip().split(",")] + try: + _read_telegram(cols[2], data) + time.append(datetime.datetime.strptime(cols[0], "%Y-%m-%d %H:%M:%S")) + except ValueError: + pass + return time, data + + +def _read_raw(filename: str | PathLike) -> LpmOutput: + time = [] + data: dict = {} + with open(filename) as f: + for line in f: + try: + _read_telegram(line, data) + time.append(datetime.datetime.combine(data[5][-1], data[6][-1])) + except ValueError: + pass + return time, data + + +def read_lpm(filename: str | PathLike) -> tuple[npt.NDArray, dict[int, npt.NDArray]]: + with open(filename, "rb") as f: + head = f.read(50) + if head.lower().startswith(b"datetime [utc]"): + time, data = _read_pyatmoslogger(filename) + elif b"TOA5" in head: + time, data = _read_lampedusa(filename) + else: + time, data = _read_raw(filename) + return np.ndarray(time), convert_to_numpy(data, FILL_VALUES) diff --git a/cloudnetpy/disdronator/parsivel.py b/cloudnetpy/disdronator/parsivel.py new file mode 100644 index 00000000..a0fb10c2 --- /dev/null +++ b/cloudnetpy/disdronator/parsivel.py @@ -0,0 +1,527 @@ +import datetime +import re +from os import PathLike +from typing import TypeAlias + +import cftime +import netCDF4 +import numpy as np +import numpy.typing as npt + +from cloudnetpy.disdronator.utils import convert_to_numpy + +ParsivelOutput: TypeAlias = tuple[list, dict[int, list]] + +# pyAtmosLogger headers +PYATLO_HEADER: dict[bytes, str | int | None] = { + b"datetime_utc": "%Y-%m-%d %H:%M:%S", + b"rain rate [mm/h]": 1, + b"rain accum [mm]": 2, + b"wawa": 3, # missing in some files + b"Z [dBz]": 7, + b"MOR visibility [m]": 8, + b"sample interval [s]": 9, + b"Signal amplitude": 10, + b"Number of detected particles": 11, + b"Temperature sensor [\xc2\xb0C]": 12, # utf8 + b"Temperature sensor [\xb0C]": 12, # latin1 + b"Serial number": 13, + b"IOP firmware version": 14, + b"Current heating system 
[A]": 16, + b"Power supply voltage in the sensor [V]": 17, + b"Sensor status": 18, + b"Station name": 22, + b"Rain amount absolute [mm]": 24, + b"Error code": 25, + b"N": 90, + b"v": 91, + b"M": 93, + # Custom headers (Kenttärova and Vehmasmäki): + b"wawa [ww]": 4, + b"wawa [METAR]": 5, + b"wawa [NWS]": 6, + b"DSP firmware version": 15, + b"Start of measurement [DD.MM.YY_HH:MM:SS]": 19, + b"Sensor time [HH:MM:SS]": 20, + b"Sensor date [DD.MM.YY]": 21, + b"Station number": 23, + b"Temperature PCB [\xc2\xb0C]": 26, # utf8 + b"Temperature PCB [\xb0C]": 26, # latin1 + b"Temperature right sensor head [\xc2\xb0C]": 27, # utf8 + b"Temperature right sensor head [\xb0C]": 27, # latin1 + b"Temperature left sensor head [\xc2\xb0C]": 28, # utf8 + b"Temperature left sensor head [\xb0C]": 28, # utf8 + b"Rain intensity 16 bit low [mm/h]": 30, + b"Rain intensity 16 bit high [mm/h]": 31, + b"Rain accumulated 16 bit [mm]": 32, + b"Reflectivity 16 bit [dBZ]": 33, + b"Kinetic energy [J m-2 h-1)]": 34, + b"Snow depth intensity (vol equiv.) [mm/h]": 35, + b"Number of particles": 60, + b"Particle list (empty, see particle file)": None, +} + +# Headers used in OTT's ASDO software. 
+# https://www.otthydromet.com/en/p-asdo-application-software-ott-parsivel/6610001432 +ASDO_HEADER: dict[bytes, str | int | None] = { + b"Date": "%Y/%m/%d", + b"Time": "%H:%M:%S", + b"Intensity of precipitation (mm/h)": 1, + b"Precipitation since start (mm)": 2, + b"Radar reflectivity (dBz)": 7, + b"MOR Visibility (m)": 8, + b"Signal amplitude of Laserband": 10, + b"Number of detected particles": 11, + b"Temperature in sensor (\xc2\xb0C)": 12, # utf8 + b"Temperature in sensor (\xb0C)": 12, # latin1 + b"Heating current (A)": 16, + b"Sensor voltage (V)": 17, + b"Kinetic Energy": 34, + b"Snow intensity (mm/h)": 35, + b"Weather code SYNOP WaWa": 3, + b"Weather code METAR/SPECI": 5, + b"Weather code NWS": 6, + b"Optics status": 18, + b"Spectrum": 93, +} + + +GRANADA_HEADERS: dict[bytes, str | int | None] = { + b"TIMESTAMP": '"%Y-%m-%d %H:%M:%S"', + b"RECORD": None, + b"rain_intensity": 1, + b"snow_intensity": 35, + b"precipitation": 24, + b"weather_code_wawa": 3, + b"radar_reflectivity": 7, + b"mor_visibility": 8, + b"kinetic_energy": 34, + b"signal_amplitude": 10, + b"sensor_temperature": 12, + b"pbc_temperature": 26, + b"right_temperature": 27, + b"left_temperature": 28, + b"heating_current": 16, + b"sensor_voltage": 17, + b"sensor_status": 18, + b"error_code": 25, + b"number_particles": 11, + b"N": 90, + b"V": 91, + b"spectrum": 93, +} + +# parsivel2nc +# https://github.com/lacros-tropos/parsivel2tools +PARSIVEL2NC_KEYS = { + "interval": 9, + "data_raw": 93, + "number_concentration": 90, + "fall_velocity": 91, + "n_particles": 11, + "rainfall_rate": 1, + "radar_reflectivity": 7, + "E_kin": 24, + "visibility": 8, + "synop_WaWa": 3, + "synop_WW": 4, + "T_sensor": 12, + "sig_laser": 10, + "state_sensor": 18, + "V_sensor": 17, + "I_heating": 16, + "error_code": 25, +} + +# disdroDL +# https://github.com/ruisdael-observatory/disdroDL/blob/main/configs_netcdf/config_general_parsivel.yml +DISDRODL_KEYS = { + "rain_intensity": 1, + "code_4680": 3, + "code_4677": 4, + 
"code_4678": 5, + "code_NWS": 6, + "reflectivity": 7, + "MOR": 8, + "amplitude": 10, + "n_particles": 11, + "T_sensor": 12, + "I_heating": 16, + "V_power_supply": 17, + "state_sensor": 18, + "absolute_rain_amount": 24, + "error_code": 25, + "T_pcb": 26, + "T_L_sensor_head": 27, + "T_R_sensor_head": 28, + "kinetic_energy": 34, + "snowfall_intensity": 35, + "fieldN": 90, + "fieldV": 91, + "data_raw": 93, +} + +# Similar to netCDF files from pyAtmosLogger but with differences in variables +# names and without any global attributes. +MUNICH_KEYS = { + "status_sensor": 18, + "sensor_time": 20, + "error_code": 25, + "rr": 1, + "rain_accum": 2, + "wawa": 3, + "Ze": 7, + "n_particles": 11, + "snow_intensity": 35, + "sample_interval": 9, + "serial_no": 13, + "firmware_IOP": 14, + "firmware_DSP": 15, + "curr_heating": 16, + "volt_sensor": 17, + "signal_amplitude": 10, + "T_sensor_housing": 12, + "T_pcb": 26, + "T_sensor_right": 27, + "T_sensor_left": 28, + "N": 90, + "v": 91, + "M": 93, +} + +# Possibly an old version of parsivel2nc used in older Leipzig files. +LEIPZIG_KEYS = { + "Meas_Interval": 9, + "RR_Intensity": 1, + "RR_Accumulated": 2, + "RR_Total": 24, + "Synop_WaWa": 3, + "Synop_WW": 4, + "Reflectivity": 7, + "Visibility": 8, + "T_Sensor": 12, + "Sig_Laser": 10, + "N_Particles": 11, + "State_Sensor": 18, + "E_kin": 24, + "V_Sensor": 17, + "I_Heating": 16, + "Error_Code": 25, + "Data_N_Field": 90, + "Data_V_Field": 91, + "Data_Raw": 93, +} + +FLOAT_KEYS = {1, 2, 7, 16, 17, 24, 30, 31, 33, 34, 35} +INT_KEYS = {3, 4, 8, 9, 10, 11, 12, 13, 18, 25, 26, 27, 28, 60} + + +def _read_lines( + telegram: list[str | int | None], + content: bytes, + field_separator: bytes, + decimal_separator: bytes, +) -> ParsivelOutput: + # Expand spectra in ASDO files. 
+ content = re.sub(rb"([^>]*)", _expand_spectrum, content) + expected_len = 0 + for t in telegram: + if t == 90 or t == 91: + expected_len += 32 + elif t == 93: + expected_len += 1024 + else: + expected_len += 1 + data: dict = {t: [] for t in telegram if isinstance(t, int)} + times = [] + dates = [] + datetimes = [] + for line in content.splitlines(): + values = line.rstrip(field_separator).split(field_separator) + if len(values) != expected_len: + continue + try: + row_time = None + row_date = None + row_datetime = None + row_data: dict = {} + for t in telegram: + if t in FLOAT_KEYS: + row_data[t] = float(values[0].replace(decimal_separator, b".")) + values = values[1:] + elif t in INT_KEYS: + row_data[t] = int(values[0]) + values = values[1:] + elif t in (90, 91): + row_data[t] = [float(x) for x in values[:32]] + values = values[32:] + elif t == 93: + spectrum = [float(x) for x in values[:1024]] + row_data[t] = np.reshape(spectrum, (32, 32)) + values = values[1024:] + elif isinstance(t, str): + dt = datetime.datetime.strptime(values[0].decode(), t) + if "%H" in t and "%Y" not in t: + row_time = dt.time() + elif "%H" not in t: + row_date = dt.date() + else: + row_datetime = dt + values = values[1:] + elif t is None: + values = values[1:] + else: + row_data[t] = values[0] + values = values[1:] + if row_time is not None: + times.append(row_time) + if row_date is not None: + dates.append(row_date) + if row_datetime is not None: + datetimes.append(row_datetime) + for t, value in row_data.items(): + data[t].append(value) + except ValueError: + continue + if not datetimes: + datetimes = [ + datetime.datetime.combine(date, time) + for date, time in zip(dates, times, strict=True) + ] + return datetimes, data + + +def _expand_spectrum(m: re.Match) -> bytes: + if m[1] == b"ZERO": + return b"0;" * 1024 + return b";".join([x if x else b"0" for x in m[1].split(b";")]) + + +def _read_typ_op4a(content: bytes) -> dict: + lines = content.splitlines() + if lines[0] != b"TYP OP4A": 
+ msg = "Invalid message" + raise ValueError(msg) + data: dict = {} + for line in lines[1:]: + key, value = line.split(b":", maxsplit=1) + num = int(key) + if num in INT_KEYS: + data[num] = int(value) + elif num in FLOAT_KEYS: + data[num] = float(value) + elif num in (90, 91): + data[num] = [float(x) for x in value.rstrip(b";").split(b";")] + elif num == 93: + spectrum = [int(x) for x in value.rstrip(b";").split(b";")] + data[num] = np.reshape(spectrum, (32, 32)) + return data + + +def _read_pyatmoslogger(filename: str | PathLike) -> ParsivelOutput: + with open(filename, "rb") as f: + header = f.readline().rstrip(b"\r\n") + content = f.read() + v_header = ";".join(f"v{i:02}" for i in range(32)) + N_header = ";".join(f"N{i:02}" for i in range(32)) + M_header = ";".join(f"M_{i}_{j}" for i in range(32) for j in range(32)) + header = ( + header.replace(N_header.encode(), b"N") + .replace(v_header.encode(), b"v") + .replace(M_header.encode(), b"M") + ) + telegram = [PYATLO_HEADER[key] for key in header.split(b";")] + return _read_lines(telegram, content, b";", b".") + + +def _read_asdo(filename: str | PathLike) -> ParsivelOutput: + with open(filename, "rb") as f: + headers = f.readline().rstrip(b"\r\n").split(b";") + content = f.read() + telegram = [ASDO_HEADER[header] for header in headers] + return _read_lines(telegram, content, b";", b",") + + +def _read_granada(filename: str | PathLike) -> ParsivelOutput: + with open(filename, "rb") as f: + _, header, _, _ = ( + f.readline(), + f.readline().rstrip(b"\r\n"), + f.readline(), + f.readline(), + ) + content = f.read() + v_header = ",".join(f'"V({i + 1})"' for i in range(32)) + N_header = ",".join(f'"N({i + 1})"' for i in range(32)) + M_header = ",".join(f'"spectrum({i + 1})"' for i in range(1024)) + header = ( + header.replace(N_header.encode(), b"N") + .replace(v_header.encode(), b"V") + .replace(M_header.encode(), b"spectrum") + ) + telegram = [GRANADA_HEADERS[key.strip(b'"')] for key in header.split(b",")] + return 
_read_lines(telegram, content, b",", b".") + + +def _read_headerless( + filename: str | PathLike, + telegram: list[int | str | None], + field_separator: bytes, + decimal_separator: bytes, +) -> ParsivelOutput: + with open(filename, "rb") as f: + content = f.read() + return _read_lines(telegram, content, field_separator, decimal_separator) + + +def _read_hyytiala(filename: str | PathLike) -> ParsivelOutput: + time: list = [] + data: dict = {} + with open(filename, "rb") as f: + content = f.read() + for m in re.finditer( + rb"\[(?P\d+)-(?P\d+)-(?P\d+) " + rb"(?P\d+):(?P\d+):(?P\d+)\r?\n" + rb"(?P[^\]]*)\]", + content, + ): + try: + record = _read_typ_op4a(m["output"]) + timestamp = datetime.datetime( + int(m["year"]), + int(m["month"]), + int(m["day"]), + int(m["hour"]), + int(m["minute"]), + int(m["second"]), + ) + except ValueError: + continue + for key in record: + if key not in data: + data[key] = [None] * len(time) + for key in data: + data[key].append(record.get(key)) + time.append(timestamp) + return time, data + + +def _read_parsivel2nc(filename: str | PathLike) -> ParsivelOutput: + with netCDF4.Dataset(filename) as nc: + time = cftime.num2pydate(nc["time"][:], units=nc["time"].units) + data = {num: nc[key][:] for key, num in PARSIVEL2NC_KEYS.items()} + # The data logger converts mm/h to m/s, so we need to revert this. + data[1] *= 3600 * 1000 + # The data logger attempts to convert temperature from °C to K, but this + # is incorrectly done only for the first value. + data[12][0] -= 273 + # Sensor serial number from global attribute. 
+ data[13] = np.repeat(int(nc.Sensor_ID), len(time)) + return time, data + + +def _read_disdrodl(filename: str | PathLike) -> ParsivelOutput: + with netCDF4.Dataset(filename) as nc: + time = cftime.num2pydate(nc["time"][:], units=nc["time"].units) + data = { + num: nc[key][:] for key, num in DISDRODL_KEYS.items() if key in nc.variables + } + data[9] = np.repeat(nc["time_interval"][:], len(time)) + data[13] = np.repeat(int(nc.sensor_serial_number), len(time)) + return time, data + + +def _read_munich(filename: str | PathLike) -> ParsivelOutput: + with netCDF4.Dataset(filename) as nc: + time = cftime.num2pydate(nc["time"][:], units=nc["time"].units) + data = {num: nc[key][:] for key, num in MUNICH_KEYS.items()} + data[93] = np.transpose(data[93], (0, 2, 1)) + return time, data + + +def _read_leipzig(filename: str | PathLike) -> ParsivelOutput: + with netCDF4.Dataset(filename) as nc: + time = cftime.num2pydate(nc["Meas_Time"][:], units="seconds since 1970-01-01") + data = {num: nc[key][:] for key, num in LEIPZIG_KEYS.items()} + data[13] = np.repeat(int(nc.Sensor_ID), len(time)) + data[22] = np.repeat(nc.Station_Name, len(time)) + data[23] = np.repeat(nc.Station_ID, len(time)) + return time, data + + +def _read_parsivel( + filename: str | PathLike, + telegram: list[int | str | None] | None = None, + field_separator: str = ";", + decimal_separator: str = ".", +) -> ParsivelOutput: + try: + with netCDF4.Dataset(filename) as nc: + if "number_concentration" in nc.variables: + return _read_parsivel2nc(filename) + if "fieldN" in nc.variables: + return _read_disdrodl(filename) + if "N" in nc.variables: + return _read_munich(filename) + if "Data_N_Field" in nc.variables: + return _read_leipzig(filename) + msg = "Unsupported netCDF file" + raise ValueError(msg) + except OSError: + pass + with open(filename, "rb") as f: + head = f.read(50) + if head.startswith(b"datetime_utc;"): + return _read_pyatmoslogger(filename) + if head.startswith(b"Date;Time;"): + return 
_read_asdo(filename) + if head.startswith(b'"TOA5"'): + return _read_granada(filename) + if b"TYP OP4A" in head: + return _read_hyytiala(filename) + if telegram is None: + msg = "telegram must be defined" + raise ValueError(msg) + return _read_headerless( + filename, telegram, field_separator.encode(), decimal_separator.encode() + ) + + +def read_parsivel( + filename: str | PathLike, + telegram: list[int | str | None] | None = None, + field_separator: str = ";", + decimal_separator: str = ".", +) -> tuple[npt.NDArray, dict[int, npt.NDArray]]: + time, data = _read_parsivel(filename, telegram, field_separator, decimal_separator) + return np.array(time), convert_to_numpy(data, {}, INT_KEYS, FLOAT_KEYS) + + +# fmt: off +# mm +Dmid = np.array([ + 0.062, 0.187, 0.312, 0.437, 0.562, 0.687, 0.812, 0.937, 1.062, 1.187, 1.375, + 1.625, 1.875, 2.125, 2.375, 2.750, 3.250, 3.750, 4.250, 4.750, 5.500, 6.500, + 7.500, 8.500, 9.500, 11.000, 13.000, 15.000, 17.000, 19.000, 21.500, 24.500, +]) +# mm +Dspr = np.array( [ + 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.250, + 0.250, 0.250, 0.250, 0.250, 0.500, 0.500, 0.500, 0.500, 0.500, 1.000, 1.000, + 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 2.000, 2.000, 3.000, 3.000, +]) +# m/s +Vmid = np.array([ + 0.050, 0.150, 0.250, 0.350, 0.450, 0.550, 0.650, 0.750, 0.850, 0.950, 1.100, + 1.300, 1.500, 1.700, 1.900, 2.200, 2.600, 3.000, 3.400, 3.800, 4.400, 5.200, + 6.000, 6.800, 7.600, 8.800, 10.400, 12.000, 13.600, 15.200, 17.600, 20.800, +]) +# m/s +Vspr = np.array([ + 0.100, 0.100, 0.100, 0.100, 0.100, 0.100, 0.100, 0.100, 0.100, 0.100, 0.200, + 0.200, 0.200, 0.200, 0.200, 0.400, 0.400, 0.400, 0.400, 0.400, 0.800, 0.800, + 0.800, 0.800, 0.800, 1.600, 1.600, 1.600, 1.600, 1.600, 3.200, 3.200, +]) +# fmt: on + +A = 0.0054 # sampling area [m2] 30 mm * 180 mm diff --git a/cloudnetpy/disdronator/rd80.py b/cloudnetpy/disdronator/rd80.py new file mode 100644 index 00000000..8a865f66 --- /dev/null +++ 
b/cloudnetpy/disdronator/rd80.py @@ -0,0 +1,31 @@ +import datetime +from os import PathLike + + +def read_rd80(filename: str | PathLike) -> tuple[list, dict[str, list]]: + time = [] + data: dict[str, list] = { + "n": [], + "Status": [], + "Interval [s]": [], + "RI [mm/h]": [], + "RA [mm]": [], + "RAT [mm]": [], + } + with open(filename) as f: + keys = f.readline().rstrip("\r\n").split("\t") + for line in f: + try: + row = dict(zip(keys, line.rstrip("\r\n").split("\t"), strict=True)) + except ValueError: + continue + dt = datetime.datetime.strptime( + f"{row['YYYY-MM-DD']} {row['hh:mm:ss']}", "%Y-%m-%d %H:%M:%S" + ) + time.append(dt) + data["n"].append([int(row[f"n{i + 1}"]) for i in range(20)]) + data["Status"].append(row["Status"]) + data["Interval [s]"].append(int(row["Interval [s]"])) + for key in ("RI [mm/h]", "RA [mm]", "RAT [mm]"): + data[key].append(float(row[key].replace(",", "."))) + return time, data diff --git a/cloudnetpy/disdronator/utils.py b/cloudnetpy/disdronator/utils.py new file mode 100644 index 00000000..5917892c --- /dev/null +++ b/cloudnetpy/disdronator/utils.py @@ -0,0 +1,27 @@ +from collections.abc import Container + +import numpy as np +from numpy import ma + + +def convert_to_numpy( + data: dict, + fill_values: dict, + int_keys: Container | None = None, + float_keys: Container | None = None, +) -> dict: + output = {} + for key, value in data.items(): + arr = np.array(value) + if key in fill_values: + arr = ma.masked_where(arr == fill_values[key], arr) + if int_keys is not None and key in int_keys: + new_arr = np.astype(arr, np.int32) + if not np.all(arr == new_arr): + msg = "Cannot convert non-integer float to integer" + raise ValueError(msg) + arr = new_arr + elif float_keys is not None and key in float_keys: + arr = np.astype(arr, np.float32) + output[key] = arr + return output diff --git a/pyproject.toml b/pyproject.toml index cb264ef2..c1f53636 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ Changelog = 
"https://github.com/actris-cloudnet/cloudnetpy/blob/main/CHANGELOG.m check_untyped_defs = true [[tool.mypy.overrides]] -module = ["mpl_toolkits.*", "scipy.*", "voodoonet.*"] +module = ["cftime.*", "mpl_toolkits.*", "scipy.*", "voodoonet.*"] ignore_missing_imports = true [tool.release-version] From 402e9dbdfde2324056d8995befe1373c4cc1591e Mon Sep 17 00:00:00 2001 From: Tuomas Siipola Date: Wed, 11 Mar 2026 09:56:22 +0200 Subject: [PATCH 2/7] Add support for RD-80 disdrometer --- cloudnetpy/disdronator/rd80.py | 33 +++- cloudnetpy/disdronator/utils.py | 4 +- cloudnetpy/instruments/__init__.py | 2 +- .../instruments/disdrometer/__init__.py | 1 + cloudnetpy/instruments/disdrometer/rd80.py | 143 ++++++++++++++++++ cloudnetpy/instruments/instruments.py | 7 + cloudnetpy/plotting/plot_meta.py | 4 +- tests/unit/data/rd80/RD-211231-181400.txt | 4 + tests/unit/test_disdrometer.py | 21 +++ 9 files changed, 211 insertions(+), 8 deletions(-) create mode 100644 cloudnetpy/instruments/disdrometer/rd80.py create mode 100644 tests/unit/data/rd80/RD-211231-181400.txt diff --git a/cloudnetpy/disdronator/rd80.py b/cloudnetpy/disdronator/rd80.py index 8a865f66..26e467f1 100644 --- a/cloudnetpy/disdronator/rd80.py +++ b/cloudnetpy/disdronator/rd80.py @@ -1,12 +1,16 @@ import datetime from os import PathLike +import numpy as np +import numpy.typing as npt -def read_rd80(filename: str | PathLike) -> tuple[list, dict[str, list]]: +from cloudnetpy.disdronator.utils import convert_to_numpy + + +def read_rd80(filename: str | PathLike) -> tuple[npt.NDArray, dict[str, npt.NDArray]]: time = [] data: dict[str, list] = { "n": [], - "Status": [], "Interval [s]": [], "RI [mm/h]": [], "RA [mm]": [], @@ -24,8 +28,29 @@ def read_rd80(filename: str | PathLike) -> tuple[list, dict[str, list]]: ) time.append(dt) data["n"].append([int(row[f"n{i + 1}"]) for i in range(20)]) - data["Status"].append(row["Status"]) data["Interval [s]"].append(int(row["Interval [s]"])) for key in ("RI [mm/h]", "RA [mm]", "RAT 
[mm]"): data[key].append(float(row[key].replace(",", "."))) - return time, data + return np.array(time), convert_to_numpy(data) + + +# fmt: off +Dmid = np.array([ + 0.359, 0.455, 0.551, 0.656, 0.771, 0.913, 1.116, 1.331, 1.506, 1.665, 1.912, + 2.259, 2.584, 2.869, 3.198, 3.544, 3.916, 4.350, 4.859, 5.373 +]) # mm +Dspr = np.array([ + 0.092, 0.100, 0.091, 0.119, 0.112, 0.172, 0.233, 0.197, 0.153, 0.166, 0.329, + 0.364, 0.286, 0.284, 0.374, 0.319, 0.423, 0.446, 0.572, 0.455 +]) # mm +Dlow = np.array([ + 0.313, 0.405, 0.505, 0.596, 0.715, 0.827, 0.999, 1.232, 1.429, 1.582, 1.748, + 2.077, 2.441, 2.727, 3.011, 3.385, 3.704, 4.127, 4.573, 5.145 +]) # mm +Dupp = np.append(Dlow[1:], Dlow[-1] + Dspr[-1]) # mm +Vmid = np.array([ + 1.435, 1.862, 2.267, 2.692, 3.154, 3.717, 4.382, 4.986, 5.423, 5.793, 6.315, + 7.009, 7.546, 7.903, 8.258, 8.556, 8.784, 8.965, 9.076, 9.137 +]) # m s-1 +A = 0.005 # m2 +# fmt: on diff --git a/cloudnetpy/disdronator/utils.py b/cloudnetpy/disdronator/utils.py index 5917892c..f4e9012a 100644 --- a/cloudnetpy/disdronator/utils.py +++ b/cloudnetpy/disdronator/utils.py @@ -6,14 +6,14 @@ def convert_to_numpy( data: dict, - fill_values: dict, + fill_values: dict | None = None, int_keys: Container | None = None, float_keys: Container | None = None, ) -> dict: output = {} for key, value in data.items(): arr = np.array(value) - if key in fill_values: + if fill_values is not None and key in fill_values: arr = ma.masked_where(arr == fill_values[key], arr) if int_keys is not None and key in int_keys: new_arr = np.astype(arr, np.int32) diff --git a/cloudnetpy/instruments/__init__.py b/cloudnetpy/instruments/__init__.py index 5d0eddb1..e58334cd 100644 --- a/cloudnetpy/instruments/__init__.py +++ b/cloudnetpy/instruments/__init__.py @@ -2,7 +2,7 @@ from .bowtie import bowtie2nc from .ceilo import ceilo2nc from .copernicus import copernicus2nc -from .disdrometer import parsivel2nc, thies2nc +from .disdrometer import parsivel2nc, rd802nc, thies2nc from .fd12p 
import fd12p2nc from .galileo import galileo2nc from .hatpro import hatpro2l1c, hatpro2nc diff --git a/cloudnetpy/instruments/disdrometer/__init__.py b/cloudnetpy/instruments/disdrometer/__init__.py index a93593f6..75c39ed6 100644 --- a/cloudnetpy/instruments/disdrometer/__init__.py +++ b/cloudnetpy/instruments/disdrometer/__init__.py @@ -1,3 +1,4 @@ from .common import ATTRIBUTES from .parsivel import parsivel2nc +from .rd80 import rd802nc from .thies import thies2nc diff --git a/cloudnetpy/instruments/disdrometer/rd80.py b/cloudnetpy/instruments/disdrometer/rd80.py new file mode 100644 index 00000000..6c3d9094 --- /dev/null +++ b/cloudnetpy/instruments/disdrometer/rd80.py @@ -0,0 +1,143 @@ +import datetime +from collections import defaultdict +from collections.abc import Iterable +from os import PathLike +from uuid import UUID + +import numpy as np +from numpy import ma + +from cloudnetpy import output +from cloudnetpy.cloudnetarray import CloudnetArray +from cloudnetpy.constants import MM_TO_M, SEC_IN_HOUR +from cloudnetpy.disdronator.rd80 import A, Dlow, Dmid, Dspr, Dupp, Vmid, read_rd80 +from cloudnetpy.instruments import instruments +from cloudnetpy.instruments.cloudnet_instrument import CloudnetInstrument +from cloudnetpy.utils import get_uuid + +from .common import ATTRIBUTES + + +def rd802nc( + input_file: str | PathLike | list[str | PathLike], + output_file: str | PathLike, + site_meta: dict, + uuid: str | UUID | None = None, + date: str | datetime.date | None = None, +) -> UUID: + """Converts Distromet RD-80 disdrometer data into Cloudnet Level 1b netCDF file. + + Args: + input_file: Filename(s) of RD-80 disdrometer data file(s). Can be a + single file or a list of files. + output_file: Output filename for the netCDF file. + site_meta: Dictionary containing information about the site. Required + key is `name`. + uuid: Set specific UUID for the file. If not provided, a new UUID will + be generated. 
+ date: Expected date of the measurements as YYYY-MM-DD or datetime.date + object. If not provided, the date will be inferred from the input + file(s). + + Returns: + UUID of the generated file. + + Examples: + >>> from cloudnetpy.instruments import rd802nc + >>> site_meta = {'name': 'Campina', 'altitude': 30, 'latitude': -2.18, 'longitude': -59.02} + >>> uuid = rd802nc('RD-220101-181400.txt', 'rd80.nc', site_meta) + """ + if isinstance(date, str): + date = datetime.date.fromisoformat(date) + uuid = get_uuid(uuid) + if isinstance(input_file, str | PathLike): + input_file = [input_file] + disdrometer = Rd80(input_file, site_meta, date) + disdrometer.sort_and_dedup_timestamps() + disdrometer.convert_to_cloudnet_arrays() + disdrometer.add_meta() + attributes = output.add_time_attribute(ATTRIBUTES, disdrometer.date) + output.update_attributes(disdrometer.data, attributes) + output.save_level1b(disdrometer, output_file, uuid) + return uuid + + +class Rd80(CloudnetInstrument): + def __init__( + self, + filenames: Iterable[str | PathLike], + site_meta: dict, + expected_date: datetime.date | None = None, + ) -> None: + super().__init__() + self.site_meta = site_meta + self._read_data(filenames) + self._screen_time(expected_date) + self.n_velocity = 20 + self.n_diameter = 20 + self.serial_number = None + self.instrument = instruments.RD80 + + def _read_data(self, filenames: Iterable[str | PathLike]) -> None: + times = [] + data = defaultdict(list) + for filename in filenames: + file_time, file_data = read_rd80(filename) + times.append(file_time) + for key, value in file_data.items(): + data[key].append(value) + self.raw_time = np.concatenate(times) + self.raw_data = {key: np.concatenate(value) for key, value in data.items()} + + def _screen_time(self, expected_date: datetime.date | None = None) -> None: + if expected_date is None: + self.date = self.raw_time[0].date() + else: + is_valid = [dt.date() == expected_date for dt in self.raw_time] + self.raw_time =
self.raw_time[is_valid] + for key in self.raw_data: + self.raw_data[key] = self.raw_data[key][is_valid] + self.date = expected_date + + def sort_and_dedup_timestamps(self) -> None: + self.raw_time, time_ind = np.unique(self.raw_time, return_index=True) + for key in self.raw_data: + self.raw_data[key] = self.raw_data[key][time_ind] + + def add_meta(self) -> None: + valid_keys = ("latitude", "longitude", "altitude") + for key, value in self.site_meta.items(): + name = key.lower() + if name in valid_keys: + self.data[name] = CloudnetArray(float(value), name) + + def convert_to_cloudnet_arrays(self) -> None: + mmh_to_ms = SEC_IN_HOUR / MM_TO_M + mm_to_m = 1000 + hour = ( + self.raw_time - datetime.datetime.combine(self.date, datetime.time()) + ) / datetime.timedelta(hours=1) + rainfall_rate = self.raw_data["RI [mm/h]"] + n_particles = np.sum(self.raw_data["n"], axis=1) + dt = self.raw_data["Interval [s]"] + n = self.raw_data["n"] + numcon = n / (A * dt[:, np.newaxis] * Vmid * Dspr) + Z = np.sum(n / Vmid * Dmid**6, axis=1) / (A * dt) + ZdB = 10 * ma.log10(ma.masked_where(Z == 0, Z)) + ZdB[ZdB < -10] = ma.masked + self.data["diameter"] = CloudnetArray(Dmid / mm_to_m, "diameter") + self.data["diameter_spread"] = CloudnetArray(Dspr / mm_to_m, "diameter_spread") + self.data["diameter_bnds"] = CloudnetArray( + np.dstack([Dlow, Dupp]) / mm_to_m, "diameter_bnds" + ) + self.data["time"] = CloudnetArray(hour.astype(np.float32), "time") + self.data["interval"] = CloudnetArray(dt, "interval") + self.data["rainfall_rate"] = CloudnetArray( + rainfall_rate / mmh_to_ms, "rainfall_rate" + ) + self.data["n_particles"] = CloudnetArray(n_particles, "n_particles") + self.data["number_concentration"] = CloudnetArray( + numcon, "number_concentration" + ) + self.data["radar_reflectivity"] = CloudnetArray(ZdB, "radar_reflectivity") diff --git a/cloudnetpy/instruments/instruments.py b/cloudnetpy/instruments/instruments.py index bb24d31a..da0a6c80 100644 --- 
a/cloudnetpy/instruments/instruments.py +++ b/cloudnetpy/instruments/instruments.py @@ -206,6 +206,13 @@ class Instrument: model="LNM", ) +RD80 = Instrument( + manufacturer="Disdromet", + domain="disdrometer", + category="disdrometer", + model="RD-80", +) + PLUVIO2 = Instrument( manufacturer="OTT HydroMet", domain="rain-gauge", diff --git a/cloudnetpy/plotting/plot_meta.py b/cloudnetpy/plotting/plot_meta.py index f246dcc3..aefb8cc4 100644 --- a/cloudnetpy/plotting/plot_meta.py +++ b/cloudnetpy/plotting/plot_meta.py @@ -424,7 +424,9 @@ class PlotMeta(NamedTuple): plot_range=(1e-5, 1e-2), log_scale=True, ), - "number_concentration": PlotMeta(plot_range=(1e-2, 1e3), log_scale=True), + "number_concentration": PlotMeta( + plot_range=(1e-2, 1e3), log_scale=True, mask_zeros=True + ), "fall_velocity": PlotMeta( plot_range=(0, 10), mask_zeros=True, diff --git a/tests/unit/data/rd80/RD-211231-181400.txt b/tests/unit/data/rd80/RD-211231-181400.txt new file mode 100644 index 00000000..00274a11 --- /dev/null +++ b/tests/unit/data/rd80/RD-211231-181400.txt @@ -0,0 +1,4 @@ +YYYY-MM-DD hh:mm:ss Status Interval [s] n1 n2 n3 n4 n5 n6 n7 n8 n9 n10 n11 n12 n13 n14 n15 n16 n17 n18 n19 n20 RI [mm/h] RA [mm] RAT [mm] +2022-01-01 12:42:00 60 34 15 2 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0,0284 0,0005 61,9251 +2022-01-01 12:43:00 60 110 85 14 21 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0,1546 0,0026 61,9277 +2022-01-01 12:44:00 60 52 60 55 163 184 153 16 1 0 0 0 0 0 0 0 0 0 0 0 0 1,8136 0,0302 61,9579 diff --git a/tests/unit/test_disdrometer.py b/tests/unit/test_disdrometer.py index 0d153864..d7e2f3b0 100644 --- a/tests/unit/test_disdrometer.py +++ b/tests/unit/test_disdrometer.py @@ -439,3 +439,24 @@ class TestInvalidCharacters(Check): def test_skips_invalid_row(self): assert len(self.nc.variables["time"]) == 4 + + +class TestRd80(Check): + date = "2022-01-01" + temp_dir = TemporaryDirectory() + temp_path = temp_dir.name + "/test.nc" + filename = f"{SCRIPT_PATH}/data/rd80/RD-211231-181400.txt" + 
site_meta = SITE_META + uuid = disdrometer.rd802nc(filename, temp_path, site_meta, date=date) + + def test_processing(self): + assert self.nc.title == f"RD-80 disdrometer from {self.site_meta['name']}" + assert self.nc.year == "2022" + assert self.nc.month == "01" + assert self.nc.day == "01" + assert self.nc.location == "Kumpula" + assert self.nc.cloudnet_file_type == "disdrometer" + assert np.allclose( + self.nc["rainfall_rate"][:], + [0.0284 / 3600000, 0.1546 / 3600000, 1.8136 / 3600000], + ) From 149a10686180d3fc49ec8becd82cebd3561e6cae Mon Sep 17 00:00:00 2001 From: Tuomas Siipola Date: Wed, 11 Mar 2026 12:58:23 +0200 Subject: [PATCH 3/7] Fix manufacturer name --- cloudnetpy/instruments/instruments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudnetpy/instruments/instruments.py b/cloudnetpy/instruments/instruments.py index da0a6c80..130cfd2e 100644 --- a/cloudnetpy/instruments/instruments.py +++ b/cloudnetpy/instruments/instruments.py @@ -207,7 +207,7 @@ class Instrument: ) RD80 = Instrument( - manufacturer="Disdromet", + manufacturer="Distromet", domain="disdrometer", category="disdrometer", model="RD-80", From c6809c644bb9b08421d81b360ba1c44ec57d89f6 Mon Sep 17 00:00:00 2001 From: Tuomas Siipola Date: Wed, 11 Mar 2026 12:59:05 +0200 Subject: [PATCH 4/7] Fix Numpy array initialization --- cloudnetpy/disdronator/lpm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudnetpy/disdronator/lpm.py b/cloudnetpy/disdronator/lpm.py index 6724e0fd..e7979007 100644 --- a/cloudnetpy/disdronator/lpm.py +++ b/cloudnetpy/disdronator/lpm.py @@ -129,4 +129,4 @@ def read_lpm(filename: str | PathLike) -> tuple[npt.NDArray, dict[int, npt.NDArr time, data = _read_lampedusa(filename) else: time, data = _read_raw(filename) - return np.ndarray(time), convert_to_numpy(data, FILL_VALUES) + return np.array(time), convert_to_numpy(data, FILL_VALUES) From 42d65f104c9291eb041c248219dccb75f1489c0b Mon Sep 17 00:00:00 2001 From: Tuomas Siipola 
Date: Wed, 11 Mar 2026 13:02:44 +0200 Subject: [PATCH 5/7] Fix dimensions of Numpy array --- cloudnetpy/instruments/disdrometer/rd80.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudnetpy/instruments/disdrometer/rd80.py b/cloudnetpy/instruments/disdrometer/rd80.py index 6c3d9094..e8c866cb 100644 --- a/cloudnetpy/instruments/disdrometer/rd80.py +++ b/cloudnetpy/instruments/disdrometer/rd80.py @@ -129,7 +129,7 @@ def convert_to_cloudnet_arrays(self) -> None: self.data["diameter"] = CloudnetArray(Dmid / mm_to_m, "diameter") self.data["diameter_spread"] = CloudnetArray(Dspr / mm_to_m, "diameter_spread") self.data["diameter_bnds"] = CloudnetArray( - np.dstack([Dlow, Dupp]) / mm_to_m, "diameter_bnds" + np.stack((Dlow, Dupp), axis=1) / mm_to_m, "diameter_bnds" ) self.data["time"] = CloudnetArray(hour.astype(np.float32), "time") self.data["interval"] = CloudnetArray(dt, "interval") From 25c724b116d9cd47dab3daa02ed7aedccfaf50bf Mon Sep 17 00:00:00 2001 From: Tuomas Siipola Date: Wed, 11 Mar 2026 13:04:42 +0200 Subject: [PATCH 6/7] Simplify code --- cloudnetpy/disdronator/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudnetpy/disdronator/utils.py b/cloudnetpy/disdronator/utils.py index f4e9012a..307dc508 100644 --- a/cloudnetpy/disdronator/utils.py +++ b/cloudnetpy/disdronator/utils.py @@ -16,12 +16,12 @@ def convert_to_numpy( if fill_values is not None and key in fill_values: arr = ma.masked_where(arr == fill_values[key], arr) if int_keys is not None and key in int_keys: - new_arr = np.astype(arr, np.int32) + new_arr = arr.astype(np.int32) if not np.all(arr == new_arr): msg = "Cannot convert non-integer float to integer" raise ValueError(msg) arr = new_arr elif float_keys is not None and key in float_keys: - arr = np.astype(arr, np.float32) + arr = arr.astype(np.float32) output[key] = arr return output From a8d7874ea1beee1b44743254be3cc9211676ab1c Mon Sep 17 00:00:00 2001 From: Tuomas Siipola Date: Wed, 11 Mar 2026 
13:13:34 +0200 Subject: [PATCH 7/7] Fix attribute in docstring --- cloudnetpy/plotting/plot_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudnetpy/plotting/plot_meta.py b/cloudnetpy/plotting/plot_meta.py index aefb8cc4..523f7a75 100644 --- a/cloudnetpy/plotting/plot_meta.py +++ b/cloudnetpy/plotting/plot_meta.py @@ -19,7 +19,7 @@ class PlotMeta(NamedTuple): moving_average: Whether to plot a moving average in a 1d plot. contour: Whether to plot contours on top of a filled colormap. zero_line: Whether to plot a zero line in a 1d plot. - mask_zero: Whether to mask zero values in the plot. + mask_zeros: Whether to mask zero values in the plot. time_smoothing_duration: The duration of the time smoothing window (in 2d plots) in minutes. """