Skip to content

Commit b0977a1

Browse files
committed
time series parsers use names in output files
1 parent 4aa5b12 commit b0977a1

3 files changed

Lines changed: 17 additions & 28 deletions

File tree

src/stagpy/stagyydata.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -147,14 +147,14 @@ def _cached_extra(self) -> dict[str, dt.Tseries]:
147147
@cached_property
148148
def _data(self) -> DataFrame | None:
149149
timefile = self.sdat.filename("TimeSeries.h5")
150-
data = stagyyparsers.time_series_h5(timefile, list(phyvars.TIME.keys()))
150+
data = stagyyparsers.time_series_h5(timefile)
151151
if data is not None:
152152
return data
153153
timefile = self.sdat.filename("time.dat")
154154
if self.sdat.hdf5 and not timefile.is_file():
155155
# check legacy folder as well
156156
timefile = self.sdat.filename("time.dat", force_legacy=True)
157-
data = stagyyparsers.time_series(timefile, list(phyvars.TIME.keys()))
157+
data = stagyyparsers.time_series(timefile)
158158
return data
159159

160160
@property

src/stagpy/stagyyparsers.py

Lines changed: 12 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -52,16 +52,11 @@ def _tidy_names(
5252
del names[nnames:]
5353

5454

55-
def time_series(timefile: Path, colnames: list[str]) -> DataFrame | None:
55+
def time_series(timefile: Path) -> DataFrame | None:
5656
"""Read temporal series text file.
5757
58-
If `colnames` is too long, it will be truncated. If it is too short,
59-
additional numeric column names from 0 to N-1 will be attributed to the N
60-
extra columns present in `timefile`.
61-
6258
Args:
6359
timefile: path of the time.dat file.
64-
colnames: names of the variables expected in `timefile` (may be modified).
6560
6661
Returns:
6762
A `pandas.DataFrame` containing the time series, organized by
@@ -71,13 +66,9 @@ def time_series(timefile: Path, colnames: list[str]) -> DataFrame | None:
7166
return None
7267

7368
with timefile.open() as fid:
74-
names_in_file = fid.readline().strip().split()
75-
_tidy_names(
76-
colnames,
77-
len(names_in_file) + 9, # extra columns in case some were added mid-run
78-
extra_names=names_in_file[len(colnames) + 1 :],
79-
)
80-
colnames.insert(0, "istep")
69+
colnames = fid.readline().strip().split()
70+
# extra columns in case some were added mid-run
71+
_tidy_names(colnames, len(colnames) + 10)
8172

8273
data = pd.read_csv(
8374
timefile,
@@ -110,15 +101,11 @@ def time_series(timefile: Path, colnames: list[str]) -> DataFrame | None:
110101
return data
111102

112103

113-
def time_series_h5(timefile: Path, colnames: list[str]) -> DataFrame | None:
104+
def time_series_h5(timefile: Path) -> DataFrame | None:
114105
"""Read temporal series HDF5 file.
115106
116-
If `colnames` is too long, it will be truncated. If it is too short,
117-
additional column names will be deduced from the content of the file.
118-
119107
Args:
120108
timefile: path of the TimeSeries.h5 file.
121-
colnames: names of the variables expected in `timefile` (may be modified).
122109
123110
Returns:
124111
A `pandas.DataFrame` containing the time series, organized by
@@ -130,10 +117,14 @@ def time_series_h5(timefile: Path, colnames: list[str]) -> DataFrame | None:
130117
dset = h5f["tseries"]
131118
_, ncols = dset.shape
132119
ncols -= 1 # first is istep
133-
h5names = h5f["names"].asstr()[len(colnames) + 1 :]
134-
_tidy_names(colnames, ncols, h5names)
120+
colnames = list(h5f["names"].asstr()[()])
121+
_tidy_names(colnames, ncols + 1)
135122
data = dset[()]
136-
pdf = pd.DataFrame(data[:, 1:], index=data[:, 0].astype(np.int64), columns=colnames)
123+
pdf = pd.DataFrame(
124+
data[:, 1:],
125+
index=data[:, 0].astype(np.int64),
126+
columns=colnames[1:],
127+
)
137128
# remove duplicated lines in case of restart
138129
return pdf.loc[~pdf.index.duplicated(keep="last")]
139130

tests/test_parsers.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,23 +6,21 @@
66

77
def test_time_series_prs(sdat_legacy: StagyyData) -> None:
88
sdat = sdat_legacy
9-
names = ["aa", "bb", "cc"]
10-
data = prs.time_series(sdat.filename("time.dat"), list(names))
9+
data = prs.time_series(sdat.filename("time.dat"))
1110
assert data is not None
12-
assert (data.columns[:3] == names).all()
1311
assert (data.columns[3:6] == ["Tmin", "Tmean", "Tmax"]).all()
1412

1513

1614
def test_time_series_h5(sdat_h5: StagyyData) -> None:
1715
sdat = sdat_h5
1816
assert sdat.hdf5 is not None
19-
data = prs.time_series_h5(sdat.hdf5 / "TimeSeries.h5", colnames=[])
17+
data = prs.time_series_h5(sdat.hdf5 / "TimeSeries.h5")
2018
assert data is not None
2119
assert (data.columns[3:6] == ["Tmin", "Tmean", "Tmax"]).all()
2220

2321

2422
def test_time_series_invalid_prs() -> None:
25-
assert prs.time_series(Path("dummy"), []) is None
23+
assert prs.time_series(Path("dummy")) is None
2624

2725

2826
def test_rprof_prs(sdat_legacy: StagyyData) -> None:

0 commit comments

Comments
 (0)