-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmage.py
More file actions
608 lines (513 loc) · 25.6 KB
/
mage.py
File metadata and controls
608 lines (513 loc) · 25.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
from typing import Literal, Union
import numpy as np
import pandas as pd
from .utils import CGMS2DayByDay, check_data_columns,gd2d_to_df
def mage(
data: Union[pd.DataFrame, pd.Series],
version: Literal["ma", "naive"] = "ma",
sd_multiplier: float = 1.0,
short_ma: int = 5,
long_ma: int = 32,
return_type: Literal["num", "df"] = "num",
direction: Literal["avg", "service", "max", "plus", "minus"] = "avg",
tz: str = "",
inter_gap: int = 45,
max_gap: int = 180,
) -> pd.DataFrame:
"""
Calculate Mean Amplitude of Glycemic Excursions (MAGE).
The function calculates MAGE values using either a moving average ('ma') or naive ('naive') algorithm.
The 'ma' algorithm is more accurate and is the default. It uses crosses of short and long moving
averages to identify intervals where a peak/nadir might exist, then calculates the height from
one peak/nadir to the next nadir/peak from the original glucose values.
If version 'ma' is selected, the function computationally emulates the manual method for calculating
the mean amplitude of glycemic excursions (MAGE) first suggested in
"Mean Amplitude of Glycemic Excursions, a Measure of Diabetic Instability", (Service, 1970).
For this version, glucose values will be interpolated over a uniform time grid prior to calculation.
'ma' is a more accurate algorithm that uses the crosses of a short and long moving average
to identify intervals where a peak/nadir might exist. Then, the height from one peak/nadir
to the next nadir/peak is calculated from the _original_ (not moving average) glucose values.
(Note: this function internally uses CGMS2DayByDay with dt0 = 5.
Thus, all CGM data is linearly interpolated to 5 minute intervals. See the MAGE vignette for more details.)
'naive' algorithm calculates MAGE by taking the mean of absolute glucose differences
(between each value and the mean) that are greater than the standard deviation. A multiplier can be added
to the standard deviation using the `sd_multiplier` argument.
Parameters
----------
data : Union[pd.DataFrame, pd.Series]
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values
version : Literal['ma', 'naive'], default='ma'
Algorithm version to use. 'ma' is more accurate and is the default.
'naive' is included for backward compatibility.
sd_multiplier : float, default=1.0
Multiplier for standard deviation used in naive algorithm to determine
size of glycemic excursions. Only used if version='naive'.
short_ma : int, default=5
Period length of short moving average. Must be positive and less than long_ma.
Recommended < 15.
long_ma : int, default=32
Period length of long moving average. Must be positive and greater than short_ma.
Recommended > 20.
return_type : Literal['num', 'df'], default='num'
Return type. 'num' returns a single MAGE value, 'df' returns a DataFrame with
MAGE values for each segment.
direction : Literal['avg', 'service', 'max', 'plus', 'minus'], default='avg'
Direction of MAGE calculation:
- 'avg': Average of MAGE+ and MAGE-
- 'service': Based on first countable excursion
- 'max': Maximum of MAGE+ and MAGE-
- 'plus': MAGE+ (nadir to peak)
- 'minus': MAGE- (peak to nadir)
tz : str, default=""
Time zone to use for datetime conversion. Empty string means use local time zone.
inter_gap : int, default=45
Maximum gap in minutes for interpolation. Gaps larger than this will not be
interpolated.
max_gap : int, default=180
Maximum gap in minutes before splitting into segments.
Returns
-------
pd.DataFrame
DataFrame with columns:
- id: subject identifier (if DataFrame input)
- MAGE: Mean Amplitude of Glycemic Excursions value
References
----------
Service et al. (1970) Mean amplitude of glycemic excursions, a measure of diabetic instability
Diabetes 19:644-655, doi:10.2337/diab.19.9.644.
Fernandes, Nathaniel J., et al. "Open-source algorithm to calculate mean amplitude of glycemic
excursions using short and long moving averages." Journal of diabetes science and technology
16.2 (2022): 576-577, doi:10.1177/19322968211061165.
Examples
--------
>>> data = pd.DataFrame({
... 'id': ['subject1', 'subject1', 'subject2', 'subject2'],
... 'time': ['2020-01-01 00:00:00', '2020-01-01 00:05:00',
... '2020-01-01 00:00:00', '2020-01-01 00:05:00'],
... 'gl': [150, 200, 130, 190]
... })
>>> data['time'] = pd.to_datetime(data['time'])
>>> mage(data)
id MAGE
0 subject1 50.0
1 subject2 60.0
>>> mage(data['gl'], version='naive', sd_multiplier=1.5)
MAGE
0 45.0
"""
def mage_naive(data: pd.DataFrame) -> float:
"""Calculate MAGE using naive algorithm"""
# Calculate absolute differences from mean
mean_gl = data["gl"].mean()
abs_diff_mean = abs(data["gl"] - mean_gl)
# Calculate standard deviation
std_gl = data["gl"].std()
# Calculate MAGE as mean of differences greater than sd_multiplier * std
mage_val = abs_diff_mean[abs_diff_mean > (sd_multiplier * std_gl)].mean()
return float(mage_val) if not pd.isna(mage_val) else np.nan
def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int,
direction:str ='avg', return_type:str = "num") -> pd.DataFrame:
"""Calculate MAGE using moving average algorithm for a single subject"""
## 1. Preprocessing
# 1.1 Interpolate over uniform grid
# Note: always interpolate to 5 minute grid
data_ip = CGMS2DayByDay(data, dt0=5, inter_gap=inter_gap, tz=tz)
dt0 = data_ip[2] # Time between measurements in minutes
# replace for 5 min to fix bug in CGMS2DayByDay
dt0 = 5
day_one = data_ip[1][0]
ndays = len(data_ip[1])
# 1.2 Generate grid times by starting from day one and cumulatively summing
# note fix 5 min used in interpretation
gl = data_ip[0].flatten().tolist()
time_ip = [pd.Timedelta(i * 5, unit="m") + day_one for i in range(1,len(gl)+1)]
# 1.3 Recalculate short_ma and long_ma because short and long are based on 5 minutes originally
# > Multiply by 5 to get length in min
# > Divide by dt0 to get rounded number of measurements that are roughly equal to original short/long ma definition
# short_ma = round(short_ma*5/dt0)
# long_ma = round(long_ma*5/dt0)
# Ensure short_ma and long_ma are appropriate
if short_ma >= long_ma:
short_ma, long_ma = long_ma, short_ma
## 2. Change to interpolated data (times and glucose)
# > change data into id, interpolated times, interpolated glucose (t to get rowwise)
# > drop NA rows before first glucose reading
# > then drop NA rows after last glucose reading
# > Label NA glucose as gap (gap = 1)
interpolated_data = pd.DataFrame({
"id" : data['id'].iloc[0],
"time": pd.Series(time_ip, dtype='datetime64[ns]'),
"gl": pd.Series(gl, dtype='float64')
})
# Drop NA rows before first glucose reading
first_valid_idx = interpolated_data['gl'].first_valid_index()
if first_valid_idx is not None:
interpolated_data = interpolated_data.iloc[first_valid_idx:]
# Drop NA rows after last glucose reading
last_valid_idx = interpolated_data['gl'].last_valid_index()
if last_valid_idx is not None:
interpolated_data = interpolated_data.iloc[:last_valid_idx+1]
# Add gap column to mark NA values as 1
interpolated_data['gap'] = interpolated_data['gl'].isna().astype(int)
# 4. Time Series Segmentation: split gaps > max_gap into separate segments
dfs = segment_time_series(interpolated_data,max_gap) # note: max_gap is in minutes
# 5. Calculate MAGE on each identified segment
return_val = pd.DataFrame(columns=["start", "end", "mage", "plus_or_minus", "first_excursion"])
for segment in dfs:
ret = mage_atomic(segment,short_ma,long_ma)
if return_val.empty:
return_val = ret
else:
return_val = pd.concat([return_val, ret], ignore_index=True)
if return_type == 'df':
return return_val
"""Process MAGE results with filtering and weighting."""
# Filter by direction (equivalent to the previous R filtering code)
if direction == 'plus':
res = return_val[return_val['plus_or_minus'] == 'PLUS'].copy()
elif direction == 'minus':
res = return_val[return_val['plus_or_minus'] == 'MINUS'].copy()
elif direction == 'avg':
res = return_val[return_val['MAGE'].notna()].copy()
elif direction == 'max':
# Group by start,end and keep max mage in each group
idx = return_val.groupby(['start', 'end'])['MAGE'].idxmax()
res = return_val.loc[idx].reset_index(drop=True)
else: # default: first excursions only
res = return_val[return_val['first_excursion'] == True].copy()
# Calculate time-weighted MAGE
if res.empty:
return np.nan
res['hours'] = res['end'] - res['start']
res['weight'] = res['hours'] / res['hours'].sum()
weighted_mage = (res['MAGE'] * res['weight']).sum()
return weighted_mage
def mage_atomic(data, short_ma,long_ma):
""" 0. Calculates MAGE on 1 segment of CGM trace """
# 2c. Calculate the moving average values
data = data.copy()
data["MA_Short"] = data["gl"].rolling(window=short_ma, min_periods=1).mean()
data["MA_Long"] = data["gl"].rolling(window=long_ma, min_periods=1).mean()
# Fill leading NAs (forward fill first valid value)
if short_ma > len(data):
data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[-1]
else:
data.loc[data.index[:short_ma], 'MA_Short'] = data['MA_Short'].iloc[short_ma-1]
if long_ma > len(data):
data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[-1]
else:
data.loc[data.index[:long_ma], 'MA_Long'] = data['MA_Long'].iloc[long_ma-1]
# Calculate difference
data['DELTA_SHORT_LONG'] = data['MA_Short'] - data['MA_Long']
data = data.reset_index(drop=True)
nmeasurements = len(data)
# Sanity check
if (
data['gl'].isnull().all() or
nmeasurements < 7 or
nmeasurements < short_ma or
np.std(data['gl'], ddof=1) < 1
):
return pd.DataFrame({
'start': [data['time'].iloc[0]],
'end': [data['time'].iloc[-1]],
'MAGE': [np.nan],
'plus_or_minus': [np.nan],
'first_excursion': [np.nan]
})
# 2d. Create a preallocated list of crossing point ids & type
# Find crossing points
# Detect trend reversal points in glucose data using DELTA signal.
# Initialize variables
idx = list(data.index) # R: idx = as.numeric(rownames(.data))
types = {'REL_MIN': 0, 'REL_MAX': 1} # R: types = list2env(list(REL_MIN=0, REL_MAX=1))
# Create storage lists - R: list_cross <- list("id" = rep.int(NA, nmeasurements), "type" = rep.int(NA, nmeasurements))
list_cross = {
'id': [np.nan] * nmeasurements,
'type': [np.nan] * nmeasurements
}
# Always add 1st point
list_cross['id'][0] = idx[0]
list_cross['type'][0] = types['REL_MAX'] if data['DELTA_SHORT_LONG'].iloc[0] > 0 else types['REL_MIN']
count = 1 # Python uses 0-based indexing, so count starts at 1
# treat DELTA_SHORT_LONG==0 as NaN ( so we can skip its multiplication)
data.loc[data['DELTA_SHORT_LONG'] == 0, 'DELTA_SHORT_LONG'] = np.nan
# Main loop - R: for(i in 2:length(.data$DELTA_SHORT_LONG))
for i in range(1, len(data['DELTA_SHORT_LONG'])):
# Check data validity
if (not pd.isna(data['gl'].iloc[i]) and
not pd.isna(data['gl'].iloc[i-1]) and
not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and
not pd.isna(data['DELTA_SHORT_LONG'].iloc[i-1])):
# Primary crossover detection: crossing point if DELTA changes sign
if (data['DELTA_SHORT_LONG'].iloc[i] * data['DELTA_SHORT_LONG'].iloc[i-1] < 0):
list_cross['id'][count] = idx[i]
if data['DELTA_SHORT_LONG'].iloc[i] < data['DELTA_SHORT_LONG'].iloc[i-1]:
list_cross['type'][count] = types['REL_MIN']
else:
list_cross['type'][count] = types['REL_MAX']
count += 1
# Gap handling: needed for gaps, where DELTA_SHORT_LONG(i-1 | i-2) = NaN
elif (not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and
count >= 1): # Make sure we have a previous crossover
# R: match(list_cross$id[count-1], idx) - find index of previous crossover
try:
prev_cross_idx = idx.index(list_cross['id'][count-1])
prev_delta = data['DELTA_SHORT_LONG'].iloc[prev_cross_idx]
if (data['DELTA_SHORT_LONG'].iloc[i] * prev_delta < 0):
list_cross['id'][count] = idx[i]
if data['DELTA_SHORT_LONG'].iloc[i] < prev_delta:
list_cross['type'][count] = types['REL_MIN']
else:
list_cross['type'][count] = types['REL_MAX']
count += 1
except ValueError:
# Handle case where previous crossover id not found in idx
pass
# Add last point to capture excursion at end
# R: utils::tail(idx, 1)
last_idx = idx[-1]
list_cross['id'][count] = last_idx
if data['DELTA_SHORT_LONG'].iloc[-1] > 0:
list_cross['type'][count] = types['REL_MAX']
else:
list_cross['type'][count] = types['REL_MIN']
# Filter out NaN values - R: list_cross$id[!is.na(list_cross$id)]
clean_ids = [x for x in list_cross['id'] if not pd.isna(x)]
clean_types = [x for x in list_cross['type'] if not pd.isna(x)]
# Create DataFrame - R: do.call(cbind.data.frame, list_cross)
crosses = pd.DataFrame({
"id":clean_ids,
"type":clean_types
})
# 2e. Calculate min and max glucose values from ids and types in crosses + store indexes for plotting later
# R: num_extrema = nrow(crosses)-1
num_extrema = len(crosses) - 1
# R: minmax <- rep(NA_real_, num_extrema), indexes <- rep(NA_real_, num_extrema)
minmax = [np.nan] * num_extrema
indexes = [np.nan] * num_extrema
# R: for(i in 1:num_extrema)
for i in range(num_extrema):
# Define search boundaries
# R: s1 <- ifelse(i == 1, crosses[i, 1], indexes[i-1])
if i == 0: # First extrema
s1 = int(crosses.iloc[i]['id']) # crosses[i, 1] in R (1-indexed)
else:
s1 = int(indexes[i-1]) # last minmax index
# R: s2 <- crosses[i+1,1]
s2 = int(crosses.iloc[i+1]['id']) # crosses[i+1, 1] in R
# Extract glucose segment - R: .data[as.character(s1:s2), ]$gl
segment_start = s1
segment_end = s2
glucose_segment = data['gl'].iloc[segment_start:segment_end+1] # including next cross point
# Find min or max based on crossover type
if crosses.iloc[i]['type'] == types['REL_MIN']: # crosses[i, "type"] in R
# R: min(.data[as.character(s1:s2), ]$gl, na.rm = TRUE)
minmax[i] = glucose_segment.min()
# R: which.min(.data[as.character(s1:s2), ]$gl)+s1-1
indexes[i] = glucose_segment.idxmin()
else:
# R: max(.data[as.character(s1:s2), ]$gl, na.rm = TRUE)
minmax[i] = glucose_segment.max()
# R: which.max(.data[as.character(s1:s2), ]$gl)+s1-1
indexes[i] = glucose_segment.idxmax()
# excursion elimination
differences = np.subtract.outer(minmax, minmax).T
standardD = data['gl'].std() # pandas uses sample std dev by default
N = len(minmax)
# MAGE+ algorithm, which identifies and measures positive glycemic excursions
# (nadir-to-peak movements that exceed the standard deviation threshold).
mage_plus_heights, mage_plus_tp_pairs = calculate_mage_plus(differences, minmax, standardD)
# MAGE- algorithm, which identifies and measures negative glycemic excursions
# (peak-to-nadir movements that exceed the standard deviation threshold).
mage_minus_heights, mage_minus_tp_pairs = calculate_mage_minus(differences, minmax, standardD)
if len(mage_minus_heights) == 0 and len(mage_plus_heights) == 0:
return pd.DataFrame({
'start': [data['time'].iloc[0]],
'end': [data['time'].iloc[-1]],
'MAGE': [np.nan],
'plus_or_minus': [np.nan],
'first_excursion': [np.nan]
}, index=[0])
# Determine which excursion type occurs first
if (len(mage_plus_heights) > 0 and
(len(mage_minus_heights) == 0 or
mage_plus_tp_pairs[0][1] <= mage_minus_tp_pairs[0][0])):
is_plus_first = True
else:
is_plus_first = False
# Create MAGE+ result dataframe
mage_plus = pd.DataFrame({
'start': [data['time'].iloc[0]],
'end': [data['time'].iloc[-1]],
'MAGE': [np.mean(mage_plus_heights) if len(mage_plus_heights) > 0 else np.nan],
'plus_or_minus': ['PLUS'],
'first_excursion': [is_plus_first]
})
# Create MAGE- result dataframe
mage_minus = pd.DataFrame({
'start': [data['time'].iloc[0]],
'end': [data['time'].iloc[-1]],
'MAGE': [abs(np.mean(mage_minus_heights)) if len(mage_minus_heights) > 0 else np.nan],
'plus_or_minus': ['MINUS'],
'first_excursion': [not is_plus_first]
})
# Determine which direction has maximum MAGE value
is_plus_max = ((mage_plus['MAGE'].iloc[0] >= mage_minus['MAGE'].iloc[0])
if not pd.isna(mage_plus['MAGE'].iloc[0])
and not pd.isna(mage_minus['MAGE'].iloc[0])
else False
)
return pd.concat([mage_plus, mage_minus], ignore_index=True)
# -------------------
# start mage()
# Handle Series input
if isinstance(data, pd.Series):
# Convert Series to DataFrame format
data_df = pd.DataFrame(
{
"id": ["subject1"] * len(data),
"time": pd.date_range(
start="2020-01-01", periods=len(data), freq="5min"
),
"gl": data.values,
}
)
if version == "ma":
mage_val = mage_ma_single(data_df, short_ma, long_ma, direction, return_type='num')
result = pd.DataFrame({"MAGE": [mage_val]})
else:
result = pd.DataFrame({"MAGE": [mage_naive(data_df)]})
return result
# Handle DataFrame input
data = check_data_columns(data)
# Calculate MAGE for each subject
result = []
for subject in data["id"].unique():
subject_data = data[data["id"] == subject].copy()
if len(subject_data.dropna(subset=["gl"])) == 0:
continue
if version == "ma":
mage_val = mage_ma_single(subject_data, short_ma, long_ma, direction, return_type)
if return_type == "df" :
subject_result_dict = mage_val.to_dict()
else:
subject_result_dict = {"MAGE": mage_val}
else:
mage_val = mage_naive(subject_data)
subject_result_dict = {"MAGE": mage_val}
result.append({"id": subject, **subject_result_dict})
return pd.DataFrame(result)
def calculate_mage_plus(differences, minmax, standardD):
"""
Calculate MAGE+ (positive glycemic excursions)
Args:
differences: NxN matrix of pairwise differences between extrema
minmax: Array of extrema values (peaks and nadirs)
standardD: Standard deviation threshold
Returns:
tuple: (mage_plus_heights, mage_plus_tp_pairs)
"""
N = len(minmax)
mage_plus_heights = []
mage_plus_tp_pairs = []
j = prev_j = 0 # Python uses 0-based indexing
while j < N:
# Get differences from previous extrema to current point j
delta = differences[prev_j:j+1, j] # j+1 because Python slicing is exclusive
if len(delta) == 0:
j += 1
continue
max_v = np.max(delta) # Find maximum upward movement
i = int(np.argmax(delta) + prev_j) # Index of extrema creating maximum
if max_v > standardD:
# Found significant upward excursion (nadir to peak > SD)
k = j
while k < N:
if minmax[k] >= minmax[j]:
j = k # Continue riding the peak upward
# Check if excursion ends (significant drop or end of data)
if differences[j, k] < -standardD or k == N - 1:
max_v = minmax[j] - minmax[i]
# Record the excursion
mage_plus_heights.append(max_v)
mage_plus_tp_pairs.append((i, j)) # (nadir_index, peak_index)
prev_j = k
j = k
break
k += 1
else:
j += 1
return mage_plus_heights, mage_plus_tp_pairs
def calculate_mage_minus(differences, minmax, standardD):
"""
Calculate MAGE- (negative glycemic excursions)
Args:
differences: NxN matrix of pairwise differences between extrema
minmax: Array of extrema values (peaks and nadirs)
standardD: Standard deviation threshold
Returns:
tuple: (mage_minus_heights, mage_minus_tp_pairs)
"""
N = len(minmax)
mage_minus_heights = []
mage_minus_tp_pairs = []
j = prev_j = 0 # Python uses 0-based indexing
while j < N:
# Get differences from previous extrema to current point j
delta = differences[prev_j:j+1, j] # j+1 because Python slicing is exclusive
if len(delta) == 0:
j += 1
continue
min_v = np.min(delta) # Find maximum downward movement (most negative)
i = np.argmin(delta) + prev_j # Index of extrema creating minimum
if min_v < -standardD: # Found significant downward excursion
k = j
while k < N:
if minmax[k] <= minmax[j]:
j = k # Continue riding the nadir downward
# Check if excursion ends (significant rise or end of data)
if differences[j, k] > standardD or k == N - 1:
min_v = minmax[j] - minmax[i] # Calculate final excursion magnitude
# Record the excursion (note: min_v will be negative)
mage_minus_heights.append(min_v)
mage_minus_tp_pairs.append((i, j, k)) # (peak_index, nadir_index, end_index)
prev_j = j
j = k
break
k += 1
else:
j += 1
return mage_minus_heights, mage_minus_tp_pairs
def segment_time_series(data, max_gap_minutes):
"""
Split glucose time series into segments based on large gaps
Simpler approach using time differences
"""
# Calculate time differences
# Calculate time differences between consecutive non-NA glucose readings
data['time_diff'] = np.nan
valid_indices = data['gl'].notna()
if valid_indices.any():
# Get timestamps of valid readings
valid_times = data.loc[valid_indices, 'time']
# Calculate differences between consecutive valid readings
time_diffs = valid_times.diff().dt.total_seconds() / 60 # Convert to minutes
# Assign differences back to original dataframe at valid indices
data.loc[valid_indices, 'time_diff'] = time_diffs
# Identify where gaps exceed threshold
large_gaps = data['time_diff'] > max_gap_minutes
# Create segment labels by cumulatively summing large gaps
# This creates a new segment ID each time we encounter a large gap
data['segment_id'] = large_gaps.cumsum()
# Group by segment and return list of DataFrames
segments = []
for segment_id, group in data.groupby('segment_id'):
# Drop the temporary columns we added
group = group.drop(['time_diff', 'segment_id'], axis=1)
# Drop rows with NA glucose values at the end of the segment
while len(group) > 0 and pd.isna(group['gl'].iloc[-1]):
group = group.iloc[:-1]
segments.append(group.reset_index(drop=True))
return segments
# Identify where gaps exceed threshold