-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdata_retrieval.py
More file actions
72 lines (59 loc) · 2.44 KB
/
data_retrieval.py
File metadata and controls
72 lines (59 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pandas as pd
from datetime import datetime, timezone
import microSWIFTtelemetry
from data_cleaning import clean_data
def get_swift_data(buoy_ids, start_date, end_date=None):
    """
    Retrieve SWIFT data for given buoy IDs starting from a specified date.

    Each buoy is queried through ``microSWIFTtelemetry.pull_telemetry_as_var``
    with ``var_type="pandas"``. Every non-empty result is tagged with its
    buoy ID, gets a ``time`` column copied from its index, is passed through
    ``clean_data``, and is finally combined into one DataFrame.

    Args:
        buoy_ids (list): List of buoy IDs. A single ID passed as a bare
            string is treated as a one-element list.
        start_date (str or datetime): Start date for data retrieval.
        end_date (str or datetime, optional): End date for data retrieval.
            If not provided, retrieves data up to the present (UTC). The
            "present" is resolved once, so all buoys share the same window.

    Returns:
        DataFrame: Multi-index DataFrame (``"Buoy ID"`` plus the original
        per-buoy index) containing data for the specified buoy IDs, or None
        if no valid data is retrieved.

    Raises:
        ValueError: If ``buoy_ids`` is empty.
    """
    if not buoy_ids:
        raise ValueError("buoy_ids list cannot be empty")
    if isinstance(buoy_ids, str):
        # Iterating a bare string would query one "buoy" per character.
        buoy_ids = [buoy_ids]

    # Always pass an explicit end time instead of relying on the library's
    # default: per the original author's note, that default is evaluated once
    # at function definition, not on each call.
    if end_date is None:
        end_date = datetime.now(timezone.utc)

    # Collect per-buoy frames and concatenate once at the end; concatenating
    # inside the loop recopies all previously accumulated rows every time.
    frames = []
    for buoy_id in buoy_ids:
        data, error = microSWIFTtelemetry.pull_telemetry_as_var(
            buoy_id, start_date, end_date, var_type="pandas"
        )
        if len(data) == 0:
            print(f"Error retrieving data for buoy ID {buoy_id}: {error}")
            continue
        data["Buoy ID"] = buoy_id
        data["time"] = data.index
        frames.append(clean_data(data))

    microSWIFT_df = (
        pd.concat(frames, ignore_index=False) if frames else pd.DataFrame()
    )
    if microSWIFT_df.empty:
        print("No valid data retrieved for buoy ID(s).")
        return None

    # Outer level: the "Buoy ID" column; inner level: the existing index.
    microSWIFT_df = microSWIFT_df.set_index(["Buoy ID", microSWIFT_df.index])
    return microSWIFT_df
def get_recent_data(df, buoy_id):
    """
    Retrieve the most recent data for a specific buoy ID from the DataFrame.

    "Most recent" means the last row stored for that buoy; the rows are not
    re-sorted here, so this assumes they are already in chronological order.

    Args:
        df (pd.DataFrame): Multi-index DataFrame containing SWIFT data, with
            an index level named ``"Buoy ID"``. May be None (for example when
            an upstream retrieval produced no data).
        buoy_id (str): The specific buoy ID for which to retrieve the last row.

    Returns:
        pd.Series: The last row for the specified buoy ID, or None if ``df``
        is None or the buoy ID is not present.
    """
    if df is None:
        print(f"No DataFrame provided; cannot look up buoy ID {buoy_id}.")
        return None

    try:
        buoy_data = df.xs(buoy_id, level="Buoy ID")
    except KeyError:
        print(f"Buoy ID {buoy_id} not found in the DataFrame.")
        return None

    # iloc[-1] always yields a Series. tail(1).squeeze() would collapse to a
    # bare scalar whenever the frame happens to have a single column.
    return buoy_data.iloc[-1]