-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpredictions.py
More file actions
90 lines (77 loc) · 3.52 KB
/
predictions.py
File metadata and controls
90 lines (77 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Time Series Predictions - SKTIME
import warnings
#warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)
# from mock_data import *
from pred_utils import *
# Remove later when these information are in the db
# Maps the numeric recordType code stored on each record to its use case name.
use_cases = {
    "1": "queueing",
    "2": "freeSeats",
    "3": "event",
}
# Time Series Forecasts
def create_future_data(total_df):
    """Build per-device forecasts for the next two months.

    For every unique ``deviceID`` in *total_df*, resample the device's
    records to hourly values, clean anomalies, tune and run a
    Prophet/Naive ensemble, and collect the forecasts.

    Parameters
    ----------
    total_df : pd.DataFrame
        Must contain the columns ``deviceID``, ``recordType``,
        ``timestamp`` and one value column per use case
        (``queueing`` / ``freeSeats`` / ``event``).

    Returns
    -------
    list[dict]
        One record per forecasted hour and device, with keys
        ``timestamp``, ``recordType``, ``deviceID`` and the prediction
        column (``to_dict("records")`` output).
    """
    device_lst = []
    # Forecasting horizon: 2 months expressed in hours (~61 days = 1460 h)
    fh = np.arange(1, 1460 + 1)
    # 1. Retrieve all unique devices (deviceID)
    device_arr = pd.unique(total_df["deviceID"].values.ravel())
    # 2. Loop through devices
    for i in device_arr:
        # 3./4. Filter for this device; keep only timestamp + use-case column
        y_uc = total_df[total_df["deviceID"] == i]
        # NOTE(review): assumes each device carries exactly one recordType;
        # any additional types returned by unique() are silently ignored.
        record_type = y_uc["recordType"].unique()[0]
        uc = use_cases[record_type]
        y_uc = y_uc.reindex(["timestamp", uc], axis=1)
        # Make data univariate and time-indexed so it can be resampled
        y_uc = y_uc.set_index("timestamp")
        if uc == "event":
            # Person enters -> +1, person leaves -> -1; the running sum of
            # these increments is the momentary occupancy.
            y_uc[y_uc == "personIn"] = 1
            y_uc[y_uc == "personOut"] = -1
            y_uc["event"] = pd.to_numeric(np.cumsum(y_uc["event"]))
        # Hourly mean; empty hours produce NaN and are filled with 0 below.
        # ".mean()" replaces the deprecated ".aggregate(np.mean)" form.
        y = y_uc.groupby(pd.Grouper(freq="60Min")).mean()
        y.columns = ["y"]
        y = y["y"].fillna(0).astype(float)
        # Handle anomalies (project helper from pred_utils)
        y_ = anomaly_handler(y, uc)
        # plot_series(y_)
        # Log transformation keeps the series positive so forecasts cannot
        # go negative.
        # NOTE(review): the predictions below appear to remain in log space —
        # confirm that ensemble_predictions inverts the transform (expm1);
        # otherwise the freeSeats cap compares log values against raw maxima.
        y_log = np.log(y_ + 1)
        # 5. Tuned Prophet hyper-parameters (n_calls kept small: slow model)
        prophet_param_dict = get_tuned_hyperparameters(
            optimise_prophet, param_names_prophet,
            param_space_prophet, y_log, calls=3)
        # 6. Tuned Naive hyper-parameters
        naive_param_dict = get_tuned_hyperparameters(
            optimise_naive, param_names_naive,
            param_space_naive, y_log, calls=50)
        # 7. Multiplexed ensemble predictions
        y_pred = ensemble_predictions(prophet_param_dict, naive_param_dict,
                                      fh, y_log)
        # Cap seat forecasts at the historically observed maximum
        if uc == "freeSeats":
            cap = np.max(y.values)  # hoisted: computed once, used twice
            y_pred[y_pred > cap] = cap
        # plot_series(y_, y_pred, labels=["y", "y_pred"])
        # 8. Create the forecast dataframe for this device
        y_pred_df = y_pred.to_frame()
        y_pred_df = y_pred_df.rename_axis("timestamp")
        y_pred_df["timestamp"] = y_pred_df.index
        y_pred_df = y_pred_df.reset_index(drop=True)
        y_pred_df["recordType"] = uc
        y_pred_df["deviceID"] = i
        device_lst.append(y_pred_df)
    # 9./10. Concatenate all per-device frames and sort deterministically
    y_pred_df_total = pd.concat(device_lst, sort=True)
    y_pred_df_total = y_pred_df_total.sort_values(
        ["timestamp", "deviceID"], ascending=(True, True))
    y_pred_df_total = y_pred_df_total.reset_index(drop=True)
    # 11. Return as a list of row dictionaries
    return y_pred_df_total.to_dict("records")