-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpredictions.py
More file actions
90 lines (77 loc) · 3.52 KB
/
predictions.py
File metadata and controls
90 lines (77 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Time Series Predictions - SKTIME
import warnings
#warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)
# from mock_data import *
from pred_utils import *
# Remove later when these information are in the db
# Maps the numeric recordType code stored on each record to its use case name.
use_cases = {
    "1": "queueing",
    "2": "freeSeats",
    "3": "event",
}
# Time Series Forecasts
def create_future_data(total_df):
    """Build per-device forecasts for the next two months.

    For every unique ``deviceID`` in *total_df*, resample the device's
    records to hourly values, clean anomalies, tune and run a
    Prophet/Naive ensemble, and collect the forecasts.

    Parameters
    ----------
    total_df : pd.DataFrame
        Must contain the columns ``deviceID``, ``recordType``,
        ``timestamp`` and one value column per use case
        (``queueing`` / ``freeSeats`` / ``event``).

    Returns
    -------
    list[dict]
        One record per forecasted hour and device, with keys
        ``timestamp``, ``recordType``, ``deviceID`` and the prediction
        column (``to_dict("records")`` output).
    """
    device_lst = []
    # Forecasting horizon: 2 months expressed in hours (~61 days = 1460 h)
    fh = np.arange(1, 1460 + 1)
    # 1. Retrieve all unique devices (deviceID)
    device_arr = pd.unique(total_df["deviceID"].values.ravel())
    # 2. Loop through devices
    for i in device_arr:
        # 3./4. Filter for this device; keep only timestamp + use-case column
        y_uc = total_df[total_df["deviceID"] == i]
        # NOTE(review): assumes each device carries exactly one recordType;
        # any additional types returned by unique() are silently ignored.
        record_type = y_uc["recordType"].unique()[0]
        uc = use_cases[record_type]
        y_uc = y_uc.reindex(["timestamp", uc], axis=1)
        # Make data univariate and time-indexed so it can be resampled
        y_uc = y_uc.set_index("timestamp")
        if uc == "event":
            # Person enters -> +1, person leaves -> -1; the running sum of
            # these increments is the momentary occupancy.
            y_uc[y_uc == "personIn"] = 1
            y_uc[y_uc == "personOut"] = -1
            y_uc["event"] = pd.to_numeric(np.cumsum(y_uc["event"]))
        # Hourly mean; empty hours produce NaN and are filled with 0 below.
        # ".mean()" replaces the deprecated ".aggregate(np.mean)" form.
        y = y_uc.groupby(pd.Grouper(freq="60Min")).mean()
        y.columns = ["y"]
        y = y["y"].fillna(0).astype(float)
        # Handle anomalies (project helper from pred_utils)
        y_ = anomaly_handler(y, uc)
        # plot_series(y_)
        # Log transformation keeps the series positive so forecasts cannot
        # go negative.
        # NOTE(review): the predictions below appear to remain in log space —
        # confirm that ensemble_predictions inverts the transform (expm1);
        # otherwise the freeSeats cap compares log values against raw maxima.
        y_log = np.log(y_ + 1)
        # 5. Tuned Prophet hyper-parameters (n_calls kept small: slow model)
        prophet_param_dict = get_tuned_hyperparameters(
            optimise_prophet, param_names_prophet,
            param_space_prophet, y_log, calls=3)
        # 6. Tuned Naive hyper-parameters
        naive_param_dict = get_tuned_hyperparameters(
            optimise_naive, param_names_naive,
            param_space_naive, y_log, calls=50)
        # 7. Multiplexed ensemble predictions
        y_pred = ensemble_predictions(prophet_param_dict, naive_param_dict,
                                      fh, y_log)
        # Cap seat forecasts at the historically observed maximum
        if uc == "freeSeats":
            cap = np.max(y.values)  # hoisted: computed once, used twice
            y_pred[y_pred > cap] = cap
        # plot_series(y_, y_pred, labels=["y", "y_pred"])
        # 8. Create the forecast dataframe for this device
        y_pred_df = y_pred.to_frame()
        y_pred_df = y_pred_df.rename_axis("timestamp")
        y_pred_df["timestamp"] = y_pred_df.index
        y_pred_df = y_pred_df.reset_index(drop=True)
        y_pred_df["recordType"] = uc
        y_pred_df["deviceID"] = i
        device_lst.append(y_pred_df)
    # 9./10. Concatenate all per-device frames and sort deterministically
    y_pred_df_total = pd.concat(device_lst, sort=True)
    y_pred_df_total = y_pred_df_total.sort_values(
        ["timestamp", "deviceID"], ascending=(True, True))
    y_pred_df_total = y_pred_df_total.reset_index(drop=True)
    # 11. Return as a list of row dictionaries
    return y_pred_df_total.to_dict("records")