diff --git a/docs/examples/cryptocurrency-quickstart.ipynb b/docs/examples/cryptocurrency-quickstart.ipynb index e54bf0e..4d4a39a 100644 --- a/docs/examples/cryptocurrency-quickstart.ipynb +++ b/docs/examples/cryptocurrency-quickstart.ipynb @@ -63,7 +63,7 @@ "outputs": [], "source": [ "files = os.listdir(path)\n", - "files = [path + \"/\" + x for x in files]" + "files = [path+'/'+x for x in files]" ] }, { @@ -198,18 +198,18 @@ "# Read all filez and set them up to the readable structure for timecopilot\n", "for file in files:\n", " temp_df = pd.read_csv(file)\n", - " temp_df = temp_df[[\"Symbol\", \"Date\", \"Close\"]]\n", - " temp_df.columns = [\"unique_id\", \"ds\", \"y\"]\n", - " big_df = pd.concat([big_df, temp_df])\n", + " temp_df = temp_df[['Symbol','Date','Close']]\n", + " temp_df.columns = ['unique_id','ds','y']\n", + " big_df = pd.concat([big_df,temp_df])\n", "\n", "big_df = big_df.reset_index(drop=True)\n", "big_df[\"ds\"] = pd.to_datetime(big_df[\"ds\"], dayfirst=True, errors=\"coerce\")\n", "\n", - "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further.\n", + "# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further. 
\n", "# big_df = big_df[big_df.ds >= \"2021-01-01\"]\n", - "cryptos = [\"MIOTA\", \"XEM\", \"ETH\", \"LTC\", \"DOGE\", \"CRO\", \"USDC\", \"ADA\"]\n", - "big_df = big_df[big_df.unique_id.isin(cryptos)]\n", - "big_df = big_df.reset_index(drop=True)\n", + "cryptos=['MIOTA','XEM','ETH','LTC','DOGE','CRO','USDC','ADA']\n", + "big_df=big_df[big_df.unique_id.isin(cryptos)]\n", + "big_df=big_df.reset_index(drop=True)\n", "big_df" ] }, @@ -341,7 +341,6 @@ " df_out.loc[idx, col] = np.nan\n", " return df_out\n", "\n", - "\n", "df_missing = add_missing(big_df, col=\"y\", frac=0.03, seed=42)\n", "df_missing = df_missing.sample(frac=1, random_state=42).reset_index(drop=True)\n", "print(df_missing)" @@ -710,14 +709,12 @@ } ], "source": [ - "anomaly_summary_xlm = anomalies_df[\n", + "anomaly_summary_xlm=anomalies_df[\n", " # (anomalies_df.unique_id=='SOL') & \\\n", - " (\n", - " (anomalies_df[\"Chronos-anomaly\"] == True)\n", - " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n", - " | (anomalies_df[\"Theta-anomaly\"] == True)\n", - " )\n", - "].reset_index(drop=True)\n", + " ((anomalies_df['Chronos-anomaly']==True) | \\\n", + " (anomalies_df['SeasonalNaive-anomaly']==True) |\n", + " (anomalies_df['Theta-anomaly']==True)\n", + " )].reset_index(drop=True)\n", "anomaly_summary_xlm" ] }, @@ -957,14 +954,12 @@ } ], "source": [ - "anomaly_summary_xlm = anomalies_df[\n", - " (anomalies_df.unique_id == \"ADA\")\n", - " & (\n", - " (anomalies_df[\"Chronos-anomaly\"] == True)\n", - " | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n", - " | (anomalies_df[\"Theta-anomaly\"] == True)\n", - " )\n", - "].reset_index(drop=True)\n", + "anomaly_summary_xlm=anomalies_df[\n", + " (anomalies_df.unique_id=='ADA') & \\\n", + " ((anomalies_df['Chronos-anomaly']==True) | \\\n", + " (anomalies_df['SeasonalNaive-anomaly']==True) |\n", + " (anomalies_df['Theta-anomaly']==True)\n", + " )].reset_index(drop=True)\n", "anomaly_summary_xlm" ] }, @@ -1204,14 +1199,12 @@ } ], "source": [ - 
"anomaly_summary_xlm = anomalies_df[\n", - " (anomalies_df.unique_id == \"ADA\")\n", - " & (\n", - " (anomalies_df[\"Chronos-anomaly\"] == True)\n", - " & (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n", - " # (anomalies_df['Theta-anomaly']==True)\n", - " )\n", - "].reset_index(drop=True)\n", + "anomaly_summary_xlm=anomalies_df[\n", + " (anomalies_df.unique_id=='ADA') & \\\n", + " ((anomalies_df['Chronos-anomaly']==True) & \\\n", + " (anomalies_df['SeasonalNaive-anomaly']==True) \\\n", + " # (anomalies_df['Theta-anomaly']==True)\n", + " )].reset_index(drop=True)\n", "anomaly_summary_xlm" ] }, @@ -1248,12 +1241,12 @@ "source": [ "tcf1 = TimeCopilotForecaster(\n", " models=[\n", - " AutoARIMA(),\n", + " AutoARIMA(), \n", " Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n", " Theta(),\n", - " AutoETS(),\n", - " Moirai(),\n", - " Prophet(),\n", + " AutoETS(), \n", + " Moirai(), \n", + " Prophet(), \n", " SeasonalNaive(),\n", " ]\n", ")" @@ -1266,7 +1259,7 @@ "metadata": {}, "outputs": [], "source": [ - "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80, 90])" + "fcst_df = tcf1.forecast(df=big_df, h=30, level=[80,90])" ] }, { @@ -1310,9 +1303,9 @@ "metadata": {}, "outputs": [], "source": [ - "eth_fcst_normal = fcst_df[(fcst_df.unique_id == \"ETH\")][\n", - " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n", - "].reset_index(drop=True)" + "eth_fcst_normal=fcst_df[(fcst_df.unique_id=='ETH')]\\\n", + " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n", + " .reset_index(drop=True)" ] }, { @@ -1352,9 +1345,9 @@ "metadata": {}, "outputs": [], "source": [ - "eth_fcst_missing = fcst_df[(fcst_df.unique_id == \"ETH\")][\n", - " [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n", - "].reset_index(drop=True)" + "eth_fcst_missing=fcst_df[(fcst_df.unique_id=='ETH')]\\\n", + " [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n", + " .reset_index(drop=True)" ] }, { @@ -1522,9 +1515,9 @@ } ], "source": [ - "compare = eth_fcst_normal.merge(eth_fcst_missing, 
on=[\"ds\", \"unique_id\"])\n", - "compare[\"dif\"] = abs(compare[\"Chronos_x\"] - compare[\"Chronos_y\"])\n", - "print(compare[\"dif\"].sum())" + "compare=eth_fcst_normal.merge(eth_fcst_missing,on=['ds','unique_id'])\n", + "compare['dif']=abs(compare['Chronos_x']-compare['Chronos_y'])\n", + "print(compare['dif'].sum())" ] }, { diff --git a/mkdocs.yml b/mkdocs.yml index f4df3da..7e8f424 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -22,15 +22,12 @@ nav: - examples/agent-quickstart.ipynb - examples/llm-providers.ipynb - examples/aws-bedrock.ipynb - - examples/google-llms.ipynb - examples/forecaster-quickstart.ipynb - examples/anomaly-detection-forecaster-quickstart.ipynb - examples/ts-foundation-models-comparison-quickstart.ipynb - examples/gift-eval.ipynb - examples/chronos-family.ipynb - - examples/cryptocurrency-quickstart.ipynb - - examples/sktime.ipynb - - examples/patchtst-fm.ipynb + - examples/cryptocurrency-quickstart.ipynb - Experiments: - experiments/gift-eval.md - experiments/fev.md @@ -46,12 +43,10 @@ nav: - api/models/ml.md - api/models/neural.md - api/models/ensembles.md - - api/models/adapters/adapters.md - api/models/utils/forecaster.md - api/gift-eval/gift-eval.md - Changelogs: - changelogs/index.md - - changelogs/v0.0.23.md - changelogs/v0.0.22.md - changelogs/v0.0.21.md - changelogs/v0.0.20.md @@ -74,7 +69,6 @@ nav: theme: name: "material" - custom_dir: docs/overrides logo: https://timecopilot.s3.amazonaws.com/public/logos/logo-white.svg favicon: https://timecopilot.s3.amazonaws.com/public/logos/favicon-white.svg palette: diff --git a/timecopilot/forecaster.py b/timecopilot/forecaster.py index a27ac5f..4c41884 100644 --- a/timecopilot/forecaster.py +++ b/timecopilot/forecaster.py @@ -96,11 +96,9 @@ def _call_models( res_df_model = fn(**known_kwargs, **kwargs) res_df_model = res_df_model.rename( columns={ - col: ( - col.replace(self.fallback_model.alias, model.alias) - if col.startswith(self.fallback_model.alias) - else col - ) + col: 
col.replace(self.fallback_model.alias, model.alias) + if col.startswith(self.fallback_model.alias) + else col for col in res_df_model.columns } ) diff --git a/timecopilot/models/utils/gluonts_forecaster.py b/timecopilot/models/utils/gluonts_forecaster.py index 25bc2d5..d49b25b 100644 --- a/timecopilot/models/utils/gluonts_forecaster.py +++ b/timecopilot/models/utils/gluonts_forecaster.py @@ -13,6 +13,90 @@ from .forecaster import Forecaster, QuantileConverter +_COARSE_FREQ_PREFIXES = ("B", "D", "W", "M", "Q", "A", "Y") + + +def _maybe_align_for_gluonts( + df: pd.DataFrame, + freq: str, + *, + id_col: str = "unique_id", + ts_col: str = "ds", + coarse_only: bool = True, + verbose: bool = True, +): + """ + GluonTS requires a regular grid. If timestamps are consistently "end-of-bin" + (e.g., 23:59:59 for daily close), GluonTS will anchor to the bin boundary + (midnight) and your output ds can look "shifted". + + This function: + - builds an internal copy aligned to the freq grid (floor to bin boundary) + - stores the most common within-bin offset per series (anchor) + - returns (df_gluonts, anchor_by_id) so you can restore the offset in outputs + + Returns: + df_gluonts: df aligned to grid (or original df if no action) + anchor: Series[timedelta] indexed by unique_id, or None if not applied + """ + f = str(freq) + + if coarse_only and not f.startswith(_COARSE_FREQ_PREFIXES): + return df, None + + offset = pd.tseries.frequencies.to_offset(f) + + g = df.sort_values([id_col, ts_col]).copy(deep=False) + ds = pd.to_datetime(g[ts_col]) + + # grid boundary for each timestamp, given the offset + base = ds.dt.floor(offset) + within = ds - base # timedelta inside the bin (time-of-day for daily) + + # use the anchor provided function + anchor = within.groupby(g[id_col]).apply(_compute_anchor_with_guard).dropna() + + if anchor.empty: + return df, None + + # apply only if any series is non-zero offset + if not (anchor != pd.Timedelta(0)).any(): + return df, None + + if verbose: + 
print( + f"[gluonts-align] Applied internal time alignment for freq='{f}': " + f"removed dominant within-bin offset(s) " + f"{', '.join(str(o) for o in anchor.value_counts().index)} " + f"from {anchor.shape[0]}/{g[id_col].nunique()} series; " + f"input data unchanged, offsets restored in forecasts." + ) + + df_gluonts = df.copy(deep=False) + df_gluonts[ts_col] = pd.to_datetime(df_gluonts[ts_col]).dt.floor(offset) + + return df_gluonts, anchor + + +def _compute_anchor_with_guard( + within: pd.Series, min_frac: float = 0.8, min_count: int = 10 +): + vc = within.value_counts() + top_offset = vc.index[0] + top_count = vc.iloc[0] + total = vc.sum() + + if top_offset == pd.Timedelta(0): + return None + + if top_count < min_count: + return None + + if top_count / total < min_frac: + return None + + return top_offset + def fix_freq(freq: str) -> str: # see https://github.com/awslabs/gluonts/pull/2462/files @@ -166,6 +250,15 @@ def forecast( df = maybe_convert_col_to_float32(df, "y") freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) + + df_gluonts, anchor = _maybe_align_for_gluonts( + df, + fix_freq(freq), + coarse_only=True, # set False if you want this + # to handle hourly/minutely end-stamps too + verbose=True, + ) + gluonts_dataset = PandasDataset.from_long_dataframe( df.copy(deep=False), target="y", @@ -173,17 +266,27 @@ def forecast( timestamp="ds", freq=fix_freq(freq), ) + with self.get_predictor(prediction_length=h) as predictor: fcsts = predictor.predict( gluonts_dataset, num_samples=self.num_samples, ) + + fcsts_list = list(fcsts) # materialize iterator once + fcst_df = self.gluonts_fcsts_to_df( - fcsts, - freq=freq, + fcsts_list, + freq=fix_freq(freq), model_name=self.alias, quantiles=qc.quantiles, ) + + if anchor is not None: + fcst_df["ds"] = pd.to_datetime(fcst_df["ds"]) + fcst_df["unique_id"].map( + anchor + ) + if qc.quantiles is not None: fcst_df = qc.maybe_convert_quantiles_to_level( fcst_df,