Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 39 additions & 46 deletions docs/examples/cryptocurrency-quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"outputs": [],
"source": [
"files = os.listdir(path)\n",
"files = [path + \"/\" + x for x in files]"
"files = [path+'/'+x for x in files]"
]
},
{
Expand Down Expand Up @@ -198,18 +198,18 @@
"# Read all files and set them up to the readable structure for timecopilot\n",
"for file in files:\n",
" temp_df = pd.read_csv(file)\n",
" temp_df = temp_df[[\"Symbol\", \"Date\", \"Close\"]]\n",
" temp_df.columns = [\"unique_id\", \"ds\", \"y\"]\n",
" big_df = pd.concat([big_df, temp_df])\n",
" temp_df = temp_df[['Symbol','Date','Close']]\n",
" temp_df.columns = ['unique_id','ds','y']\n",
" big_df = pd.concat([big_df,temp_df])\n",
"\n",
"big_df = big_df.reset_index(drop=True)\n",
"big_df[\"ds\"] = pd.to_datetime(big_df[\"ds\"], dayfirst=True, errors=\"coerce\")\n",
"\n",
"# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further.\n",
"# This line will be kept for execution time sanity, feel free to remove it if you want to stress timing a little further. \n",
"# big_df = big_df[big_df.ds >= \"2021-01-01\"]\n",
"cryptos = [\"MIOTA\", \"XEM\", \"ETH\", \"LTC\", \"DOGE\", \"CRO\", \"USDC\", \"ADA\"]\n",
"big_df = big_df[big_df.unique_id.isin(cryptos)]\n",
"big_df = big_df.reset_index(drop=True)\n",
"cryptos=['MIOTA','XEM','ETH','LTC','DOGE','CRO','USDC','ADA']\n",
"big_df=big_df[big_df.unique_id.isin(cryptos)]\n",
"big_df=big_df.reset_index(drop=True)\n",
"big_df"
]
},
Expand Down Expand Up @@ -341,7 +341,6 @@
" df_out.loc[idx, col] = np.nan\n",
" return df_out\n",
"\n",
"\n",
"df_missing = add_missing(big_df, col=\"y\", frac=0.03, seed=42)\n",
"df_missing = df_missing.sample(frac=1, random_state=42).reset_index(drop=True)\n",
"print(df_missing)"
Expand Down Expand Up @@ -710,14 +709,12 @@
}
],
"source": [
"anomaly_summary_xlm = anomalies_df[\n",
"anomaly_summary_xlm=anomalies_df[\n",
" # (anomalies_df.unique_id=='SOL') & \\\n",
" (\n",
" (anomalies_df[\"Chronos-anomaly\"] == True)\n",
" | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
" | (anomalies_df[\"Theta-anomaly\"] == True)\n",
" )\n",
"].reset_index(drop=True)\n",
" ((anomalies_df['Chronos-anomaly']==True) | \\\n",
" (anomalies_df['SeasonalNaive-anomaly']==True) |\n",
" (anomalies_df['Theta-anomaly']==True)\n",
" )].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
Expand Down Expand Up @@ -957,14 +954,12 @@
}
],
"source": [
"anomaly_summary_xlm = anomalies_df[\n",
" (anomalies_df.unique_id == \"ADA\")\n",
" & (\n",
" (anomalies_df[\"Chronos-anomaly\"] == True)\n",
" | (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
" | (anomalies_df[\"Theta-anomaly\"] == True)\n",
" )\n",
"].reset_index(drop=True)\n",
"anomaly_summary_xlm=anomalies_df[\n",
" (anomalies_df.unique_id=='ADA') & \\\n",
" ((anomalies_df['Chronos-anomaly']==True) | \\\n",
" (anomalies_df['SeasonalNaive-anomaly']==True) |\n",
" (anomalies_df['Theta-anomaly']==True)\n",
" )].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
Expand Down Expand Up @@ -1204,14 +1199,12 @@
}
],
"source": [
"anomaly_summary_xlm = anomalies_df[\n",
" (anomalies_df.unique_id == \"ADA\")\n",
" & (\n",
" (anomalies_df[\"Chronos-anomaly\"] == True)\n",
" & (anomalies_df[\"SeasonalNaive-anomaly\"] == True)\n",
" # (anomalies_df['Theta-anomaly']==True)\n",
" )\n",
"].reset_index(drop=True)\n",
"anomaly_summary_xlm=anomalies_df[\n",
" (anomalies_df.unique_id=='ADA') & \\\n",
" ((anomalies_df['Chronos-anomaly']==True) & \\\n",
" (anomalies_df['SeasonalNaive-anomaly']==True) \\\n",
" # (anomalies_df['Theta-anomaly']==True)\n",
" )].reset_index(drop=True)\n",
"anomaly_summary_xlm"
]
},
Expand Down Expand Up @@ -1248,12 +1241,12 @@
"source": [
"tcf1 = TimeCopilotForecaster(\n",
" models=[\n",
" AutoARIMA(),\n",
" AutoARIMA(), \n",
" Chronos(repo_id=\"amazon/chronos-bolt-mini\"),\n",
" Theta(),\n",
" AutoETS(),\n",
" Moirai(),\n",
" Prophet(),\n",
" AutoETS(), \n",
" Moirai(), \n",
" Prophet(), \n",
" SeasonalNaive(),\n",
" ]\n",
")"
Expand All @@ -1266,7 +1259,7 @@
"metadata": {},
"outputs": [],
"source": [
"fcst_df = tcf1.forecast(df=big_df, h=30, level=[80, 90])"
"fcst_df = tcf1.forecast(df=big_df, h=30, level=[80,90])"
]
},
{
Expand Down Expand Up @@ -1310,9 +1303,9 @@
"metadata": {},
"outputs": [],
"source": [
"eth_fcst_normal = fcst_df[(fcst_df.unique_id == \"ETH\")][\n",
" [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n",
"].reset_index(drop=True)"
"eth_fcst_normal=fcst_df[(fcst_df.unique_id=='ETH')]\\\n",
" [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n",
" .reset_index(drop=True)"
]
},
{
Expand Down Expand Up @@ -1352,9 +1345,9 @@
"metadata": {},
"outputs": [],
"source": [
"eth_fcst_missing = fcst_df[(fcst_df.unique_id == \"ETH\")][\n",
" [\"unique_id\", \"ds\", \"Chronos\", \"Chronos-lo-80\"]\n",
"].reset_index(drop=True)"
"eth_fcst_missing=fcst_df[(fcst_df.unique_id=='ETH')]\\\n",
" [['unique_id','ds','Chronos','Chronos-lo-80']]\\\n",
" .reset_index(drop=True)"
]
},
{
Expand Down Expand Up @@ -1522,9 +1515,9 @@
}
],
"source": [
"compare = eth_fcst_normal.merge(eth_fcst_missing, on=[\"ds\", \"unique_id\"])\n",
"compare[\"dif\"] = abs(compare[\"Chronos_x\"] - compare[\"Chronos_y\"])\n",
"print(compare[\"dif\"].sum())"
"compare=eth_fcst_normal.merge(eth_fcst_missing,on=['ds','unique_id'])\n",
"compare['dif']=abs(compare['Chronos_x']-compare['Chronos_y'])\n",
"print(compare['dif'].sum())"
]
},
{
Expand Down
8 changes: 1 addition & 7 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,12 @@ nav:
- examples/agent-quickstart.ipynb
- examples/llm-providers.ipynb
- examples/aws-bedrock.ipynb
- examples/google-llms.ipynb
- examples/forecaster-quickstart.ipynb
- examples/anomaly-detection-forecaster-quickstart.ipynb
- examples/ts-foundation-models-comparison-quickstart.ipynb
- examples/gift-eval.ipynb
- examples/chronos-family.ipynb
- examples/cryptocurrency-quickstart.ipynb
- examples/sktime.ipynb
- examples/patchtst-fm.ipynb
- examples/crytpocurrency-quickstart.ipynb
- Experiments:
- experiments/gift-eval.md
- experiments/fev.md
Expand All @@ -46,12 +43,10 @@ nav:
- api/models/ml.md
- api/models/neural.md
- api/models/ensembles.md
- api/models/adapters/adapters.md
- api/models/utils/forecaster.md
- api/gift-eval/gift-eval.md
- Changelogs:
- changelogs/index.md
- changelogs/v0.0.23.md
- changelogs/v0.0.22.md
- changelogs/v0.0.21.md
- changelogs/v0.0.20.md
Expand All @@ -74,7 +69,6 @@ nav:

theme:
name: "material"
custom_dir: docs/overrides
logo: https://timecopilot.s3.amazonaws.com/public/logos/logo-white.svg
favicon: https://timecopilot.s3.amazonaws.com/public/logos/favicon-white.svg
palette:
Expand Down
8 changes: 3 additions & 5 deletions timecopilot/forecaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,9 @@ def _call_models(
res_df_model = fn(**known_kwargs, **kwargs)
res_df_model = res_df_model.rename(
columns={
col: (
col.replace(self.fallback_model.alias, model.alias)
if col.startswith(self.fallback_model.alias)
else col
)
col: col.replace(self.fallback_model.alias, model.alias)
if col.startswith(self.fallback_model.alias)
else col
for col in res_df_model.columns
}
)
Expand Down
107 changes: 105 additions & 2 deletions timecopilot/models/utils/gluonts_forecaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,90 @@

from .forecaster import Forecaster, QuantileConverter

_COARSE_FREQ_PREFIXES = ("B", "D", "W", "M", "Q", "A", "Y")


def _maybe_align_for_gluonts(
    df: pd.DataFrame,
    freq: str,
    *,
    id_col: str = "unique_id",
    ts_col: str = "ds",
    coarse_only: bool = True,
    verbose: bool = True,
) -> "tuple[pd.DataFrame, pd.Series | None]":
    """Align timestamps to the frequency grid before handing data to GluonTS.

    GluonTS requires a regular grid. If timestamps are consistently
    "end-of-bin" (e.g., 23:59:59 for daily close), GluonTS will anchor to the
    bin boundary (midnight) and the output ``ds`` can look "shifted".

    This function:
      - computes, per series, the dominant offset of the timestamps from their
        bin boundary (see ``_compute_anchor_with_guard``);
      - if at least one series has a non-zero dominant offset, builds a
        shallow copy of ``df`` whose ``ts_col`` is floored to the bin boundary;
      - returns that copy together with the per-series offsets so the caller
        can add them back to the forecast timestamps.

    For frequencies that pandas cannot floor to (non-fixed offsets such as
    weekly or monthly), the bin boundary is taken to be the start of the day,
    so only the time-of-day component is removed and restored.

    Args:
        df: Long-format frame containing at least ``id_col`` and ``ts_col``.
        freq: Frequency alias; converted with ``str`` and parsed by
            ``pandas.tseries.frequencies.to_offset``.
        id_col: Name of the series-identifier column.
        ts_col: Name of the timestamp column.
        coarse_only: When True, do nothing unless ``freq`` starts with one of
            ``_COARSE_FREQ_PREFIXES``.
        verbose: When True, print a one-line summary when alignment is applied.

    Returns:
        ``(df_gluonts, anchor)``. When no alignment is applied this is
        ``(df, None)`` with ``df`` returned unchanged. Otherwise ``df_gluonts``
        is the aligned copy and ``anchor`` is a timedelta Series indexed by
        series id that covers *every* series: the detected offset where one
        was found and a zero timedelta elsewhere, so that mapping ids through
        ``anchor`` never produces ``NaT``.
    """
    f = str(freq)

    if coarse_only and not f.startswith(_COARSE_FREQ_PREFIXES):
        return df, None

    offset = pd.tseries.frequencies.to_offset(f)

    # sort_values already returns a new frame; it is only read from below.
    g = df.sort_values([id_col, ts_col])
    ds = pd.to_datetime(g[ts_col])

    # timedelta inside the bin (time-of-day for daily)
    within = ds - _floor_to_grid(ds, offset)

    # One entry per series; missing where the guard rejected the series.
    per_series = within.groupby(g[id_col]).apply(_compute_anchor_with_guard)
    anchored = per_series.dropna()

    if anchored.empty:
        return df, None

    # apply only if any series is non-zero offset
    if not (anchored != pd.Timedelta(0)).any():
        return df, None

    if verbose:
        print(
            f"[gluonts-align] Applied internal time alignment for freq='{f}': "
            f"removed dominant within-bin offset(s) "
            f"{', '.join(str(o) for o in anchored.value_counts().index)} "
            f"from {anchored.shape[0]}/{g[id_col].nunique()} series; "
            f"input data unchanged, offsets restored in forecasts."
        )

    df_gluonts = df.copy(deep=False)
    df_gluonts[ts_col] = _floor_to_grid(pd.to_datetime(df_gluonts[ts_col]), offset)

    # Every series is floored above, so every series needs an offset to add
    # back; series without a detected anchor get zero instead of being absent
    # (an absent id would turn into NaT when the caller maps ids to offsets).
    anchor = pd.to_timedelta(per_series).fillna(pd.Timedelta(0))

    return df_gluonts, anchor


def _floor_to_grid(stamps: pd.Series, offset) -> pd.Series:
    """Floor datetimes to ``offset``, or to midnight if ``offset`` is non-fixed."""
    try:
        return stamps.dt.floor(offset)
    except ValueError:
        # pandas can only floor to fixed frequencies; weekly, monthly,
        # quarterly, yearly and business-day offsets raise ValueError.
        return stamps.dt.normalize()


def _compute_anchor_with_guard(
    within: pd.Series, min_frac: float = 0.8, min_count: int = 10
) -> "pd.Timedelta | None":
    """Return the dominant non-zero within-bin offset of a series, if any.

    Args:
        within: Offsets of each timestamp from its bin boundary for a single
            series (timestamp minus floored timestamp).
        min_frac: Minimum share of the non-null offsets that must equal the
            most common offset.
        min_count: Minimum number of occurrences of the most common offset.

    Returns:
        The most common offset when it is non-zero, occurs at least
        ``min_count`` times and accounts for at least ``min_frac`` of the
        non-null offsets; otherwise ``None``. ``None`` is also returned when
        ``within`` has no non-null values.
    """
    # value_counts drops missing values and sorts by count, most common first.
    vc = within.value_counts()

    # Empty or all-NaT input: there is no dominant offset to report.
    if vc.empty:
        return None

    top_offset = vc.index[0]
    top_count = vc.iloc[0]
    total = vc.sum()

    # Already on the grid: nothing to remove or restore.
    if top_offset == pd.Timedelta(0):
        return None

    # Too few observations to trust the offset.
    if top_count < min_count:
        return None

    # Offset is not consistent enough across the series.
    if top_count / total < min_frac:
        return None

    return top_offset


def fix_freq(freq: str) -> str:
# see https://github.com/awslabs/gluonts/pull/2462/files
Expand Down Expand Up @@ -166,24 +250,43 @@ def forecast(
df = maybe_convert_col_to_float32(df, "y")
freq = self._maybe_infer_freq(df, freq)
qc = QuantileConverter(level=level, quantiles=quantiles)

df_gluonts, anchor = _maybe_align_for_gluonts(
df,
fix_freq(freq),
coarse_only=True, # set False if you want this
# to handle hourly/minutely end-stamps too
verbose=True,
)

gluonts_dataset = PandasDataset.from_long_dataframe(
df.copy(deep=False),
target="y",
item_id="unique_id",
timestamp="ds",
freq=fix_freq(freq),
)

with self.get_predictor(prediction_length=h) as predictor:
fcsts = predictor.predict(
gluonts_dataset,
num_samples=self.num_samples,
)

fcsts_list = list(fcsts) # materialize iterator once

fcst_df = self.gluonts_fcsts_to_df(
fcsts,
freq=freq,
fcsts_list,
freq=fix_freq(freq),
model_name=self.alias,
quantiles=qc.quantiles,
)

if anchor is not None:
fcst_df["ds"] = pd.to_datetime(fcst_df["ds"]) + fcst_df["unique_id"].map(
anchor
)

if qc.quantiles is not None:
fcst_df = qc.maybe_convert_quantiles_to_level(
fcst_df,
Expand Down