ShinDongWoon · ShinDongWoon · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025
diff --git a/src/timesnet_forecast/models/timesnet.py b/src/timesnet_forecast/models/timesnet.py
@@ -395,12 +395,28 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         if amplitudes.dim() == 1:
             amplitudes = amplitudes.view(1, -1).expand(B, -1)
         amp = amplitudes[:, valid_indices] if amplitudes.numel() > 0 else amplitudes
-        weights_flat = F.softmax(amp, dim=1) if amp.numel() > 0 else amp
-        if weights_flat.numel() > 0:
-            weight_sum = weights_flat.sum(dim=1)
-            target = torch.ones_like(weight_sum)
-            if not torch.allclose(weight_sum, target, atol=1e-4, rtol=1e-3):
-                raise RuntimeError("Residual weights must sum to 1 per sample")
+        if amp.numel() > 0:
+            softmax_dtype = amp.dtype
+            if softmax_dtype in (torch.float16, torch.bfloat16):
+                amp_for_softmax = amp.to(dtype=torch.float32)
+                weights_float = F.softmax(amp_for_softmax, dim=1)
+            else:
+                weights_float = F.softmax(amp, dim=1)
+            if torch.any(~torch.isfinite(weights_float)):
+                raise RuntimeError(
+                    "TimesNet residual weights contain non-finite values; check input amplitudes"
+                )
+            eps = torch.finfo(weights_float.dtype).eps
+            weight_sum = weights_float.sum(dim=1, keepdim=True)
+            zero_mask = weight_sum <= eps
+            if zero_mask.any():
+                uniform = torch.full_like(weights_float, 1.0 / weights_float.size(1))
+                weights_float = torch.where(zero_mask, uniform, weights_float)
+                weight_sum = torch.where(zero_mask, torch.ones_like(weight_sum), weight_sum)
+            weights_float = weights_float / weight_sum.clamp_min(eps)
+            weights_flat = weights_float.to(dtype=amp.dtype)
+        else:
+            weights_flat = amp
         weights = weights_flat.view(B, 1, 1, -1)
         combined = (stacked * weights).sum(dim=-1)
         return x + combined
@@ -1146,7 +1162,17 @@ def forward(
             mark_slice = x_mark[:, -self.input_len :, :]
         else:
             mark_slice = None
-        enc_x_value = x[:, -self.input_len :, :]
+        enc_x_value = x[:, -self.input_len :, :].clone()
+        if enc_x_value.is_floating_point():
+            invalid_mask = ~torch.isfinite(enc_x_value)
+            if torch.any(invalid_mask):
+                enc_x_value = enc_x_value.masked_fill(invalid_mask, 0.0)
+        else:
+            invalid_mask = ~torch.isfinite(enc_x_value.to(dtype=torch.float32))
+            if torch.any(invalid_mask):
+                raise RuntimeError(
+                    "TimesNet input contains non-finite values in non-floating tensor"
+                )
         enc_x_features = enc_x_value.clone()
         self._ensure_embedding(enc_x_features, mark_slice, series_static, series_ids)
         target_steps = self.pred_len if self.mode == "direct" else self._out_steps