Nerogar · O-J1 · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026
diff --git a/modules/trainer/BaseTrainer.py b/modules/trainer/BaseTrainer.py
@@ -97,7 +97,29 @@ def _start_tensorboard(self):
         if self.config.tensorboard_expose:
             tensorboard_args.append("--bind_all")
 
-        self.tensorboard_subprocess = subprocess.Popen(tensorboard_args)
+        self.tensorboard_subprocess = subprocess.Popen(
+            tensorboard_args, stderr=subprocess.DEVNULL,
+        )
 
     def _stop_tensorboard(self):
-        self.tensorboard_subprocess.kill()
+        """Stop the TensorBoard subprocess if one was started.
+
+        Requests a graceful exit with terminate() and waits up to 5 seconds;
+        a process that is still alive after that is killed and reaped.
+        Shutdown is best effort: failures are printed, never raised.
+        """
+        # The attribute is only assigned in _start_tensorboard, so tolerate its absence.
+        process = getattr(self, 'tensorboard_subprocess', None)
+        if process is None:
+            return
+
+        try:
+            process.terminate()
+            try:
+                process.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                process.kill()
+                # Reap the killed process so it does not linger as a zombie.
+                process.wait()
+        except Exception as e:
+            print(f"Could not stop tensorboard: {e}")
diff --git a/modules/trainer/CloudTrainer.py b/modules/trainer/CloudTrainer.py
@@ -29,7 +29,7 @@ def __init__(self, config: TrainConfig, callbacks: TrainCallbacks, commands: Tra
 
         tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard")
         os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True)
-        if config.tensorboard and not config.cloud.tensorboard_tunnel and not config.tensorboard_always_on:
+        if config.tensorboard_is_train_only and not config.cloud.tensorboard_tunnel:
             super()._start_tensorboard()
 
         match config.cloud.type:
@@ -113,7 +113,7 @@ def train(self):
 
     def end(self):
         try:
-            if self.config.tensorboard and not self.config.cloud.tensorboard_tunnel and not self.config.tensorboard_always_on:
+            if self.config.tensorboard_is_train_only and not self.config.cloud.tensorboard_tunnel:
                 super()._stop_tensorboard()
 
             if self.config.cloud.delete_workspace and not self.error_caught and not self.commands.get_stop_command():

diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py
@@ -68,10 +68,13 @@ def __init__(self, config: TrainConfig, callbacks: TrainCallbacks, commands: Tra
         super().__init__(config, callbacks, commands)
 
         if multi.is_master():
-            tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard")
-            os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True)
-            self.tensorboard = SummaryWriter(os.path.join(tensorboard_log_dir, f"{config.save_filename_prefix}{get_string_timestamp()}"))
-            if config.tensorboard and not config.tensorboard_always_on:
+            if config.tensorboard_enabled:
+                tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard")
+                os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True)
+                self.tensorboard = SummaryWriter(os.path.join(tensorboard_log_dir, f"{config.save_filename_prefix}{get_string_timestamp()}"))
+            else:
+                self.tensorboard = None
+            if config.tensorboard_is_train_only:
                 super()._start_tensorboard()
 
         self.model = None
@@ -183,17 +186,20 @@ def __prune_backups(self, backups_to_keep: int):
         backup_dirpath = os.path.join(self.config.workspace_dir, "backup")
         if os.path.exists(backup_dirpath):
             backup_directories = sorted(
-                [dirpath for dirpath in os.listdir(backup_dirpath) if
-                 os.path.isdir(os.path.join(backup_dirpath, dirpath))],
+                [name for name in os.listdir(backup_dirpath) if
+                 os.path.isdir(os.path.join(backup_dirpath, name))],
+                key=lambda n: TrainConfig._extract_backup_datetime(
+                    os.path.join(backup_dirpath, n), n
+                ),
                 reverse=True,
             )
 
-            for dirpath in backup_directories[backups_to_keep:]:
-                dirpath = os.path.join(backup_dirpath, dirpath)
+            for name in backup_directories[backups_to_keep:]:
+                full = os.path.join(backup_dirpath, name)
                 try:
-                    shutil.rmtree(dirpath)
+                    shutil.rmtree(full)
                 except Exception:
-                    print(f"Could not delete old rolling backup {dirpath}")
+                    print(f"Could not delete old rolling backup {full}")
 
         return
 
@@ -240,7 +246,7 @@ def __sample_loop(
                 )
 
                 def on_sample_default(sampler_output: ModelSamplerOutput):
-                    if self.config.samples_to_tensorboard and sampler_output.file_type == FileType.IMAGE:
+                    if self.config.samples_to_tensorboard and self.tensorboard is not None and sampler_output.file_type == FileType.IMAGE:
                         self.tensorboard.add_image(
                             f"sample{str(i)} - {safe_prompt}", pil_to_tensor(sampler_output.data),  # noqa: B023
                             train_progress.global_step
@@ -400,18 +406,20 @@ def __validate(self, train_progress: TrainProgress):
             for concept_seed, total_loss in accumulated_loss_per_concept.items():
                 average_loss = total_loss / concept_counts[concept_seed]
 
-                self.tensorboard.add_scalar(f"loss/validation_step/{mapping_seed_to_label[concept_seed]}",
-                                            average_loss,
-                                            train_progress.global_step)
+                if self.tensorboard is not None:
+                    self.tensorboard.add_scalar(f"loss/validation_step/{mapping_seed_to_label[concept_seed]}",
+                                                average_loss,
+                                                train_progress.global_step)
 
             if len(concept_counts) > 1:
                 total_loss = sum(accumulated_loss_per_concept[key] for key in concept_counts)
                 total_count = sum(concept_counts[key] for key in concept_counts)
                 total_average_loss = total_loss / total_count
 
-                self.tensorboard.add_scalar("loss/validation_step/total_average",
-                                            total_average_loss,
-                                            train_progress.global_step)
+                if self.tensorboard is not None:
+                    self.tensorboard.add_scalar("loss/validation_step/total_average",
+                                                total_average_loss,
+                                                train_progress.global_step)
 
     def __save_backup_config(self, backup_path):
         config_path = os.path.join(backup_path, "onetrainer_config")
@@ -433,7 +441,8 @@ def __backup(self, train_progress: TrainProgress, print_msg: bool = True, print_
 
         self.callbacks.on_update_status("Creating backup")
 
-        backup_name = f"{get_string_timestamp()}-backup-{train_progress.filename_string()}"
+        safe_prefix = path_util.safe_filename(self.config.save_filename_prefix, max_length=None)
+        backup_name = f"{safe_prefix}{get_string_timestamp()}-backup-{train_progress.filename_string()}"
         backup_path = os.path.join(self.config.workspace_dir, "backup", backup_name)
 
         # Special case for schedule-free optimizers.
@@ -480,10 +489,11 @@ def __save(self, train_progress: TrainProgress, print_msg: bool = True, print_cb
 
         self.callbacks.on_update_status("Saving")
 
+        safe_prefix = path_util.safe_filename(self.config.save_filename_prefix, max_length=None)
         save_path = os.path.join(
             self.config.workspace_dir,
             "save",
-            f"{self.config.save_filename_prefix}{get_string_timestamp()}-save-{train_progress.filename_string()}{self.config.output_model_format.file_extension()}"
+            f"{safe_prefix}{get_string_timestamp()}-save-{train_progress.filename_string()}{self.config.output_model_format.file_extension()}"
         )
         if print_msg:
             print_cb("Saving " + save_path)
@@ -784,15 +794,17 @@ def sample_commands_fun():
                         has_gradient = False
 
                         if multi.is_master():
-                            self.model_setup.report_to_tensorboard(
-                                self.model, self.config, lr_scheduler, self.tensorboard
-                            )
+                            if self.tensorboard is not None:
+                                self.model_setup.report_to_tensorboard(
+                                    self.model, self.config, lr_scheduler, self.tensorboard
+                                )
 
                             accumulated_loss_cpu = accumulated_loss.item()
                             if math.isnan(accumulated_loss_cpu):
                                 raise RuntimeError("Training loss became NaN. This may be due to invalid parameters, precision issues, or a bug in the loss computation.")
 
-                            self.tensorboard.add_scalar("loss/train_step",accumulated_loss_cpu , train_progress.global_step)
+                            if self.tensorboard is not None:
+                                self.tensorboard.add_scalar("loss/train_step",accumulated_loss_cpu , train_progress.global_step)
                             ema_loss = ema_loss or accumulated_loss_cpu
                             ema_loss_steps += 1
                             ema_loss_decay = min(0.99, 1 - (1 / ema_loss_steps))
@@ -801,19 +813,21 @@ def sample_commands_fun():
                                 'loss': accumulated_loss_cpu,
                                 'smooth loss': ema_loss,
                             })
-                            self.tensorboard.add_scalar("smooth_loss/train_step", ema_loss, train_progress.global_step)
+                            if self.tensorboard is not None:
+                                self.tensorboard.add_scalar("smooth_loss/train_step", ema_loss, train_progress.global_step)
 
                         accumulated_loss = 0.0
                         self.model_setup.after_optimizer_step(self.model, self.config, train_progress)
 
                         if self.model.ema:
                             assert multi.is_master()
                             update_step = train_progress.global_step // self.config.gradient_accumulation_steps
-                            self.tensorboard.add_scalar(
-                                "ema_decay",
-                                self.model.ema.get_current_decay(update_step),
-                                train_progress.global_step
-                            )
+                            if self.tensorboard is not None:
+                                self.tensorboard.add_scalar(
+                                    "ema_decay",
+                                    self.model.ema.get_current_decay(update_step),
+                                    train_progress.global_step
+                                )
                             self.model.ema.step(
                                 self.parameters,
                                 update_step
@@ -874,9 +888,10 @@ def end(self):
             self.model.to(self.temp_device)
 
         if multi.is_master():
-            self.tensorboard.close()
+            if self.tensorboard is not None:
+                self.tensorboard.close()
 
-            if self.config.tensorboard and not self.config.tensorboard_always_on:
+            if self.config.tensorboard_is_train_only:
                 super()._stop_tensorboard()
 
         for handle in self.grad_hook_handles:

diff --git a/modules/ui/MuonAdamWindow.py b/modules/ui/MuonAdamWindow.py
@@ -105,6 +105,6 @@ def create_adam_params_ui(self, master):
             components.label(master, row, col, title, tooltip=tooltip)
 
             if param_type != 'bool':
-                components.entry(master, row, col + 1, self.adam_ui_state, key)
+                components.entry(master, row, col + 1, self.adam_ui_state, key, allow_negative=True)
             else:
                 components.switch(master, row, col + 1, self.adam_ui_state, key)
diff --git a/modules/ui/OptimizerParamsWindow.py b/modules/ui/OptimizerParamsWindow.py
@@ -237,7 +237,7 @@ def create_dynamic_ui(
                 self.toggle_muon_adam_button()
             elif type != 'bool':
                 components.entry(master, row, col + 1, self.optimizer_ui_state, key,
-                                 command=self.update_user_pref)
+                                 command=self.update_user_pref, allow_negative=True)
             else:
                 components.switch(master, row, col + 1, self.optimizer_ui_state, key,
                                   command=self.update_user_pref)