From e5fc75eabf506f933946c9d1090ad522d5f28bdc Mon Sep 17 00:00:00 2001 From: Ondrej Lukas Date: Thu, 5 Mar 2026 18:38:44 +0100 Subject: [PATCH 1/2] Add check for negative rewards --- netsecgame/game/configuration_manager.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/netsecgame/game/configuration_manager.py b/netsecgame/game/configuration_manager.py index 4a5ff752..1e5b58b5 100644 --- a/netsecgame/game/configuration_manager.py +++ b/netsecgame/game/configuration_manager.py @@ -134,7 +134,14 @@ def get_rewards(self, reward_names: List[str] = ["step", "success", "fail", "fal """Returns the rewards configuration.""" if not self._parser: raise RuntimeError("Configuration not loaded.") - return self._parser.get_rewards(reward_names, default_value) + rewards = self._parser.get_rewards(reward_names, default_value) + if rewards.get("fail") > 0: + self.logger.warning("Fail reward is positive. This is not recommended.") + if rewards.get("false_positive") > 0: + self.logger.warning("False positive reward is positive. This is not recommended.") + if rewards.get("success") < 0: + self.logger.warning("Success reward is negative. This is not recommended.") + return rewards def get_use_dynamic_ips(self, default_value: bool = False) -> bool: if not self._parser: From 010526d52596af7dc3564587f98af83c51b63f33 Mon Sep 17 00:00:00 2001 From: Ondrej Lukas Date: Thu, 5 Mar 2026 18:40:12 +0100 Subject: [PATCH 2/2] Fix the sign of FP reward (assume non-positive value) --- netsecgame/game/coordinator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netsecgame/game/coordinator.py b/netsecgame/game/coordinator.py index 904e3444..38af94e7 100644 --- a/netsecgame/game/coordinator.py +++ b/netsecgame/game/coordinator.py @@ -560,7 +560,7 @@ async def _assign_rewards_episode_end(self): self._agent_status[agent] = AgentStatus.Fail # dicrease the reward for false positives self.logger.debug(f"Processing false positives for agent {agent}: {self._agent_false_positives[agent]}") - self._agent_rewards[agent] -= self._agent_false_positives[agent] * self._rewards["false_positive"] + self._agent_rewards[agent] += self._agent_false_positives[agent] * self._rewards["false_positive"] # clear the episode end event self._episode_end_event.clear() # notify all waiting agents