From e5fc75eabf506f933946c9d1090ad522d5f28bdc Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Thu, 5 Mar 2026 18:38:44 +0100
Subject: [PATCH 1/2] Add warnings for rewards with unexpected signs

---
 netsecgame/game/configuration_manager.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/netsecgame/game/configuration_manager.py b/netsecgame/game/configuration_manager.py
index 4a5ff752..1e5b58b5 100644
--- a/netsecgame/game/configuration_manager.py
+++ b/netsecgame/game/configuration_manager.py
@@ -134,7 +134,14 @@ def get_rewards(self, reward_names: List[str] = ["step", "success", "fail", "fal
         """Returns the rewards configuration."""
         if not self._parser:
             raise RuntimeError("Configuration not loaded.")
-        return self._parser.get_rewards(reward_names, default_value)
+        rewards = self._parser.get_rewards(reward_names, default_value)
+        if rewards.get("fail") > 0:
+            self.logger.warning("Fail reward is positive. This is not recommended.")
+        if rewards.get("false_positive") > 0:
+            self.logger.warning("False positive reward is positive. This is not recommended.")
+        if rewards.get("success") < 0:
+            self.logger.warning("Success reward is negative. This is not recommended.")
+        return rewards
         
     def get_use_dynamic_ips(self, default_value: bool = False) -> bool:
         if not self._parser:

From 010526d52596af7dc3564587f98af83c51b63f33 Mon Sep 17 00:00:00 2001
From: Ondrej Lukas <ondrej.lukas95@gmail.com>
Date: Thu, 5 Mar 2026 18:40:12 +0100
Subject: [PATCH 2/2] Fix the sign of FP reward (assume non-positive value)

---
 netsecgame/game/coordinator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/netsecgame/game/coordinator.py b/netsecgame/game/coordinator.py
index 904e3444..38af94e7 100644
--- a/netsecgame/game/coordinator.py
+++ b/netsecgame/game/coordinator.py
@@ -560,7 +560,7 @@ async def _assign_rewards_episode_end(self):
                         self._agent_status[agent] = AgentStatus.Fail
                     # dicrease the reward for false positives
                     self.logger.debug(f"Processing false positives for agent {agent}: {self._agent_false_positives[agent]}")
-                    self._agent_rewards[agent] -= self._agent_false_positives[agent] * self._rewards["false_positive"]
+                    self._agent_rewards[agent] += self._agent_false_positives[agent] * self._rewards["false_positive"]
             # clear the episode end event
             self._episode_end_event.clear()
             # notify all waiting agents