Reward sign handling: warn on suspicious reward signs and add (rather than
subtract) the false-positive reward at episode end.

get_rewards() now logs a warning when the "fail" or "false_positive" reward is
positive, or when the "success" reward is negative. A reward that is absent
from the returned mapping, or is None, is treated as 0 for these checks, so a
caller that requests only a subset of reward names does not hit a
None-versus-int TypeError inside what used to be a plain pass-through.

The coordinator now adds false_positives * rewards["false_positive"] to the
agent reward instead of subtracting it, so a penalty has to be configured as a
negative value.

NOTE(review): this patch was rebuilt from a copy that had been collapsed onto
a single line. Leading whitespace on every hunk line is reconstructed and must
be confirmed against the target files (or applied with whitespace-insensitive
matching). The post-image blob id on the first index line predates the None
guard added here.

diff --git a/netsecgame/game/configuration_manager.py b/netsecgame/game/configuration_manager.py
index 4a5ff752..1e5b58b5 100644
--- a/netsecgame/game/configuration_manager.py
+++ b/netsecgame/game/configuration_manager.py
@@ -134,7 +134,15 @@ def get_rewards(self, reward_names: List[str] = ["step", "success", "fail", "fal
         """Returns the rewards configuration."""
         if not self._parser:
             raise RuntimeError("Configuration not loaded.")
-        return self._parser.get_rewards(reward_names, default_value)
+        rewards = self._parser.get_rewards(reward_names, default_value)
+        # Sanity-check reward signs only; a missing or None entry counts as 0 so the check never raises.
+        if (rewards.get("fail") or 0) > 0:
+            self.logger.warning("Fail reward is positive. This is not recommended.")
+        if (rewards.get("false_positive") or 0) > 0:
+            self.logger.warning("False positive reward is positive. This is not recommended.")
+        if (rewards.get("success") or 0) < 0:
+            self.logger.warning("Success reward is negative. This is not recommended.")
+        return rewards
 
     def get_use_dynamic_ips(self, default_value: bool = False) -> bool:
         if not self._parser:
diff --git a/netsecgame/game/coordinator.py b/netsecgame/game/coordinator.py
index 904e3444..38af94e7 100644
--- a/netsecgame/game/coordinator.py
+++ b/netsecgame/game/coordinator.py
@@ -560,7 +560,7 @@ async def _assign_rewards_episode_end(self):
                 self._agent_status[agent] = AgentStatus.Fail
             # dicrease the reward for false positives
             self.logger.debug(f"Processing false positives for agent {agent}: {self._agent_false_positives[agent]}")
-            self._agent_rewards[agent] -= self._agent_false_positives[agent] * self._rewards["false_positive"]
+            self._agent_rewards[agent] += self._agent_false_positives[agent] * self._rewards["false_positive"]
         # clear the episode end event
         self._episode_end_event.clear()
         # notify all waiting agents