diff --git a/internal/controller/node_controller.go b/internal/controller/node_controller.go index fc04d27..1d5d83a 100644 --- a/internal/controller/node_controller.go +++ b/internal/controller/node_controller.go @@ -148,6 +148,8 @@ func (r *RuleReadinessController) processNodeAgainstAllRules(ctx context.Context "node", node.Name, "rule", rule.Name) // Continue with other rules even if one fails r.recordNodeFailure(rule, node.Name, "EvaluationError", err.Error()) + } else { + r.clearNodeFailure(rule, node.Name) } // Persist the rule status diff --git a/internal/controller/nodereadinessrule_controller.go b/internal/controller/nodereadinessrule_controller.go index 99347e9..2020d4f 100644 --- a/internal/controller/nodereadinessrule_controller.go +++ b/internal/controller/nodereadinessrule_controller.go @@ -277,6 +277,8 @@ func (r *RuleReadinessController) processAllNodesForRule(ctx context.Context, ru log.Error(err, "Failed to evaluate node for rule", "rule", rule.Name, "node", node.Name) r.recordNodeFailure(rule, node.Name, "EvaluationError", err.Error()) metrics.Failures.WithLabelValues(rule.Name, "EvaluationError").Inc() + } else { + r.clearNodeFailure(rule, node.Name) } } } @@ -422,7 +424,7 @@ func (r *RuleReadinessController) getApplicableRulesForNode(ctx context.Context, for _, rule := range r.ruleCache { if r.ruleAppliesTo(ctx, rule, node) { - applicableRules = append(applicableRules, rule) + applicableRules = append(applicableRules, rule.DeepCopy()) } } @@ -706,3 +708,14 @@ func (r *RuleReadinessController) getPreviousNodeEvaluation(rule *readinessv1alp } return nil } + +// clearNodeFailure removes any failure record for a specific node from the rule status. +func (r *RuleReadinessController) clearNodeFailure(rule *readinessv1alpha1.NodeReadinessRule, nodeName string) { + var failedNodes []readinessv1alpha1.NodeFailure + for _, failure := range rule.Status.FailedNodes { + if failure.NodeName != nodeName { + failedNodes = append(failedNodes, failure) + } + } + rule.Status.FailedNodes = failedNodes +}