Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions internal/controller/nodereadinessrule_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import (
"context"
"errors"
"fmt"
"strings"
"sync"
Expand Down Expand Up @@ -144,6 +145,7 @@
r.Controller.updateRuleCache(ctx, rule)

// Handle dry run
var processErr error
if rule.Spec.DryRun {
if err := r.Controller.processDryRun(ctx, rule, nodeList); err != nil {
log.Error(err, "Failed to process dry run", "rule", rule.Name)
Expand All @@ -153,14 +155,15 @@
// Clear previous dry run results
rule.Status.DryRunResults = readinessv1alpha1.DryRunResults{}

// Process all applicable nodes for this rule
// Process all applicable nodes for this rule; save error so status
// is still updated before retrying.
if err := r.Controller.processAllNodesForRule(ctx, rule, nodeList); err != nil {
log.Error(err, "Failed to process nodes for rule", "rule", rule.Name)
return ctrl.Result{RequeueAfter: time.Minute}, err
processErr = err
}
}

// Update rule status
// Update rule status regardless of evaluation errors above
if err := r.Controller.updateRuleStatus(ctx, rule); err != nil {
log.Error(err, "Failed to update rule status", "rule", rule.Name)
return ctrl.Result{RequeueAfter: time.Minute}, err
Expand All @@ -172,6 +175,9 @@
return ctrl.Result{RequeueAfter: time.Minute}, err
}

if processErr != nil {
return ctrl.Result{RequeueAfter: time.Minute}, processErr
}
return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -258,13 +264,14 @@

// processAllNodesForRule processes all nodes when a rule changes.
//
//nolint:unparam // Keep error return for future extensibility and API stability.

Check failure on line 267 in internal/controller/nodereadinessrule_controller.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

directive `//nolint:unparam // Keep error return for future extensibility and API stability.` is unused for linter "unparam" (nolintlint)
func (r *RuleReadinessController) processAllNodesForRule(ctx context.Context, rule *readinessv1alpha1.NodeReadinessRule, nodeList *corev1.NodeList) error {
log := ctrl.LoggerFrom(ctx)

log.Info("Processing all nodes for rule", "rule", rule.Name, "totalNodes", len(nodeList.Items))

var appliedNodes []string
var errs []error
for _, node := range nodeList.Items {
if r.ruleAppliesTo(ctx, rule, &node) {
appliedNodes = append(appliedNodes, node.Name)
Expand All @@ -274,6 +281,7 @@
log.Error(err, "Failed to evaluate node for rule", "rule", rule.Name, "node", node.Name)
r.recordNodeFailure(rule, node.Name, "EvaluationError", err.Error())
metrics.Failures.WithLabelValues(rule.Name, "EvaluationError").Inc()
errs = append(errs, err)
}
}
}
Expand All @@ -287,7 +295,7 @@
}

log.Info("Completed processing nodes for rule", "rule", rule.Name, "processedCount", len(appliedNodes))
return nil
return errors.Join(errs...)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

returning an error will keep the caller function(in this case this) in an infinite loop. Other rules will not be evaluated too. It will also not update the status(here) till the error is fixed.

Copy link
Copy Markdown
Author

@dorodb-web22 dorodb-web22 May 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the early return was skipping updateRuleStatus and cleanupDeletedNodes. i ffixed by saving the error, letting status update complete, then returning the error at the end to trigger requeue.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

requeue on transient errors like API conflicts or patch failures.

can you also add unit tests to capture these gaps that this PR aims to address?

}

// evaluateRuleForNode evaluates a single rule against a single node.
Expand Down Expand Up @@ -586,7 +594,7 @@
func (r *RuleReadinessController) cleanupTaintsForRule(ctx context.Context, rule *readinessv1alpha1.NodeReadinessRule, nodeList *corev1.NodeList) error {
log := ctrl.LoggerFrom(ctx)

var errors []string

Check failure on line 597 in internal/controller/nodereadinessrule_controller.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

import-shadowing: The name 'errors' shadows an import name (revive)
for _, node := range nodeList.Items {
if !r.ruleAppliesTo(ctx, rule, &node) {
continue
Expand Down Expand Up @@ -623,7 +631,7 @@
}

// Clean up nodes that matched old selector but not new selector
var errors []string

Check failure on line 634 in internal/controller/nodereadinessrule_controller.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

import-shadowing: The name 'errors' shadows an import name (revive)
for _, node := range nodeList.Items {
// Check if node matched old selector
matchedOld := false
Expand Down
Loading