Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 75 additions & 4 deletions internal/controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,13 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
func (r *RuleReadinessController) processNodeAgainstAllRules(ctx context.Context, node *corev1.Node) error {
log := ctrl.LoggerFrom(ctx)

// Get all known (cached) applicable rules for this node
applicableRules := r.getApplicableRulesForNode(ctx, node)
// Get all known cached rules that either currently match this node or
// previously recorded status for it and may need cleanup.
rules := r.getRulesForNodeReconcile(ctx, node)
var errs []error
log.Info("Processing node against rules", "node", node.Name, "ruleCount", len(applicableRules))
log.Info("Processing node against rules", "node", node.Name, "ruleCount", len(rules))

for _, rule := range applicableRules {
for _, rule := range rules {
log.V(4).Info("Processing rule from cache",
"node", node.Name,
"rule", rule.Name,
Expand All @@ -128,6 +129,15 @@ func (r *RuleReadinessController) processNodeAgainstAllRules(ctx context.Context
continue
}

if !r.ruleAppliesTo(ctx, rule, node) {
if err := r.cleanupNodeForUnmatchedRule(ctx, rule, node); err != nil {
log.Error(err, "Failed to cleanup node for unmatched rule",
"node", node.Name, "rule", rule.Name)
errs = append(errs, err)
}
continue
}

// Skip if bootstrap-only and already completed
if r.isBootstrapCompleted(ctx, node.Name, rule.Name) && rule.Spec.EnforcementMode == readinessv1alpha1.EnforcementModeBootstrapOnly {
log.Info("Skipping bootstrap-only rule - already completed",
Expand Down Expand Up @@ -228,6 +238,67 @@ func (r *RuleReadinessController) processNodeAgainstAllRules(ctx context.Context
return errors.Join(errs...)
}

func (r *RuleReadinessController) cleanupNodeForUnmatchedRule(ctx context.Context, rule *readinessv1alpha1.NodeReadinessRule, node *corev1.Node) error {
log := ctrl.LoggerFrom(ctx)
if !ruleHasNodeStatus(rule, node.Name) {
return nil
}

if r.hasTaintBySpec(node, rule.Spec.Taint) {
log.Info("Removing taint from node that no longer matches rule selector",
"node", node.Name,
"rule", rule.Name,
"taint", rule.Spec.Taint.Key)
if err := r.removeTaintBySpec(ctx, node, rule.Spec.Taint, rule.Name); err != nil {
metrics.Failures.WithLabelValues(rule.Name, "RemoveTaintError").Inc()
return fmt.Errorf("failed to remove taint after selector mismatch: %w", err)
}
metrics.TaintOperations.WithLabelValues(rule.Name, "remove").Inc()
}

if err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
latestRule := &readinessv1alpha1.NodeReadinessRule{}
if err := r.Get(ctx, client.ObjectKey{Name: rule.Name}, latestRule); err != nil {
return err
}

patch := client.MergeFrom(latestRule.DeepCopy())
removeNodeFromRuleStatus(latestRule, node.Name)
return r.Status().Patch(ctx, latestRule, patch)
}); err != nil {
return err
}

removeNodeFromRuleStatus(rule, node.Name)
return nil
}

func removeNodeFromRuleStatus(rule *readinessv1alpha1.NodeReadinessRule, nodeName string) {
var appliedNodes []string
for _, appliedNode := range rule.Status.AppliedNodes {
if appliedNode != nodeName {
appliedNodes = append(appliedNodes, appliedNode)
}
}
rule.Status.AppliedNodes = appliedNodes

var nodeEvaluations []readinessv1alpha1.NodeEvaluation
for _, evaluation := range rule.Status.NodeEvaluations {
if evaluation.NodeName != nodeName {
nodeEvaluations = append(nodeEvaluations, evaluation)
}
}
rule.Status.NodeEvaluations = nodeEvaluations

var failedNodes []readinessv1alpha1.NodeFailure
for _, failure := range rule.Status.FailedNodes {
if failure.NodeName != nodeName {
failedNodes = append(failedNodes, failure)
}
}
rule.Status.FailedNodes = failedNodes
}

// getConditionStatus gets the status of a condition on a node.
func (r *RuleReadinessController) getConditionStatus(node *corev1.Node, conditionType string) corev1.ConditionStatus {
for _, condition := range node.Status.Conditions {
Expand Down
54 changes: 54 additions & 0 deletions internal/controller/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,60 @@ var _ = Describe("Node Controller", func() {
return false
}, time.Second*5).Should(BeTrue())
})

It("should remove a managed taint when node labels stop matching the rule selector", func() {
_, err := nodeReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: namespacedName})
Expect(err).NotTo(HaveOccurred())

By("verifying the rule recorded this node as evaluated")
Eventually(func() bool {
updatedRule := &nodereadinessiov1alpha1.NodeReadinessRule{}
if err := k8sClient.Get(ctx, types.NamespacedName{Name: ruleName}, updatedRule); err != nil {
return false
}
for _, evaluation := range updatedRule.Status.NodeEvaluations {
if evaluation.NodeName == nodeName {
return true
}
}
return false
}, time.Second*5).Should(BeTrue())

By("changing the node labels so the rule no longer applies")
updatedNode := &corev1.Node{}
Expect(k8sClient.Get(ctx, namespacedName, updatedNode)).To(Succeed())
updatedNode.Labels = map[string]string{"env": "other"}
Expect(k8sClient.Update(ctx, updatedNode)).To(Succeed())

_, err = nodeReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: namespacedName})
Expect(err).NotTo(HaveOccurred())

By("verifying the taint managed by the rule is removed")
Eventually(func() bool {
recheckedNode := &corev1.Node{}
_ = k8sClient.Get(ctx, namespacedName, recheckedNode)
for _, taint := range recheckedNode.Spec.Taints {
if taint.Key == taintKey {
return true
}
}
return false
}, time.Second*5).Should(BeFalse())

By("verifying node-specific rule status is removed")
Eventually(func() bool {
updatedRule := &nodereadinessiov1alpha1.NodeReadinessRule{}
if err := k8sClient.Get(ctx, types.NamespacedName{Name: ruleName}, updatedRule); err != nil {
return false
}
for _, evaluation := range updatedRule.Status.NodeEvaluations {
if evaluation.NodeName == nodeName {
return false
}
}
return true
}, time.Second*5).Should(BeTrue())
})
})

When("a rule's node selector does not match", func() {
Expand Down
32 changes: 26 additions & 6 deletions internal/controller/nodereadinessrule_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,20 +417,40 @@ func (r *RuleReadinessController) updateNodeEvaluationStatus(
nodeEval.LastEvaluationTime = metav1.Now()
}

// getApplicableRulesForNode returns all rules applicable to a node.
func (r *RuleReadinessController) getApplicableRulesForNode(ctx context.Context, node *corev1.Node) []*readinessv1alpha1.NodeReadinessRule {
// getRulesForNodeReconcile returns rules applicable to a node plus rules that
// previously recorded this node and may need cleanup after label changes.
func (r *RuleReadinessController) getRulesForNodeReconcile(ctx context.Context, node *corev1.Node) []*readinessv1alpha1.NodeReadinessRule {
r.ruleCacheMutex.RLock()
defer r.ruleCacheMutex.RUnlock()

var applicableRules []*readinessv1alpha1.NodeReadinessRule
var rules []*readinessv1alpha1.NodeReadinessRule

for _, rule := range r.ruleCache {
if r.ruleAppliesTo(ctx, rule, node) {
applicableRules = append(applicableRules, rule)
if r.ruleAppliesTo(ctx, rule, node) || ruleHasNodeStatus(rule, node.Name) {
rules = append(rules, rule)
}
}

return applicableRules
return rules
}

func ruleHasNodeStatus(rule *readinessv1alpha1.NodeReadinessRule, nodeName string) bool {
for _, appliedNode := range rule.Status.AppliedNodes {
if appliedNode == nodeName {
return true
}
}
for _, evaluation := range rule.Status.NodeEvaluations {
if evaluation.NodeName == nodeName {
return true
}
}
for _, failure := range rule.Status.FailedNodes {
if failure.NodeName == nodeName {
return true
}
}
return false
}

// ruleAppliesTo checks if a rule applies to a node.
Expand Down