From 5c40403ba7e720688877932a1bb27ce8ff515982 Mon Sep 17 00:00:00 2001 From: Rawad Hossain Date: Tue, 5 May 2026 02:02:45 +0600 Subject: [PATCH] fix bootstrap-only re-taint when annotation fails Signed-off-by: Rawad Hossain --- internal/controller/node_controller.go | 10 +- .../nodereadinessrule_controller.go | 17 +- .../nodereadinessrule_controller_test.go | 145 +++++++++++++++++- 3 files changed, 164 insertions(+), 8 deletions(-) diff --git a/internal/controller/node_controller.go b/internal/controller/node_controller.go index 9dc596a..de74dbb 100644 --- a/internal/controller/node_controller.go +++ b/internal/controller/node_controller.go @@ -316,6 +316,8 @@ func (r *RuleReadinessController) removeTaintBySpec(ctx context.Context, node *c }) } +const bootstrapAnnotationKeyPrefix = "readiness.k8s.io/bootstrap-completed-" + // Bootstrap completion tracking. func (r *RuleReadinessController) isBootstrapCompleted(ctx context.Context, nodeName, ruleName string) bool { // Check node annotation @@ -324,15 +326,15 @@ func (r *RuleReadinessController) isBootstrapCompleted(ctx context.Context, node return false } - annotationKey := fmt.Sprintf("readiness.k8s.io/bootstrap-completed-%s", ruleName) + annotationKey := bootstrapAnnotationKeyPrefix + ruleName _, exists := node.Annotations[annotationKey] return exists } -func (r *RuleReadinessController) markBootstrapCompleted(ctx context.Context, nodeName, ruleName string) { +func (r *RuleReadinessController) markBootstrapCompleted(ctx context.Context, nodeName, ruleName string) error { log := ctrl.LoggerFrom(ctx) - annotationKey := fmt.Sprintf("readiness.k8s.io/bootstrap-completed-%s", ruleName) + annotationKey := bootstrapAnnotationKeyPrefix + ruleName marked := false // retry to handle conflict with concurrent node updates @@ -368,12 +370,14 @@ func (r *RuleReadinessController) markBootstrapCompleted(ctx context.Context, no switch { case err != nil: log.Error(err, "Failed to mark bootstrap completed", "node", nodeName, "rule", ruleName) + return err case marked: log.Info("Marked bootstrap completed", "node", nodeName, "rule", ruleName) metrics.BootstrapCompleted.WithLabelValues(ruleName).Inc() default: log.V(4).Info("Bootstrap already completed", "node", nodeName, "rule", ruleName) } + return nil } // recordNodeFailure records a failure for a specific node. diff --git a/internal/controller/nodereadinessrule_controller.go b/internal/controller/nodereadinessrule_controller.go index 99347e9..b29a8e4 100644 --- a/internal/controller/nodereadinessrule_controller.go +++ b/internal/controller/nodereadinessrule_controller.go @@ -271,6 +271,11 @@ func (r *RuleReadinessController) processAllNodesForRule(ctx context.Context, ru for _, node := range nodeList.Items { if r.ruleAppliesTo(ctx, rule, &node) { appliedNodes = append(appliedNodes, node.Name) + if r.isBootstrapCompleted(ctx, node.Name, rule.Name) && rule.Spec.EnforcementMode == readinessv1alpha1.EnforcementModeBootstrapOnly { + log.Info("Skipping bootstrap-only rule - already completed", + "node", node.Name, "rule", rule.Name) + continue + } log.Info("Processing node for rule", "rule", rule.Name, "node", node.Name) if err := r.evaluateRuleForNode(ctx, rule, &node); err != nil { // Log error but continue with other nodes @@ -345,7 +350,17 @@ func (r *RuleReadinessController) evaluateRuleForNode(ctx context.Context, rule // Mark bootstrap completed if bootstrap-only mode if rule.Spec.EnforcementMode == readinessv1alpha1.EnforcementModeBootstrapOnly { - r.markBootstrapCompleted(ctx, node.Name, rule.Name) + if err := r.markBootstrapCompleted(ctx, node.Name, rule.Name); err != nil { + // Taint is already removed, keep NodeEvaluation in sync with the node + var taintStatus readinessv1alpha1.TaintStatus + if r.hasTaintBySpec(node, rule.Spec.Taint) { + taintStatus = readinessv1alpha1.TaintStatusPresent + } else { + taintStatus = readinessv1alpha1.TaintStatusAbsent + } + r.updateNodeEvaluationStatus(rule, node.Name, conditionResults, taintStatus) + return fmt.Errorf("failed to mark bootstrap completed: %w", err) + } } case !shouldRemoveTaint && !currentlyHasTaint: diff --git a/internal/controller/nodereadinessrule_controller_test.go b/internal/controller/nodereadinessrule_controller_test.go index 03ebb90..8435435 100644 --- a/internal/controller/nodereadinessrule_controller_test.go +++ b/internal/controller/nodereadinessrule_controller_test.go @@ -18,6 +18,8 @@ package controller import ( "context" + "fmt" + "strings" "time" . "github.com/onsi/ginkgo/v2" @@ -31,6 +33,8 @@ import ( "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" + fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" "sigs.k8s.io/controller-runtime/pkg/reconcile" nodereadinessiov1alpha1 "sigs.k8s.io/node-readiness-controller/api/v1alpha1" @@ -519,7 +523,7 @@ var _ = Describe("NodeReadinessRule Controller", func() { defer func() { _ = k8sClient.Delete(ctx, node) }() // Mark as completed - readinessController.markBootstrapCompleted(ctx, nodeName, ruleName) + _ = readinessController.markBootstrapCompleted(ctx, nodeName, ruleName) // Should now be completed Eventually(func() bool { @@ -569,7 +573,7 @@ var _ = Describe("NodeReadinessRule Controller", func() { defer func() { _ = k8sClient.Delete(ctx, node) }() // Mark bootstrap completed - readinessController.markBootstrapCompleted(ctx, nodeName, ruleName) + _ = readinessController.markBootstrapCompleted(ctx, nodeName, ruleName) // Verify annotation was added and existing annotation is preserved Eventually(func(g Gomega) { @@ -597,11 +601,144 @@ var _ = Describe("NodeReadinessRule Controller", func() { counter := metrics.BootstrapCompleted.WithLabelValues(ruleName) before := counterValue(counter) - readinessController.markBootstrapCompleted(ctx, nodeName, ruleName) - readinessController.markBootstrapCompleted(ctx, nodeName, ruleName) + _ = readinessController.markBootstrapCompleted(ctx, nodeName, ruleName) + _ = readinessController.markBootstrapCompleted(ctx, nodeName, ruleName) Expect(counterValue(counter)).To(Equal(before + 1)) }) + + It("should fail when bootstrap annotation patch fails after taint removal", func() { + nodeName := "bootstrap-annotation-patch-failure-node" + ruleName := "bootstrap-annotation-patch-failure-rule" + + testScheme := runtime.NewScheme() + Expect(corev1.AddToScheme(testScheme)).To(Succeed()) + Expect(nodereadinessiov1alpha1.AddToScheme(testScheme)).To(Succeed()) + + // Node is Ready=True and has the rule taint, so removal path is taken + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Labels: map[string]string{"test": "bootstrap-annotation-patch-failure"}, + }, + Spec: corev1.NodeSpec{ + Taints: []corev1.Taint{ + {Key: "readiness.k8s.io/bootstrap-annotation-patch-failure", Effect: corev1.TaintEffectNoSchedule}, + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{{Type: "Ready", Status: corev1.ConditionTrue}}, + }, + } + + rule := &nodereadinessiov1alpha1.NodeReadinessRule{ + ObjectMeta: metav1.ObjectMeta{Name: ruleName}, + Spec: nodereadinessiov1alpha1.NodeReadinessRuleSpec{ + Conditions: []nodereadinessiov1alpha1.ConditionRequirement{ + {Type: "Ready", RequiredStatus: corev1.ConditionTrue}, + }, + Taint: corev1.Taint{ + Key: "readiness.k8s.io/bootstrap-annotation-patch-failure", + Effect: corev1.TaintEffectNoSchedule, + }, + EnforcementMode: nodereadinessiov1alpha1.EnforcementModeBootstrapOnly, + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"test": "bootstrap-annotation-patch-failure"}, + }, + }, + } + + fc := fakeclient.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(node). + WithInterceptorFuncs(interceptor.Funcs{ + Patch: func(ctx context.Context, c client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { + if _, ok := obj.(*corev1.Node); !ok { + return c.Patch(ctx, obj, patch, opts...) + } + data, err := patch.Data(obj) + if err != nil { + return c.Patch(ctx, obj, patch, opts...) + } + if strings.Contains(string(data), bootstrapAnnotationKeyPrefix) { + return fmt.Errorf("simulated bootstrap annotation patch failure") + } + return c.Patch(ctx, obj, patch, opts...) + }, + }). + Build() + + testController := &RuleReadinessController{ + Client: fc, + Scheme: testScheme, + clientset: fake.NewSimpleClientset(), + ruleCache: make(map[string]*nodereadinessiov1alpha1.NodeReadinessRule), + EventRecorder: record.NewFakeRecorder(10), + } + + nodeForEval := &corev1.Node{} + Expect(fc.Get(ctx, types.NamespacedName{Name: nodeName}, nodeForEval)).To(Succeed()) + err := testController.evaluateRuleForNode(ctx, rule, nodeForEval) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to mark bootstrap completed")) + + updated := &corev1.Node{} + Expect(fc.Get(ctx, types.NamespacedName{Name: nodeName}, updated)).To(Succeed()) + Expect(testController.hasTaintBySpec(updated, rule.Spec.Taint)).To(BeFalse()) + _, hasAnnot := updated.Annotations[bootstrapAnnotationKeyPrefix+ruleName] + Expect(hasAnnot).To(BeFalse()) + }) + + It("should skip evaluation when bootstrap is already completed", func() { + nodeName := "bootstrap-already-completed-node" + ruleName := "bootstrap-already-completed-rule" + + rule := &nodereadinessiov1alpha1.NodeReadinessRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: ruleName, + Finalizers: []string{finalizerName}, + }, + Spec: nodereadinessiov1alpha1.NodeReadinessRuleSpec{ + Conditions: []nodereadinessiov1alpha1.ConditionRequirement{ + {Type: "Ready", RequiredStatus: corev1.ConditionTrue}, + }, + Taint: corev1.Taint{ + Key: "readiness.k8s.io/bootstrap-already-completed", + Effect: corev1.TaintEffectNoSchedule, + }, + EnforcementMode: nodereadinessiov1alpha1.EnforcementModeBootstrapOnly, + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"test": "bootstrap-already-completed"}, + }, + }, + } + Expect(k8sClient.Create(ctx, rule)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, rule) }() + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Labels: map[string]string{"test": "bootstrap-already-completed"}, + Annotations: map[string]string{ + bootstrapAnnotationKeyPrefix + ruleName: "true", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{{Type: "Ready", Status: corev1.ConditionFalse}}, + }, + } + Expect(k8sClient.Create(ctx, node)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, node) }() + + readinessController.updateRuleCache(ctx, rule) + Expect(readinessController.processAllNodesForRule(ctx, rule)).To(Succeed()) + + Expect(rule.Status.AppliedNodes).To(ContainElement(nodeName)) + updatedNode := &corev1.Node{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nodeName}, updatedNode)).To(Succeed()) + // Check only for the rule taint, envtest adds node.kubernetes.io/not-ready when Ready=False + Expect(readinessController.hasTaintBySpec(updatedNode, rule.Spec.Taint)).To(BeFalse()) + }) }) Context("when a new rule is created", func() {