From 5c83d8f04d8e6774c93849c547518568d2f44958 Mon Sep 17 00:00:00 2001 From: KARMAN SINGH TALWAR Date: Thu, 14 May 2026 19:25:30 +0530 Subject: [PATCH] fix: pre-populate rule cache on first NodeReconciler pass to prevent bootstrap-only taint bypass RuleReadinessController maintains an in-memory ruleCache populated lazily as RuleReconciler processes each NodeReadinessRule. NodeReconciler derives applicable rules exclusively from this cache. Both controllers start consuming their work queues concurrently after informer sync, creating a race where node events are processed against an empty or partially-warm cache. For continuous enforcement this window is self-healing. For bootstrap-only rules it is permanent: if a node is evaluated before its applicable rule reaches the cache the taint is never applied, the bootstrap completion annotation is never written, and the admission gate is silently bypassed. Add ensureCacheWarmed to RuleReadinessController. On the first call it lists all NodeReadinessRules from the already-synced informer cache and adds any rule not yet present in ruleCache. Subsequent calls are no-ops guarded by an atomic.Bool. Existing cache entries (including rules already marked for deletion by RuleReconciler) are intentionally preserved so that in-flight state is not overwritten. Call ensureCacheWarmed at the top of NodeReconciler.Reconcile before any rule evaluation takes place. Add two integration tests covering the cold-start scenario: - verifies that a bootstrap-only taint is applied even when ruleCache is empty at reconcile time (rule discovered via warm-up) - verifies that the warm-up is not repeated on subsequent reconciles --- internal/controller/node_controller.go | 10 ++ internal/controller/node_controller_test.go | 144 ++++++++++++++++++ .../nodereadinessrule_controller.go | 44 ++++++ 3 files changed, 198 insertions(+) diff --git a/internal/controller/node_controller.go b/internal/controller/node_controller.go index a3e8aac..5c24ba7 100644 --- a/internal/controller/node_controller.go +++ b/internal/controller/node_controller.go @@ -91,6 +91,16 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. log := ctrl.LoggerFrom(ctx) log.Info("Reconciling node", "node", req.Name) + // Pre-populate the rule cache from the informer cache on the first reconcile. + // This prevents the startup race where this controller processes node events + // before RuleReconciler has reconciled existing rules into the cache. For + // bootstrap-only rules the race is non-recoverable: if the node is evaluated + // with an empty cache the taint is never applied and the bootstrap completion + // annotation is never written. + if err := r.Controller.ensureCacheWarmed(ctx); err != nil { + return ctrl.Result{}, err + } + // Fetch the node node := &corev1.Node{} if err := r.Get(ctx, req.NamespacedName, node); err != nil { diff --git a/internal/controller/node_controller_test.go b/internal/controller/node_controller_test.go index 0036873..fc60390 100644 --- a/internal/controller/node_controller_test.go +++ b/internal/controller/node_controller_test.go @@ -1007,4 +1007,148 @@ var _ = Describe("Node Controller", func() { Expect(err.Error()).To(ContainSubstring("status patch failed")) }) }) + + // These tests verify that NodeReconciler pre-populates the rule cache from the + // informer cache on its first reconcile, closing the startup race where node + // events arrive before RuleReconciler has warmed the cache. The scenario is + // particularly critical for bootstrap-only rules: if the cache is cold when a + // node is first evaluated, the taint is never applied and the bootstrap + // completion annotation is never written, permanently bypassing the admission gate. + Context("cold-start rule cache warm-up", func() { + var ( + ctx context.Context + readinessController *RuleReadinessController + nodeReconciler *NodeReconciler + coldStartNode *corev1.Node + coldStartRule *nodereadinessiov1alpha1.NodeReadinessRule + coldStartNodeName types.NamespacedName + ) + + const ( + coldTaintKey = "readiness.k8s.io/cold-start-taint" + coldConditionType = "ColdStartCondition" + ) + + BeforeEach(func() { + ctx = context.Background() + + // Intentionally do NOT call updateRuleCache — this simulates the + // NodeReconciler processing a node event before the RuleReconciler + // has reconciled the rule and populated the cache. + readinessController = &RuleReadinessController{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + clientset: fake.NewSimpleClientset(), + ruleCache: make(map[string]*nodereadinessiov1alpha1.NodeReadinessRule), + EventRecorder: record.NewFakeRecorder(10), + } + + nodeReconciler = &NodeReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + Controller: readinessController, + } + + coldStartNode = &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cold-start-node", + Labels: map[string]string{"cold-start-group": "test"}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + // Condition not yet satisfied — taint should be applied. + {Type: coldConditionType, Status: corev1.ConditionFalse}, + }, + }, + } + + coldStartRule = &nodereadinessiov1alpha1.NodeReadinessRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cold-start-rule", + Finalizers: []string{finalizerName}, + }, + Spec: nodereadinessiov1alpha1.NodeReadinessRuleSpec{ + Conditions: []nodereadinessiov1alpha1.ConditionRequirement{ + {Type: coldConditionType, RequiredStatus: corev1.ConditionTrue}, + }, + Taint: corev1.Taint{ + Key: coldTaintKey, + Effect: corev1.TaintEffectNoSchedule, + }, + NodeSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"cold-start-group": "test"}, + }, + EnforcementMode: nodereadinessiov1alpha1.EnforcementModeBootstrapOnly, + }, + } + + coldStartNodeName = types.NamespacedName{Name: coldStartNode.Name} + + Expect(k8sClient.Create(ctx, coldStartNode)).To(Succeed()) + Expect(k8sClient.Create(ctx, coldStartRule)).To(Succeed()) + }) + + AfterEach(func() { + _ = k8sClient.Delete(ctx, coldStartNode) + + updatedRule := &nodereadinessiov1alpha1.NodeReadinessRule{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: coldStartRule.Name}, updatedRule); err == nil { + updatedRule.Finalizers = nil + _ = k8sClient.Update(ctx, updatedRule) + _ = k8sClient.Delete(ctx, updatedRule) + } + + Eventually(func() bool { + err := k8sClient.Get(ctx, types.NamespacedName{Name: coldStartRule.Name}, &nodereadinessiov1alpha1.NodeReadinessRule{}) + return apierrors.IsNotFound(err) + }, time.Second*10).Should(BeTrue()) + + readinessController.removeRuleFromCache(ctx, coldStartRule.Name) + }) + + It("should apply bootstrap-only taint even when ruleCache is empty at reconcile time", func() { + By("Verifying the rule cache is empty before reconcile (cold-start simulation)") + readinessController.ruleCacheMutex.RLock() + Expect(readinessController.ruleCache).To(BeEmpty()) + readinessController.ruleCacheMutex.RUnlock() + + By("Triggering NodeReconciler with a cold cache — ensureCacheWarmed must discover the rule") + _, err := nodeReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: coldStartNodeName}) + Expect(err).NotTo(HaveOccurred()) + + By("Verifying the bootstrap-only taint was applied despite the cache being cold at start") + Eventually(func() bool { + updatedNode := &corev1.Node{} + if err := k8sClient.Get(ctx, coldStartNodeName, updatedNode); err != nil { + return false + } + for _, taint := range updatedNode.Spec.Taints { + if taint.Key == coldTaintKey && taint.Effect == corev1.TaintEffectNoSchedule { + return true + } + } + return false + }, time.Second*5).Should(BeTrue(), "bootstrap-only taint must be applied even when the rule cache was empty at the start of reconcile") + + By("Verifying the rule is now present in the cache after warm-up") + Expect(readinessController.getCachedRule(coldStartRule.Name)).NotTo(BeNil()) + }) + + It("should not re-warm the cache on subsequent reconciles", func() { + By("First reconcile warms the cache") + _, err := nodeReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: coldStartNodeName}) + Expect(err).NotTo(HaveOccurred()) + Expect(readinessController.cacheWarmed.Load()).To(BeTrue()) + + By("Removing the rule from the cache to detect an unexpected re-warm") + readinessController.removeRuleFromCache(ctx, coldStartRule.Name) + + By("Second reconcile must NOT re-warm the cache (cacheWarmed is already true)") + _, err = nodeReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: coldStartNodeName}) + Expect(err).NotTo(HaveOccurred()) + + By("Verifying the rule is still absent from cache — warm-up was not repeated") + Expect(readinessController.getCachedRule(coldStartRule.Name)).To(BeNil()) + }) + }) }) diff --git a/internal/controller/nodereadinessrule_controller.go b/internal/controller/nodereadinessrule_controller.go index 6feb083..b2707d1 100644 --- a/internal/controller/nodereadinessrule_controller.go +++ b/internal/controller/nodereadinessrule_controller.go @@ -21,6 +21,7 @@ import ( "fmt" "strings" "sync" + "sync/atomic" "time" "github.com/prometheus/client_golang/prometheus" @@ -58,6 +59,12 @@ type RuleReadinessController struct { // Cache for efficient rule lookup ruleCacheMutex sync.RWMutex ruleCache map[string]*readinessv1alpha1.NodeReadinessRule // ruleName -> rule + + // cacheWarmed is set to true after the first successful pre-population of + // ruleCache from the informer cache. It prevents the startup race where + // NodeReconciler processes node events before RuleReconciler has reconciled + // existing rules and populated the cache. + cacheWarmed atomic.Bool } // RuleReconciler handles NodeReadinessRule reconciliation. @@ -477,6 +484,43 @@ func (r *RuleReadinessController) removeRuleFromCache(ctx context.Context, ruleN log.Info("Removed rule from cache", "rule", ruleName, "totalRules", len(r.ruleCache)) } +// ensureCacheWarmed pre-populates ruleCache from the informer cache on the first +// call. This closes the startup race where NodeReconciler can process node events +// before RuleReconciler has reconciled existing rules and populated the cache. +// For bootstrap-only rules this race is non-recoverable: the taint is never +// applied and the bootstrap annotation is never written if the rule is absent +// from the cache when the node is first evaluated. Subsequent calls are no-ops. +// +// Only rules not already present in the cache are added. Rules that the +// RuleReconciler has already cached (including those marked for deletion) are +// intentionally preserved so that in-flight state is not overwritten. +func (r *RuleReadinessController) ensureCacheWarmed(ctx context.Context) error { + if r.cacheWarmed.Load() { + return nil + } + + log := ctrl.LoggerFrom(ctx) + ruleList := &readinessv1alpha1.NodeReadinessRuleList{} + if err := r.List(ctx, ruleList); err != nil { + return fmt.Errorf("failed to pre-populate rule cache: %w", err) + } + + added := 0 + for i := range ruleList.Items { + r.ruleCacheMutex.RLock() + _, alreadyCached := r.ruleCache[ruleList.Items[i].Name] + r.ruleCacheMutex.RUnlock() + + if !alreadyCached { + r.updateRuleCache(ctx, &ruleList.Items[i]) + added++ + } + } + r.cacheWarmed.Store(true) + log.Info("Rule cache pre-populated on first node reconcile", "ruleCount", added) + return nil +} + // updateRuleStatus updates the status of a NodeReadinessRule. func (r *RuleReadinessController) updateRuleStatus(ctx context.Context, rule *readinessv1alpha1.NodeReadinessRule) error { log := ctrl.LoggerFrom(ctx)