From 9ecc85d32aeb5963ac0a287aceb030d59d73c96a Mon Sep 17 00:00:00 2001 From: mohanakatari119-bit Date: Wed, 6 May 2026 14:03:05 +0530 Subject: [PATCH] fix: increment metrics.Failures counter on node reconciler evaluation error Signed-off-by: mohanakatari119-bit --- internal/controller/node_controller.go | 1 + internal/controller/node_controller_test.go | 76 +++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/internal/controller/node_controller.go b/internal/controller/node_controller.go index fc04d27..1c40ffd 100644 --- a/internal/controller/node_controller.go +++ b/internal/controller/node_controller.go @@ -148,6 +148,7 @@ func (r *RuleReadinessController) processNodeAgainstAllRules(ctx context.Context "node", node.Name, "rule", rule.Name) // Continue with other rules even if one fails r.recordNodeFailure(rule, node.Name, "EvaluationError", err.Error()) + metrics.Failures.WithLabelValues(rule.Name, "EvaluationError").Inc() } // Persist the rule status diff --git a/internal/controller/node_controller_test.go b/internal/controller/node_controller_test.go index fee7160..d3348ca 100644 --- a/internal/controller/node_controller_test.go +++ b/internal/controller/node_controller_test.go @@ -18,11 +18,13 @@ package controller import ( "context" + "fmt" "sync/atomic" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + dto "github.com/prometheus/client_model/go" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -36,6 +38,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" nodereadinessiov1alpha1 "sigs.k8s.io/node-readiness-controller/api/v1alpha1" + "sigs.k8s.io/node-readiness-controller/internal/metrics" ) var _ = Describe("Node Controller", func() { @@ -937,4 +940,77 @@ var _ = Describe("Node Controller", func() { "Patch should not be called when taint removal is a no-op") }) }) + + Context("metrics.Failures counter in node reconciler", func() { + var ( + ctx context.Context + testScheme *runtime.Scheme + ) + + BeforeEach(func() { + ctx = context.Background() + testScheme = runtime.NewScheme() + Expect(corev1.AddToScheme(testScheme)).To(Succeed()) + Expect(nodereadinessiov1alpha1.AddToScheme(testScheme)).To(Succeed()) + }) + + It("should increment metrics.Failures when evaluateRuleForNode returns an error", func() { + // The node has no taint yet; the rule requires conditions not met, + // so evaluateRuleForNode will call addTaintBySpec. We intercept + // Patch to return an error, forcing the failure path. + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "metrics-fail-node"}, + } + rule := &nodereadinessiov1alpha1.NodeReadinessRule{ + ObjectMeta: metav1.ObjectMeta{Name: "metrics-fail-rule"}, + Spec: nodereadinessiov1alpha1.NodeReadinessRuleSpec{ + NodeSelector: metav1.LabelSelector{}, + Conditions: []nodereadinessiov1alpha1.ConditionRequirement{ + {Type: "TestCondition", RequiredStatus: corev1.ConditionTrue}, + }, + Taint: corev1.Taint{ + Key: "readiness.k8s.io/test", + Effect: corev1.TaintEffectNoSchedule, + }, + EnforcementMode: nodereadinessiov1alpha1.EnforcementModeContinuous, + }, + } + + fc := fakeclient.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(node, rule). + WithStatusSubresource(rule). + WithInterceptorFuncs(interceptor.Funcs{ + Patch: func(ctx context.Context, c client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { + if _, ok := obj.(*corev1.Node); ok { + return apierrors.NewInternalError(fmt.Errorf("simulated patch failure")) + } + return c.Patch(ctx, obj, patch, opts...) + }, + }). + Build() + + controller := &RuleReadinessController{ + Client: fc, + Scheme: testScheme, + clientset: fake.NewSimpleClientset(), + ruleCache: map[string]*nodereadinessiov1alpha1.NodeReadinessRule{rule.Name: rule}, + EventRecorder: record.NewFakeRecorder(10), + } + + // Read the failure counter before the call. + beforeM := &dto.Metric{} + _ = metrics.Failures.WithLabelValues(rule.Name, "EvaluationError").Write(beforeM) + before := beforeM.GetCounter().GetValue() + + controller.processNodeAgainstAllRules(ctx, node) + + afterM := &dto.Metric{} + _ = metrics.Failures.WithLabelValues(rule.Name, "EvaluationError").Write(afterM) + after := afterM.GetCounter().GetValue() + + Expect(after).To(BeNumerically(">", before), + "metrics.Failures{rule, EvaluationError} must increment when the node reconciler hits an evaluation error") + }) + }) })