From e3a55d8140fd929afe5d670510a349c1cae337e3 Mon Sep 17 00:00:00 2001 From: elbehery Date: Thu, 21 May 2026 15:54:32 +0200 Subject: [PATCH 1/5] Add kcp_apibinding_phase metric Track the number of APIBindings in each phase (Binding, Bound, or empty for newly created) via a system-wide GaugeVec with a single "phase" label. Phase transitions are tracked in the apibinding controller event handlers using a per-key map protected by a mutex, following the same pattern used for kcp_workspace_count. Signed-off-by: elbehery --- .../apis/apibinding/apibinding_controller.go | 80 ++++++++++- .../apibinding_phase_metrics_test.go | 130 ++++++++++++++++++ pkg/server/metrics/metrics.go | 20 +++ 3 files changed, 225 insertions(+), 5 deletions(-) create mode 100644 pkg/reconciler/apis/apibinding/apibinding_phase_metrics_test.go diff --git a/pkg/reconciler/apis/apibinding/apibinding_controller.go b/pkg/reconciler/apis/apibinding/apibinding_controller.go index 96b0bf2c9cf..6bc0d52925f 100644 --- a/pkg/reconciler/apis/apibinding/apibinding_controller.go +++ b/pkg/reconciler/apis/apibinding/apibinding_controller.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "sync" "time" "github.com/go-logr/logr" @@ -59,6 +60,7 @@ import ( "github.com/kcp-dev/kcp/pkg/logging" "github.com/kcp-dev/kcp/pkg/reconciler/committer" "github.com/kcp-dev/kcp/pkg/reconciler/events" + kcpmetrics "github.com/kcp-dev/kcp/pkg/server/metrics" "github.com/kcp-dev/kcp/pkg/tombstone" ) @@ -195,20 +197,28 @@ func NewController( commit: committer.NewSSACommitter[*APIBinding, Patcher, *APIBindingSpec, *APIBindingStatus]( kcpClusterClient.ApisV1alpha2().APIBindings(), ControllerName, - )} + ), + countedAPIBindings: make(map[string]string), + } logger := logging.WithReconciler(klog.Background(), ControllerName) // APIBinding handlers _, _ = apiBindingInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { - c.enqueueAPIBinding(tombstone.Obj[*apisv1alpha2.APIBinding](obj), logger, "") + binding := tombstone.Obj[*apisv1alpha2.APIBinding](obj) + c.enqueueAPIBinding(binding, logger, "") + c.handlePhaseMetricsOnAdd(binding) }, - UpdateFunc: func(_, obj interface{}) { - c.enqueueAPIBinding(tombstone.Obj[*apisv1alpha2.APIBinding](obj), logger, "") + UpdateFunc: func(oldObj, obj interface{}) { + binding := tombstone.Obj[*apisv1alpha2.APIBinding](obj) + c.enqueueAPIBinding(binding, logger, "") + c.handlePhaseMetricsOnUpdate(tombstone.Obj[*apisv1alpha2.APIBinding](oldObj), binding) }, DeleteFunc: func(obj interface{}) { - c.enqueueAPIBinding(tombstone.Obj[*apisv1alpha2.APIBinding](obj), logger, "") + binding := tombstone.Obj[*apisv1alpha2.APIBinding](obj) + c.enqueueAPIBinding(binding, logger, "") + c.handlePhaseMetricsOnDelete(binding) }, }) @@ -353,6 +363,9 @@ type controller struct { deletedCRDTracker *lockedStringSet commit CommitFunc + + mu sync.Mutex + countedAPIBindings map[string]string } // enqueueAPIBinding enqueues an APIBinding . @@ -571,3 +584,60 @@ func InstallIndexers( indexers.APIExportByIdentity: indexers.IndexAPIExportByIdentity, }) } + +func (c *controller) handlePhaseMetricsOnAdd(binding *apisv1alpha2.APIBinding) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(binding) + if err != nil { + return + } + phase := string(binding.Status.Phase) + + c.mu.Lock() + defer c.mu.Unlock() + + if _, exists := c.countedAPIBindings[key]; !exists { + c.countedAPIBindings[key] = phase + if phase != "" { + kcpmetrics.IncrementAPIBindingPhase(phase) + } + } +} + +func (c *controller) handlePhaseMetricsOnUpdate(oldBinding, newBinding *apisv1alpha2.APIBinding) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(newBinding) + if err != nil { + return + } + oldPhase := string(oldBinding.Status.Phase) + newPhase := string(newBinding.Status.Phase) + + c.mu.Lock() + defer c.mu.Unlock() + + if oldPhase != newPhase { + if oldPhase != "" { + kcpmetrics.DecrementAPIBindingPhase(oldPhase) + } + if newPhase != "" { + kcpmetrics.IncrementAPIBindingPhase(newPhase) + } + c.countedAPIBindings[key] = newPhase + } +} + +func (c *controller) handlePhaseMetricsOnDelete(binding *apisv1alpha2.APIBinding) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(binding) + if err != nil { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + if phase, exists := c.countedAPIBindings[key]; exists { + delete(c.countedAPIBindings, key) + if phase != "" { + kcpmetrics.DecrementAPIBindingPhase(phase) + } + } +} diff --git a/pkg/reconciler/apis/apibinding/apibinding_phase_metrics_test.go b/pkg/reconciler/apis/apibinding/apibinding_phase_metrics_test.go new file mode 100644 index 00000000000..660d4ba930c --- /dev/null +++ b/pkg/reconciler/apis/apibinding/apibinding_phase_metrics_test.go @@ -0,0 +1,130 @@ +/* +Copyright 2026 The kcp Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apibinding + +import ( + "sync" + "testing" + + "github.com/stretchr/testify/require" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + apisv1alpha2 "github.com/kcp-dev/sdk/apis/apis/v1alpha2" +) + +func newTestController() *controller { + return &controller{ + countedAPIBindings: make(map[string]string), + } +} + +func binding(cluster, name string, phase apisv1alpha2.APIBindingPhaseType) *apisv1alpha2.APIBinding { + return &apisv1alpha2.APIBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Annotations: map[string]string{"kcp.io/cluster": cluster}, + }, + Status: apisv1alpha2.APIBindingStatus{Phase: phase}, + } +} + +func TestHandlePhaseMetricsOnAdd(t *testing.T) { + t.Run("new binding with non-empty phase is tracked", func(t *testing.T) { + c := newTestController() + b := binding("root:ws", "test", apisv1alpha2.APIBindingPhaseBound) + c.handlePhaseMetricsOnAdd(b) + require.Len(t, c.countedAPIBindings, 1) + for _, v := range c.countedAPIBindings { + require.Equal(t, string(apisv1alpha2.APIBindingPhaseBound), v) + } + }) + + t.Run("new binding with empty phase is tracked but metric not emitted", func(t *testing.T) { + c := newTestController() + b := binding("root:ws", "test", "") + c.handlePhaseMetricsOnAdd(b) + require.Len(t, c.countedAPIBindings, 1) + for _, v := range c.countedAPIBindings { + require.Equal(t, "", v) + } + }) + + t.Run("duplicate add is ignored", func(t *testing.T) { + c := newTestController() + b := binding("root:ws", "test", apisv1alpha2.APIBindingPhaseBinding) + c.handlePhaseMetricsOnAdd(b) + c.handlePhaseMetricsOnAdd(b) // second add should be a no-op + require.Len(t, c.countedAPIBindings, 1) + }) +} + +func TestHandlePhaseMetricsOnUpdate(t *testing.T) { + t.Run("phase change updates tracked state", func(t *testing.T) { + c := newTestController() + old := binding("root:ws", "test", apisv1alpha2.APIBindingPhaseBinding) + new := binding("root:ws", "test", apisv1alpha2.APIBindingPhaseBound) + c.handlePhaseMetricsOnAdd(old) + c.handlePhaseMetricsOnUpdate(old, new) + for _, v := range c.countedAPIBindings { + require.Equal(t, string(apisv1alpha2.APIBindingPhaseBound), v) + } + }) + + t.Run("no-op when phase unchanged", func(t *testing.T) { + c := newTestController() + b := binding("root:ws", "test", apisv1alpha2.APIBindingPhaseBound) + c.handlePhaseMetricsOnAdd(b) + c.handlePhaseMetricsOnUpdate(b, b) + for _, v := range c.countedAPIBindings { + require.Equal(t, string(apisv1alpha2.APIBindingPhaseBound), v) + } + }) +} + +func TestHandlePhaseMetricsOnDelete(t *testing.T) { + t.Run("delete removes tracked binding", func(t *testing.T) { + c := newTestController() + b := binding("root:ws", "test", apisv1alpha2.APIBindingPhaseBound) + c.handlePhaseMetricsOnAdd(b) + require.Len(t, c.countedAPIBindings, 1) + c.handlePhaseMetricsOnDelete(b) + require.Empty(t, c.countedAPIBindings) + }) + + t.Run("delete of unknown binding is a no-op", func(t *testing.T) { + c := newTestController() + b := binding("root:ws", "unknown", apisv1alpha2.APIBindingPhaseBound) + require.NotPanics(t, func() { c.handlePhaseMetricsOnDelete(b) }) + require.Empty(t, c.countedAPIBindings) + }) +} + +func TestHandlePhaseMetricsConcurrency(t *testing.T) { + c := newTestController() + var wg sync.WaitGroup + for i := range 50 { + wg.Add(1) + go func(i int) { + defer wg.Done() + name := "binding-" + string(rune('a'+i%26)) + b := binding("root:ws", name, apisv1alpha2.APIBindingPhaseBinding) + c.handlePhaseMetricsOnAdd(b) + }(i) + } + wg.Wait() +} diff --git a/pkg/server/metrics/metrics.go b/pkg/server/metrics/metrics.go index e680a328e99..9a3a06dd711 100644 --- a/pkg/server/metrics/metrics.go +++ b/pkg/server/metrics/metrics.go @@ -39,11 +39,21 @@ var ( }, []string{"shard", "phase"}, ) + + apiBindingPhase = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Name: "kcp_apibinding_phase", + Help: "Number of APIBindings in each phase (Binding, Bound, or empty for newly created).", + StabilityLevel: metrics.ALPHA, + }, + []string{"phase"}, + ) ) func init() { legacyregistry.MustRegister(logicalClusterCount) legacyregistry.MustRegister(workspaceCount) + legacyregistry.MustRegister(apiBindingPhase) } // IncrementLogicalClusterCount increments the count for the given shard and phase. @@ -65,3 +75,13 @@ func IncrementWorkspaceCount(shardName string, phase string) { func DecrementWorkspaceCount(shardName string, phase string) { workspaceCount.WithLabelValues(shardName, phase).Dec() } + +// IncrementAPIBindingPhase increments the gauge for the given APIBinding phase. +func IncrementAPIBindingPhase(phase string) { + apiBindingPhase.WithLabelValues(phase).Inc() +} + +// DecrementAPIBindingPhase decrements the gauge for the given APIBinding phase. +func DecrementAPIBindingPhase(phase string) { + apiBindingPhase.WithLabelValues(phase).Dec() +} From c95331cfdd3527b94e859111b7f44384daf48198 Mon Sep 17 00:00:00 2001 From: elbehery Date: Thu, 21 May 2026 15:58:16 +0200 Subject: [PATCH 2/5] Add kcp_apibinding_condition_status metric Track the number of APIBindings with each condition type and status (True, False, Unknown) via a GaugeVec with "condition" and "status" labels. Per-binding condition snapshots are maintained in the controller and diffed on update to keep the gauges accurate without cardinality explosion from per-resource labels. Signed-off-by: elbehery --- .../apibinding_condition_metrics_test.go | 150 ++++++++++++++++++ .../apis/apibinding/apibinding_controller.go | 87 +++++++++- .../apibinding_phase_metrics_test.go | 3 +- pkg/server/metrics/metrics.go | 20 +++ 4 files changed, 257 insertions(+), 3 deletions(-) create mode 100644 pkg/reconciler/apis/apibinding/apibinding_condition_metrics_test.go diff --git a/pkg/reconciler/apis/apibinding/apibinding_condition_metrics_test.go b/pkg/reconciler/apis/apibinding/apibinding_condition_metrics_test.go new file mode 100644 index 00000000000..f19a1ca1c05 --- /dev/null +++ b/pkg/reconciler/apis/apibinding/apibinding_condition_metrics_test.go @@ -0,0 +1,150 @@ +/* +Copyright 2026 The kcp Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apibinding + +import ( + "testing" + + "github.com/stretchr/testify/require" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + apisv1alpha2 "github.com/kcp-dev/sdk/apis/apis/v1alpha2" + conditionsv1alpha1 "github.com/kcp-dev/sdk/apis/third_party/conditions/apis/conditions/v1alpha1" +) + +func bindingWithConditions(cluster, name string, conds ...conditionsv1alpha1.Condition) *apisv1alpha2.APIBinding { + return &apisv1alpha2.APIBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Annotations: map[string]string{"kcp.io/cluster": cluster}, + }, + Status: apisv1alpha2.APIBindingStatus{Conditions: conds}, + } +} + +func cond(condType conditionsv1alpha1.ConditionType, status corev1.ConditionStatus) conditionsv1alpha1.Condition { + return conditionsv1alpha1.Condition{Type: condType, Status: status} +} + +func TestHandleConditionMetricsOnAdd(t *testing.T) { + t.Run("conditions are tracked on add", func(t *testing.T) { + c := newTestController() + b := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + cond(apisv1alpha2.InitialBindingCompleted, corev1.ConditionFalse), + ) + c.handleConditionMetricsOnAdd(b) + require.Len(t, c.countedAPIBindingConditions, 1) + for _, snapshot := range c.countedAPIBindingConditions { + require.Equal(t, string(corev1.ConditionTrue), snapshot[string(apisv1alpha2.APIExportValid)]) + require.Equal(t, string(corev1.ConditionFalse), snapshot[string(apisv1alpha2.InitialBindingCompleted)]) + } + }) + + t.Run("binding with no conditions stores empty snapshot", func(t *testing.T) { + c := newTestController() + b := bindingWithConditions("root:ws", "test") + c.handleConditionMetricsOnAdd(b) + require.Len(t, c.countedAPIBindingConditions, 1) + for _, snapshot := range c.countedAPIBindingConditions { + require.Empty(t, snapshot) + } + }) + + t.Run("duplicate add is a no-op", func(t *testing.T) { + c := newTestController() + b := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(b) + c.handleConditionMetricsOnAdd(b) + require.Len(t, c.countedAPIBindingConditions, 1) + }) +} + +func TestHandleConditionMetricsOnUpdate(t *testing.T) { + t.Run("condition status change is tracked", func(t *testing.T) { + c := newTestController() + old := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionFalse), + ) + new := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(old) + c.handleConditionMetricsOnUpdate(old, new) + for _, snapshot := range c.countedAPIBindingConditions { + require.Equal(t, string(corev1.ConditionTrue), snapshot[string(apisv1alpha2.APIExportValid)]) + } + }) + + t.Run("new condition added on update", func(t *testing.T) { + c := newTestController() + old := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + ) + new := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + cond(apisv1alpha2.InitialBindingCompleted, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(old) + c.handleConditionMetricsOnUpdate(old, new) + for _, snapshot := range c.countedAPIBindingConditions { + require.Len(t, snapshot, 2) + } + }) + + t.Run("removed condition is cleaned up on update", func(t *testing.T) { + c := newTestController() + old := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + cond(apisv1alpha2.InitialBindingCompleted, corev1.ConditionTrue), + ) + new := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(old) + c.handleConditionMetricsOnUpdate(old, new) + for _, snapshot := range c.countedAPIBindingConditions { + require.Len(t, snapshot, 1) + require.NotContains(t, snapshot, string(apisv1alpha2.InitialBindingCompleted)) + } + }) +} + +func TestHandleConditionMetricsOnDelete(t *testing.T) { + t.Run("delete removes all condition tracking", func(t *testing.T) { + c := newTestController() + b := bindingWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(b) + require.Len(t, c.countedAPIBindingConditions, 1) + c.handleConditionMetricsOnDelete(b) + require.Empty(t, c.countedAPIBindingConditions) + }) + + t.Run("delete of unknown binding is a no-op", func(t *testing.T) { + c := newTestController() + b := bindingWithConditions("root:ws", "unknown", + cond(apisv1alpha2.APIExportValid, corev1.ConditionTrue), + ) + require.NotPanics(t, func() { c.handleConditionMetricsOnDelete(b) }) + }) +} diff --git a/pkg/reconciler/apis/apibinding/apibinding_controller.go b/pkg/reconciler/apis/apibinding/apibinding_controller.go index 6bc0d52925f..7fa06afa602 100644 --- a/pkg/reconciler/apis/apibinding/apibinding_controller.go +++ b/pkg/reconciler/apis/apibinding/apibinding_controller.go @@ -198,7 +198,8 @@ func NewController( kcpClusterClient.ApisV1alpha2().APIBindings(), ControllerName, ), - countedAPIBindings: make(map[string]string), + countedAPIBindings: make(map[string]string), + countedAPIBindingConditions: make(map[string]map[string]string), } logger := logging.WithReconciler(klog.Background(), ControllerName) @@ -209,16 +210,20 @@ func NewController( binding := tombstone.Obj[*apisv1alpha2.APIBinding](obj) c.enqueueAPIBinding(binding, logger, "") c.handlePhaseMetricsOnAdd(binding) + c.handleConditionMetricsOnAdd(binding) }, UpdateFunc: func(oldObj, obj interface{}) { binding := tombstone.Obj[*apisv1alpha2.APIBinding](obj) + old := tombstone.Obj[*apisv1alpha2.APIBinding](oldObj) c.enqueueAPIBinding(binding, logger, "") - c.handlePhaseMetricsOnUpdate(tombstone.Obj[*apisv1alpha2.APIBinding](oldObj), binding) + c.handlePhaseMetricsOnUpdate(old, binding) + c.handleConditionMetricsOnUpdate(old, binding) }, DeleteFunc: func(obj interface{}) { binding := tombstone.Obj[*apisv1alpha2.APIBinding](obj) c.enqueueAPIBinding(binding, logger, "") c.handlePhaseMetricsOnDelete(binding) + c.handleConditionMetricsOnDelete(binding) }, }) @@ -366,6 +371,8 @@ type controller struct { mu sync.Mutex countedAPIBindings map[string]string + // countedAPIBindingConditions maps binding key -> (conditionType -> status) + countedAPIBindingConditions map[string]map[string]string } // enqueueAPIBinding enqueues an APIBinding . @@ -641,3 +648,79 @@ func (c *controller) handlePhaseMetricsOnDelete(binding *apisv1alpha2.APIBinding } } } + +func (c *controller) handleConditionMetricsOnAdd(binding *apisv1alpha2.APIBinding) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(binding) + if err != nil { + return + } + + snapshot := make(map[string]string, len(binding.Status.Conditions)) + for _, cond := range binding.Status.Conditions { + snapshot[string(cond.Type)] = string(cond.Status) + } + + c.mu.Lock() + defer c.mu.Unlock() + + if _, exists := c.countedAPIBindingConditions[key]; exists { + return + } + c.countedAPIBindingConditions[key] = snapshot + for condType, status := range snapshot { + kcpmetrics.IncrementAPIBindingConditionStatus(condType, status) + } +} + +func (c *controller) handleConditionMetricsOnUpdate(oldBinding, newBinding *apisv1alpha2.APIBinding) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(newBinding) + if err != nil { + return + } + + newSnapshot := make(map[string]string, len(newBinding.Status.Conditions)) + for _, cond := range newBinding.Status.Conditions { + newSnapshot[string(cond.Type)] = string(cond.Status) + } + + c.mu.Lock() + defer c.mu.Unlock() + + oldSnapshot := c.countedAPIBindingConditions[key] + + // Decrement removed or changed conditions. + for condType, oldStatus := range oldSnapshot { + newStatus, exists := newSnapshot[condType] + if !exists || newStatus != oldStatus { + kcpmetrics.DecrementAPIBindingConditionStatus(condType, oldStatus) + } + } + // Increment new or changed conditions. + for condType, newStatus := range newSnapshot { + oldStatus, exists := oldSnapshot[condType] + if !exists || oldStatus != newStatus { + kcpmetrics.IncrementAPIBindingConditionStatus(condType, newStatus) + } + } + + c.countedAPIBindingConditions[key] = newSnapshot +} + +func (c *controller) handleConditionMetricsOnDelete(binding *apisv1alpha2.APIBinding) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(binding) + if err != nil { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + snapshot, exists := c.countedAPIBindingConditions[key] + if !exists { + return + } + for condType, status := range snapshot { + kcpmetrics.DecrementAPIBindingConditionStatus(condType, status) + } + delete(c.countedAPIBindingConditions, key) +} diff --git a/pkg/reconciler/apis/apibinding/apibinding_phase_metrics_test.go b/pkg/reconciler/apis/apibinding/apibinding_phase_metrics_test.go index 660d4ba930c..50eedccba8d 100644 --- a/pkg/reconciler/apis/apibinding/apibinding_phase_metrics_test.go +++ b/pkg/reconciler/apis/apibinding/apibinding_phase_metrics_test.go @@ -29,7 +29,8 @@ import ( func newTestController() *controller { return &controller{ - countedAPIBindings: make(map[string]string), + countedAPIBindings: make(map[string]string), + countedAPIBindingConditions: make(map[string]map[string]string), } } diff --git a/pkg/server/metrics/metrics.go b/pkg/server/metrics/metrics.go index 9a3a06dd711..1b4c1b89a0e 100644 --- a/pkg/server/metrics/metrics.go +++ b/pkg/server/metrics/metrics.go @@ -48,12 +48,22 @@ var ( }, []string{"phase"}, ) + + apiBindingConditionStatus = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Name: "kcp_apibinding_condition_status", + Help: "Number of APIBindings with each condition type and status (True, False, Unknown).", + StabilityLevel: metrics.ALPHA, + }, + []string{"condition", "status"}, + ) ) func init() { legacyregistry.MustRegister(logicalClusterCount) legacyregistry.MustRegister(workspaceCount) legacyregistry.MustRegister(apiBindingPhase) + legacyregistry.MustRegister(apiBindingConditionStatus) } // IncrementLogicalClusterCount increments the count for the given shard and phase. @@ -85,3 +95,13 @@ func IncrementAPIBindingPhase(phase string) { func DecrementAPIBindingPhase(phase string) { apiBindingPhase.WithLabelValues(phase).Dec() } + +// IncrementAPIBindingConditionStatus increments the gauge for the given condition type and status. +func IncrementAPIBindingConditionStatus(conditionType, status string) { + apiBindingConditionStatus.WithLabelValues(conditionType, status).Inc() +} + +// DecrementAPIBindingConditionStatus decrements the gauge for the given condition type and status. +func DecrementAPIBindingConditionStatus(conditionType, status string) { + apiBindingConditionStatus.WithLabelValues(conditionType, status).Dec() +} From c36538696539d6a775d97fc4511bab4c423852c1 Mon Sep 17 00:00:00 2001 From: elbehery Date: Thu, 21 May 2026 15:59:46 +0200 Subject: [PATCH 3/5] Add kcp_apibinding_ready_duration_ms metric Record the time in milliseconds from APIBinding creation to reaching the Bound phase as a histogram. The observation is taken once in the phase update handler on the Bound transition, so each binding contributes exactly one sample. Buckets cover the expected range from sub-second to 5 minutes. Signed-off-by: elbehery --- .../apis/apibinding/apibinding_controller.go | 3 + .../apibinding_ready_duration_metrics_test.go | 67 +++++++++++++++++++ pkg/server/metrics/metrics.go | 17 +++++ 3 files changed, 87 insertions(+) create mode 100644 pkg/reconciler/apis/apibinding/apibinding_ready_duration_metrics_test.go diff --git a/pkg/reconciler/apis/apibinding/apibinding_controller.go b/pkg/reconciler/apis/apibinding/apibinding_controller.go index 7fa06afa602..c295907ebec 100644 --- a/pkg/reconciler/apis/apibinding/apibinding_controller.go +++ b/pkg/reconciler/apis/apibinding/apibinding_controller.go @@ -628,6 +628,9 @@ func (c *controller) handlePhaseMetricsOnUpdate(oldBinding, newBinding *apisv1al if newPhase != "" { kcpmetrics.IncrementAPIBindingPhase(newPhase) } + if newPhase == string(apisv1alpha2.APIBindingPhaseBound) { + kcpmetrics.ObserveAPIBindingReadyDuration(newBinding.CreationTimestamp.Time) + } c.countedAPIBindings[key] = newPhase } } diff --git a/pkg/reconciler/apis/apibinding/apibinding_ready_duration_metrics_test.go b/pkg/reconciler/apis/apibinding/apibinding_ready_duration_metrics_test.go new file mode 100644 index 00000000000..34ba1ba875f --- /dev/null +++ b/pkg/reconciler/apis/apibinding/apibinding_ready_duration_metrics_test.go @@ -0,0 +1,67 @@ +/* +Copyright 2026 The kcp Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apibinding + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + apisv1alpha2 "github.com/kcp-dev/sdk/apis/apis/v1alpha2" +) + +func bindingWithPhaseAndCreation(cluster, name string, phase apisv1alpha2.APIBindingPhaseType, created time.Time) *apisv1alpha2.APIBinding { + return &apisv1alpha2.APIBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Annotations: map[string]string{"kcp.io/cluster": cluster}, + CreationTimestamp: metav1.Time{Time: created}, + }, + Status: apisv1alpha2.APIBindingStatus{Phase: phase}, + } +} + +func TestHandleReadyDurationOnUpdate(t *testing.T) { + t.Run("transitioning to Bound records duration without panic", func(t *testing.T) { + c := newTestController() + created := time.Now().Add(-5 * time.Second) + old := bindingWithPhaseAndCreation("root:ws", "test", apisv1alpha2.APIBindingPhaseBinding, created) + new := bindingWithPhaseAndCreation("root:ws", "test", apisv1alpha2.APIBindingPhaseBound, created) + c.handlePhaseMetricsOnAdd(old) + require.NotPanics(t, func() { c.handlePhaseMetricsOnUpdate(old, new) }) + }) + + t.Run("transitioning to Binding does not record duration", func(t *testing.T) { + c := newTestController() + created := time.Now().Add(-2 * time.Second) + old := bindingWithPhaseAndCreation("root:ws", "test", "", created) + new := bindingWithPhaseAndCreation("root:ws", "test", apisv1alpha2.APIBindingPhaseBinding, created) + c.handlePhaseMetricsOnAdd(old) + require.NotPanics(t, func() { c.handlePhaseMetricsOnUpdate(old, new) }) + }) + + t.Run("same phase does not record duration", func(t *testing.T) { + c := newTestController() + created := time.Now().Add(-1 * time.Second) + b := bindingWithPhaseAndCreation("root:ws", "test", apisv1alpha2.APIBindingPhaseBound, created) + c.handlePhaseMetricsOnAdd(b) + require.NotPanics(t, func() { c.handlePhaseMetricsOnUpdate(b, b) }) + }) +} diff --git a/pkg/server/metrics/metrics.go b/pkg/server/metrics/metrics.go index 1b4c1b89a0e..183d155aa77 100644 --- a/pkg/server/metrics/metrics.go +++ b/pkg/server/metrics/metrics.go @@ -17,6 +17,8 @@ limitations under the License. package metrics import ( + "time" + "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" ) @@ -57,6 +59,15 @@ var ( }, []string{"condition", "status"}, ) + + apiBindingReadyDurationMs = metrics.NewHistogram( + &metrics.HistogramOpts{ + Name: "kcp_apibinding_ready_duration_ms", + Help: "Duration in milliseconds from APIBinding creation to reaching the Bound phase.", + StabilityLevel: metrics.ALPHA, + Buckets: []float64{100, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000, 300000}, + }, + ) ) func init() { @@ -64,6 +75,7 @@ func init() { legacyregistry.MustRegister(workspaceCount) legacyregistry.MustRegister(apiBindingPhase) legacyregistry.MustRegister(apiBindingConditionStatus) + legacyregistry.MustRegister(apiBindingReadyDurationMs) } // IncrementLogicalClusterCount increments the count for the given shard and phase. @@ -105,3 +117,8 @@ func IncrementAPIBindingConditionStatus(conditionType, status string) { func DecrementAPIBindingConditionStatus(conditionType, status string) { apiBindingConditionStatus.WithLabelValues(conditionType, status).Dec() } + +// ObserveAPIBindingReadyDuration records the duration from creation to Bound phase. +func ObserveAPIBindingReadyDuration(creationTime time.Time) { + apiBindingReadyDurationMs.Observe(float64(time.Since(creationTime).Milliseconds())) +} From 03d58534c01fcecef89363644eb2a037685f127b Mon Sep 17 00:00:00 2001 From: elbehery Date: Thu, 21 May 2026 16:05:41 +0200 Subject: [PATCH 4/5] Add kcp_apiexport_condition_status metric Track the number of APIExports with each condition type and status (True, False, Unknown) via a GaugeVec with "condition" and "status" labels. Per-export condition snapshots are maintained in the apiexport controller and diffed on update to keep the gauges accurate, mirroring the approach used for kcp_apibinding_condition_status. Signed-off-by: elbehery --- .../apiexport_condition_metrics_test.go | 174 ++++++++++++++++++ .../apis/apiexport/apiexport_controller.go | 95 +++++++++- pkg/server/metrics/metrics.go | 20 ++ 3 files changed, 285 insertions(+), 4 deletions(-) create mode 100644 pkg/reconciler/apis/apiexport/apiexport_condition_metrics_test.go diff --git a/pkg/reconciler/apis/apiexport/apiexport_condition_metrics_test.go b/pkg/reconciler/apis/apiexport/apiexport_condition_metrics_test.go new file mode 100644 index 00000000000..6e010bceed2 --- /dev/null +++ b/pkg/reconciler/apis/apiexport/apiexport_condition_metrics_test.go @@ -0,0 +1,174 @@ +/* +Copyright 2026 The kcp Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apiexport + +import ( + "sync" + "testing" + + "github.com/stretchr/testify/require" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + apisv1alpha2 "github.com/kcp-dev/sdk/apis/apis/v1alpha2" + conditionsv1alpha1 "github.com/kcp-dev/sdk/apis/third_party/conditions/apis/conditions/v1alpha1" +) + +func newTestController() *controller { + return &controller{ + countedAPIExportConditions: make(map[string]map[string]string), + } +} + +func exportWithConditions(cluster, name string, conds ...conditionsv1alpha1.Condition) *apisv1alpha2.APIExport { + return &apisv1alpha2.APIExport{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Annotations: map[string]string{"kcp.io/cluster": cluster}, + }, + Status: apisv1alpha2.APIExportStatus{Conditions: conds}, + } +} + +func cond(condType conditionsv1alpha1.ConditionType, status corev1.ConditionStatus) conditionsv1alpha1.Condition { + return conditionsv1alpha1.Condition{Type: condType, Status: status} +} + +func TestHandleConditionMetricsOnAdd(t *testing.T) { + t.Run("conditions are tracked on add", func(t *testing.T) { + c := newTestController() + e := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionFalse), + ) + c.handleConditionMetricsOnAdd(e) + require.Len(t, c.countedAPIExportConditions, 1) + for _, snapshot := range c.countedAPIExportConditions { + require.Equal(t, string(corev1.ConditionTrue), snapshot[string(apisv1alpha2.APIExportIdentityValid)]) + require.Equal(t, string(corev1.ConditionFalse), snapshot[string(apisv1alpha2.APIExportVirtualWorkspaceURLsReady)]) + } + }) + + t.Run("export with no conditions stores empty snapshot", func(t *testing.T) { + c := newTestController() + e := exportWithConditions("root:ws", "test") + c.handleConditionMetricsOnAdd(e) + require.Len(t, c.countedAPIExportConditions, 1) + for _, snapshot := range c.countedAPIExportConditions { + require.Empty(t, snapshot) + } + }) + + t.Run("duplicate add is a no-op", func(t *testing.T) { + c := newTestController() + e := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(e) + c.handleConditionMetricsOnAdd(e) + require.Len(t, c.countedAPIExportConditions, 1) + }) +} + +func TestHandleConditionMetricsOnUpdate(t *testing.T) { + t.Run("condition status change is tracked", func(t *testing.T) { + c := newTestController() + old := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionFalse), + ) + new := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(old) + c.handleConditionMetricsOnUpdate(old, new) + for _, snapshot := range c.countedAPIExportConditions { + require.Equal(t, string(corev1.ConditionTrue), snapshot[string(apisv1alpha2.APIExportIdentityValid)]) + } + }) + + t.Run("new condition added on update", func(t *testing.T) { + c := newTestController() + old := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + ) + new := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(old) + c.handleConditionMetricsOnUpdate(old, new) + for _, snapshot := range c.countedAPIExportConditions { + require.Len(t, snapshot, 2) + } + }) + + t.Run("removed condition is cleaned up on update", func(t *testing.T) { + c := newTestController() + old := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionTrue), + ) + new := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(old) + c.handleConditionMetricsOnUpdate(old, new) + for _, snapshot := range c.countedAPIExportConditions { + require.Len(t, snapshot, 1) + require.NotContains(t, snapshot, string(apisv1alpha2.APIExportVirtualWorkspaceURLsReady)) + } + }) +} + +func TestHandleConditionMetricsOnDelete(t *testing.T) { + t.Run("delete removes all condition tracking", func(t *testing.T) { + c := newTestController() + e := exportWithConditions("root:ws", "test", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(e) + require.Len(t, c.countedAPIExportConditions, 1) + c.handleConditionMetricsOnDelete(e) + require.Empty(t, c.countedAPIExportConditions) + }) + + t.Run("delete of unknown export is a no-op", func(t *testing.T) { + c := newTestController() + e := exportWithConditions("root:ws", "unknown", + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + ) + require.NotPanics(t, func() { c.handleConditionMetricsOnDelete(e) }) + }) +} + +func TestHandleConditionMetricsConcurrency(t *testing.T) { + c := newTestController() + var wg sync.WaitGroup + for i := range 50 { + wg.Add(1) + go func(i int) { + defer wg.Done() + name := "export-" + string(rune('a'+i%26)) + e := exportWithConditions("root:ws", name, + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + ) + c.handleConditionMetricsOnAdd(e) + }(i) + } + wg.Wait() +} diff --git a/pkg/reconciler/apis/apiexport/apiexport_controller.go b/pkg/reconciler/apis/apiexport/apiexport_controller.go index 663834e001f..edcca3c4799 100644 --- a/pkg/reconciler/apis/apiexport/apiexport_controller.go +++ b/pkg/reconciler/apis/apiexport/apiexport_controller.go @@ -19,6 +19,7 @@ package apiexport import ( "context" "fmt" + "sync" "time" corev1 "k8s.io/api/core/v1" @@ -49,6 +50,7 @@ import ( "github.com/kcp-dev/kcp/pkg/logging" "github.com/kcp-dev/kcp/pkg/reconciler/committer" "github.com/kcp-dev/kcp/pkg/reconciler/events" + kcpmetrics "github.com/kcp-dev/kcp/pkg/server/metrics" ) const ( @@ -133,17 +135,25 @@ func NewController( }, commit: committer.NewCommitter[*APIExport, Patcher, *APIExportSpec, *APIExportStatus](kcpClusterClient.ApisV1alpha2().APIExports()), + + countedAPIExportConditions: make(map[string]map[string]string), } _, _ = apiExportInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { - c.enqueueAPIExport(obj.(*apisv1alpha2.APIExport)) + export := obj.(*apisv1alpha2.APIExport) + c.enqueueAPIExport(export) + c.handleConditionMetricsOnAdd(export) }, - UpdateFunc: func(_, newObj interface{}) { - c.enqueueAPIExport(newObj.(*apisv1alpha2.APIExport)) + UpdateFunc: func(oldObj, newObj interface{}) { + export := newObj.(*apisv1alpha2.APIExport) + c.enqueueAPIExport(export) + c.handleConditionMetricsOnUpdate(oldObj.(*apisv1alpha2.APIExport), export) }, DeleteFunc: func(obj interface{}) { - c.enqueueAPIExport(obj.(*apisv1alpha2.APIExport)) + export := obj.(*apisv1alpha2.APIExport) + c.enqueueAPIExport(export) + c.handleConditionMetricsOnDelete(export) }, }) @@ -214,6 +224,9 @@ type controller struct { listShards func() ([]*corev1alpha1.Shard, error) commit CommitFunc + + mu sync.Mutex + countedAPIExportConditions map[string]map[string]string } // enqueueAPIExport enqueues an APIExport. @@ -381,3 +394,77 @@ func InstallIndexers(apiExportInformer apisv1alpha2informers.APIExportClusterInf }, ) } + +func (c *controller) handleConditionMetricsOnAdd(export *apisv1alpha2.APIExport) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(export) + if err != nil { + return + } + + snapshot := make(map[string]string, len(export.Status.Conditions)) + for _, cond := range export.Status.Conditions { + snapshot[string(cond.Type)] = string(cond.Status) + } + + c.mu.Lock() + defer c.mu.Unlock() + + if _, exists := c.countedAPIExportConditions[key]; exists { + return + } + c.countedAPIExportConditions[key] = snapshot + for condType, status := range snapshot { + kcpmetrics.IncrementAPIExportConditionStatus(condType, status) + } +} + +func (c *controller) handleConditionMetricsOnUpdate(oldExport, newExport *apisv1alpha2.APIExport) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(newExport) + if err != nil { + return + } + + newSnapshot := make(map[string]string, len(newExport.Status.Conditions)) + for _, cond := range newExport.Status.Conditions { + newSnapshot[string(cond.Type)] = string(cond.Status) + } + + c.mu.Lock() + defer c.mu.Unlock() + + oldSnapshot := c.countedAPIExportConditions[key] + + for condType, oldStatus := range oldSnapshot { + newStatus, exists := newSnapshot[condType] + if !exists || newStatus != oldStatus { + kcpmetrics.DecrementAPIExportConditionStatus(condType, oldStatus) + } + } + for condType, newStatus := range newSnapshot { + oldStatus, exists := oldSnapshot[condType] + if !exists || oldStatus != newStatus { + kcpmetrics.IncrementAPIExportConditionStatus(condType, newStatus) + } + } + + c.countedAPIExportConditions[key] = newSnapshot +} + +func (c *controller) handleConditionMetricsOnDelete(export *apisv1alpha2.APIExport) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(export) + if err != nil { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + snapshot, exists := c.countedAPIExportConditions[key] + if !exists { + return + } + for condType, status := range snapshot { + kcpmetrics.DecrementAPIExportConditionStatus(condType, status) + } + delete(c.countedAPIExportConditions, key) +} diff --git a/pkg/server/metrics/metrics.go b/pkg/server/metrics/metrics.go index 183d155aa77..0b04ab41627 100644 --- a/pkg/server/metrics/metrics.go +++ b/pkg/server/metrics/metrics.go @@ -68,6 +68,15 @@ var ( Buckets: []float64{100, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000, 300000}, }, ) + + apiExportConditionStatus = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Name: "kcp_apiexport_condition_status", + Help: "Number of APIExports with each condition type and status (True, False, Unknown).", + StabilityLevel: metrics.ALPHA, + }, + []string{"condition", "status"}, + ) ) func init() { @@ -76,6 +85,7 @@ func init() { legacyregistry.MustRegister(apiBindingPhase) legacyregistry.MustRegister(apiBindingConditionStatus) legacyregistry.MustRegister(apiBindingReadyDurationMs) + legacyregistry.MustRegister(apiExportConditionStatus) } // IncrementLogicalClusterCount increments the count for the given shard and phase. @@ -122,3 +132,13 @@ func DecrementAPIBindingConditionStatus(conditionType, status string) { func ObserveAPIBindingReadyDuration(creationTime time.Time) { apiBindingReadyDurationMs.Observe(float64(time.Since(creationTime).Milliseconds())) } + +// IncrementAPIExportConditionStatus increments the gauge for the given APIExport condition type and status. +func IncrementAPIExportConditionStatus(conditionType, status string) { + apiExportConditionStatus.WithLabelValues(conditionType, status).Inc() +} + +// DecrementAPIExportConditionStatus decrements the gauge for the given APIExport condition type and status. +func DecrementAPIExportConditionStatus(conditionType, status string) { + apiExportConditionStatus.WithLabelValues(conditionType, status).Dec() +} From 01b37f4a8a3e35101ba44c096e2e415bb7261b4c Mon Sep 17 00:00:00 2001 From: elbehery Date: Thu, 21 May 2026 16:36:09 +0200 Subject: [PATCH 5/5] Add kcp_apiexport_ready_duration_ms metric Record the time in milliseconds from APIExport creation to becoming fully operational (IdentityValid=True and VirtualWorkspaceURLsReady=True both set) as a histogram. Each export contributes exactly one sample, tracked via a per-key set that is cleaned up on delete. Buckets cover the same range as the apibinding counterpart. Signed-off-by: elbehery --- .../apiexport_condition_metrics_test.go | 1 + .../apis/apiexport/apiexport_controller.go | 49 +++++++ .../apiexport_ready_duration_metrics_test.go | 128 ++++++++++++++++++ pkg/server/metrics/metrics.go | 15 ++ 4 files changed, 193 insertions(+) create mode 100644 pkg/reconciler/apis/apiexport/apiexport_ready_duration_metrics_test.go diff --git a/pkg/reconciler/apis/apiexport/apiexport_condition_metrics_test.go b/pkg/reconciler/apis/apiexport/apiexport_condition_metrics_test.go index 6e010bceed2..419c7547671 100644 --- a/pkg/reconciler/apis/apiexport/apiexport_condition_metrics_test.go +++ b/pkg/reconciler/apis/apiexport/apiexport_condition_metrics_test.go @@ -32,6 +32,7 @@ import ( func newTestController() *controller { return &controller{ countedAPIExportConditions: make(map[string]map[string]string), + readyAPIExports: make(map[string]struct{}), } } diff --git a/pkg/reconciler/apis/apiexport/apiexport_controller.go b/pkg/reconciler/apis/apiexport/apiexport_controller.go index edcca3c4799..277facfe3bf 100644 --- a/pkg/reconciler/apis/apiexport/apiexport_controller.go +++ b/pkg/reconciler/apis/apiexport/apiexport_controller.go @@ -137,6 +137,7 @@ func NewController( commit: committer.NewCommitter[*APIExport, Patcher, *APIExportSpec, *APIExportStatus](kcpClusterClient.ApisV1alpha2().APIExports()), countedAPIExportConditions: make(map[string]map[string]string), + readyAPIExports: make(map[string]struct{}), } _, _ = apiExportInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -149,11 +150,13 @@ func NewController( export := newObj.(*apisv1alpha2.APIExport) c.enqueueAPIExport(export) c.handleConditionMetricsOnUpdate(oldObj.(*apisv1alpha2.APIExport), export) + c.handleReadyDurationMetricOnUpdate(export) }, DeleteFunc: func(obj interface{}) { export := obj.(*apisv1alpha2.APIExport) c.enqueueAPIExport(export) c.handleConditionMetricsOnDelete(export) + c.handleReadyDurationMetricOnDelete(export) }, }) @@ -227,6 +230,7 @@ type controller struct { mu sync.Mutex countedAPIExportConditions map[string]map[string]string + readyAPIExports map[string]struct{} } // enqueueAPIExport enqueues an APIExport. @@ -468,3 +472,48 @@ func (c *controller) handleConditionMetricsOnDelete(export *apisv1alpha2.APIExpo } delete(c.countedAPIExportConditions, key) } + +func isAPIExportReady(export *apisv1alpha2.APIExport) bool { + identityValid := false + urlsReady := false + for _, cond := range export.Status.Conditions { + switch cond.Type { + case apisv1alpha2.APIExportIdentityValid: + identityValid = string(cond.Status) == "True" + case apisv1alpha2.APIExportVirtualWorkspaceURLsReady: + urlsReady = string(cond.Status) == "True" + } + } + return identityValid && urlsReady +} + +func (c *controller) handleReadyDurationMetricOnUpdate(export *apisv1alpha2.APIExport) { + if !isAPIExportReady(export) { + return + } + key, err := kcpcache.MetaClusterNamespaceKeyFunc(export) + if err != nil { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + if _, alreadyObserved := c.readyAPIExports[key]; alreadyObserved { + return + } + c.readyAPIExports[key] = struct{}{} + kcpmetrics.ObserveAPIExportReadyDuration(export.CreationTimestamp.Time) +} + +func (c *controller) handleReadyDurationMetricOnDelete(export *apisv1alpha2.APIExport) { + key, err := kcpcache.MetaClusterNamespaceKeyFunc(export) + if err != nil { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + delete(c.readyAPIExports, key) +} diff --git a/pkg/reconciler/apis/apiexport/apiexport_ready_duration_metrics_test.go b/pkg/reconciler/apis/apiexport/apiexport_ready_duration_metrics_test.go new file mode 100644 index 00000000000..37cf0b38f3b --- /dev/null +++ b/pkg/reconciler/apis/apiexport/apiexport_ready_duration_metrics_test.go @@ -0,0 +1,128 @@ +/* +Copyright 2026 The kcp Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apiexport + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + apisv1alpha2 "github.com/kcp-dev/sdk/apis/apis/v1alpha2" + conditionsv1alpha1 "github.com/kcp-dev/sdk/apis/third_party/conditions/apis/conditions/v1alpha1" +) + +func exportWithCreation(cluster, name string, created time.Time, conds ...conditionsv1alpha1.Condition) *apisv1alpha2.APIExport { + return &apisv1alpha2.APIExport{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Annotations: map[string]string{"kcp.io/cluster": cluster}, + CreationTimestamp: metav1.Time{Time: created}, + }, + Status: apisv1alpha2.APIExportStatus{Conditions: conds}, + } +} + +func TestIsAPIExportReady(t *testing.T) { + t.Run("ready when both conditions are True", func(t *testing.T) { + e := exportWithCreation("root:ws", "test", time.Now(), + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionTrue), + ) + require.True(t, isAPIExportReady(e)) + }) + + t.Run("not ready when identity is False", func(t *testing.T) { + e := exportWithCreation("root:ws", "test", time.Now(), + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionFalse), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionTrue), + ) + require.False(t, isAPIExportReady(e)) + }) + + t.Run("not ready when URLs not ready", func(t *testing.T) { + e := exportWithCreation("root:ws", "test", time.Now(), + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionFalse), + ) + require.False(t, isAPIExportReady(e)) + }) + + t.Run("not ready with no conditions", func(t *testing.T) { + e := exportWithCreation("root:ws", "test", time.Now()) + require.False(t, isAPIExportReady(e)) + }) +} + +func TestHandleReadyDurationMetricOnUpdate(t *testing.T) { + t.Run("first transition to ready records duration without panic", func(t *testing.T) { + c := newTestController() + created := time.Now().Add(-3 * time.Second) + e := exportWithCreation("root:ws", "test", created, + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionTrue), + ) + require.NotPanics(t, func() { c.handleReadyDurationMetricOnUpdate(e) }) + require.Len(t, c.readyAPIExports, 1) + }) + + t.Run("second update when already ready does not double-record", func(t *testing.T) { + c := newTestController() + created := time.Now().Add(-3 * time.Second) + e := exportWithCreation("root:ws", "test", created, + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionTrue), + ) + c.handleReadyDurationMetricOnUpdate(e) + c.handleReadyDurationMetricOnUpdate(e) // second call should be a no-op + require.Len(t, c.readyAPIExports, 1) + }) + + t.Run("not-ready update does not record", func(t *testing.T) { + c := newTestController() + e := exportWithCreation("root:ws", "test", time.Now(), + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionFalse), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionTrue), + ) + c.handleReadyDurationMetricOnUpdate(e) + require.Empty(t, c.readyAPIExports) + }) +} + +func TestHandleReadyDurationMetricOnDelete(t *testing.T) { + t.Run("delete removes ready tracking", func(t *testing.T) { + c := newTestController() + created := time.Now().Add(-1 * time.Second) + e := exportWithCreation("root:ws", "test", created, + cond(apisv1alpha2.APIExportIdentityValid, corev1.ConditionTrue), + cond(apisv1alpha2.APIExportVirtualWorkspaceURLsReady, corev1.ConditionTrue), + ) + c.handleReadyDurationMetricOnUpdate(e) + require.Len(t, c.readyAPIExports, 1) + c.handleReadyDurationMetricOnDelete(e) + require.Empty(t, c.readyAPIExports) + }) + + t.Run("delete of untracked export is a no-op", func(t *testing.T) { + c := newTestController() + e := exportWithCreation("root:ws", "unknown", time.Now()) + require.NotPanics(t, func() { c.handleReadyDurationMetricOnDelete(e) }) + }) +} diff --git a/pkg/server/metrics/metrics.go b/pkg/server/metrics/metrics.go index 0b04ab41627..f3331fea82a 100644 --- a/pkg/server/metrics/metrics.go +++ b/pkg/server/metrics/metrics.go @@ -77,6 +77,15 @@ var ( }, []string{"condition", "status"}, ) + + apiExportReadyDurationMs = metrics.NewHistogram( + &metrics.HistogramOpts{ + Name: "kcp_apiexport_ready_duration_ms", + Help: "Duration in milliseconds from APIExport creation to becoming fully operational (IdentityValid and VirtualWorkspaceURLsReady both True).", + StabilityLevel: metrics.ALPHA, + Buckets: []float64{100, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000, 300000}, + }, + ) ) func init() { @@ -86,6 +95,7 @@ func init() { legacyregistry.MustRegister(apiBindingConditionStatus) legacyregistry.MustRegister(apiBindingReadyDurationMs) legacyregistry.MustRegister(apiExportConditionStatus) + legacyregistry.MustRegister(apiExportReadyDurationMs) } // IncrementLogicalClusterCount increments the count for the given shard and phase. @@ -142,3 +152,8 @@ func IncrementAPIExportConditionStatus(conditionType, status string) { func DecrementAPIExportConditionStatus(conditionType, status string) { apiExportConditionStatus.WithLabelValues(conditionType, status).Dec() } + +// ObserveAPIExportReadyDuration records the duration from APIExport creation to fully operational. +func ObserveAPIExportReadyDuration(creationTime time.Time) { + apiExportReadyDurationMs.Observe(float64(time.Since(creationTime).Milliseconds())) +}