From 67666512eeb3cf1c460a9ff1536b5b75a5370f13 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 13:12:43 +0200 Subject: [PATCH 01/14] Migrate EvictionReconciler to SSA status updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace MergeFromWithOptimisticLock + retry with Status().Apply() for all eviction status updates. Introduces ConditionsFromStatus and SetApplyConfigurationStatusCondition helpers to utils — mirroring meta.SetStatusCondition semantics — as the foundation for all SSA condition management. The evictionStatusCfg helper seeds every apply with the full set of owned fields so SSA never prunes values between reconcile cycles. --- internal/controller/eviction_controller.go | 292 +++++++++++---------- internal/utils/conditions.go | 116 ++++++++ 2 files changed, 271 insertions(+), 137 deletions(-) create mode 100644 internal/utils/conditions.go diff --git a/internal/controller/eviction_controller.go b/internal/controller/eviction_controller.go index ae502fa..1d1ca82 100644 --- a/internal/controller/eviction_controller.go +++ b/internal/controller/eviction_controller.go @@ -32,12 +32,13 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/retry" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" logger "sigs.k8s.io/controller-runtime/pkg/log" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -85,6 +86,19 @@ func moveToBack(instances []string) []string { return instances } +// evictionStatusCfg builds an EvictionStatusApplyConfiguration 
pre-populated +// with the current scalar fields and conditions from the fetched eviction. +// Call sites upsert the single condition they are setting via +// utils.SetApplyConfigurationStatusCondition, then apply. +func evictionStatusCfg(eviction *kvmv1.Eviction) *apiv1.EvictionStatusApplyConfiguration { + cfg := apiv1.EvictionStatus(). + WithHypervisorServiceId(eviction.Status.HypervisorServiceId). + WithOutstandingRamMb(eviction.Status.OutstandingRamMb) + cfg.OutstandingInstances = eviction.Status.OutstandingInstances + cfg.Conditions = utils.ConditionsFromStatus(eviction.Status.Conditions) + return cfg +} + // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=evictions,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=evictions/status,verbs=get;update;patch // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=evictions/finalizers,verbs=update @@ -118,14 +132,16 @@ func (r *EvictionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c statusCondition := meta.FindStatusCondition(eviction.Status.Conditions, kvmv1.ConditionTypeEvicting) if statusCondition == nil { - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeEvicting, - Status: metav1.ConditionTrue, - Message: "Running", - Reason: kvmv1.ConditionReasonRunning, - }) - - return ctrl.Result{}, r.updateStatus(ctx, eviction) + statusCfg := evictionStatusCfg(eviction) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeEvicting). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonRunning). 
+ WithMessage("Running")) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)) } switch statusCondition.Status { @@ -157,36 +173,18 @@ func (r *EvictionReconciler) handleRunning(ctx context.Context, eviction *kvmv1. return r.evictNext(ctx, eviction) } - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeEvicting, - Status: metav1.ConditionFalse, - Message: "eviction completed successfully", - Reason: kvmv1.ConditionReasonSucceeded, - }) - - eviction.Status.OutstandingRamMb = 0 logger.FromContext(ctx).Info("succeeded") - return ctrl.Result{}, r.updateStatus(ctx, eviction) -} - -func (r *EvictionReconciler) updateStatus(ctx context.Context, eviction *kvmv1.Eviction) error { - desiredStatus := eviction.Status.DeepCopy() - return retry.RetryOnConflict(utils.StatusPatchBackoff, func() error { - freshEviction := &kvmv1.Eviction{} - if err := r.Get(ctx, client.ObjectKeyFromObject(eviction), freshEviction); err != nil { - return err - } - freshBase := freshEviction.DeepCopy() - // Apply desired conditions and scalar fields onto the fresh status - for _, c := range desiredStatus.Conditions { - meta.SetStatusCondition(&freshEviction.Status.Conditions, c) - } - freshEviction.Status.OutstandingInstances = desiredStatus.OutstandingInstances - freshEviction.Status.OutstandingRamMb = desiredStatus.OutstandingRamMb - freshEviction.Status.HypervisorServiceId = desiredStatus.HypervisorServiceId - return r.Status().Patch(ctx, freshEviction, client.MergeFromWithOptions(freshBase, - client.MergeFromWithOptimisticLock{}), client.FieldOwner(EvictionControllerName)) - }) + statusCfg := evictionStatusCfg(eviction) + statusCfg.WithOutstandingRamMb(0) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeEvicting). 
+ WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonSucceeded). + WithMessage("eviction completed successfully")) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)) } func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv1.Eviction, hv *kvmv1.Hypervisor) (ctrl.Result, error) { @@ -194,14 +192,22 @@ func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv // If the hypervisor should exist, then we need to ensure it is disabled before we start evicting if expectHypervisor && !meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) { - // Hypervisor is not disabled (yet?), reflect that as a failing preflight check - if meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypePreflight, - Status: metav1.ConditionFalse, - Message: "hypervisor not disabled", - Reason: kvmv1.ConditionReasonFailed, - }) { - return ctrl.Result{}, r.updateStatus(ctx, eviction) + // Hypervisor is not disabled (yet?), reflect that as a failing preflight check. + // Only apply if the condition is new or changed, to avoid unnecessary API calls on repeated requeues. + existing := meta.FindStatusCondition(eviction.Status.Conditions, kvmv1.ConditionTypePreflight) + if existing == nil || existing.Status != metav1.ConditionFalse { + statusCfg := evictionStatusCfg(eviction) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypePreflight). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonFailed). 
+ WithMessage("hypervisor not disabled")) + if err := r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)); err != nil { + return ctrl.Result{}, err + } } return ctrl.Result{RequeueAfter: defaultPollTime}, nil // Wait for hypervisor to be disabled } @@ -216,47 +222,56 @@ func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv if expectHypervisor { // Abort eviction - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeEvicting, - Status: metav1.ConditionFalse, - Message: fmt.Sprintf("failed to get hypervisor %v", err), - Reason: kvmv1.ConditionReasonFailed, - }) - return ctrl.Result{}, r.updateStatus(ctx, eviction) - } else { - // That is (likely) an eviction for a node that never registered - // so we are good to go - msg := "eviction completed successfully due to expected case of no hypervisor" - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeEvicting, - Status: metav1.ConditionFalse, - Message: msg, - Reason: kvmv1.ConditionReasonSucceeded, - }) - eviction.Status.OutstandingRamMb = 0 - logger.FromContext(ctx).Info(msg) - return ctrl.Result{}, r.updateStatus(ctx, eviction) + statusCfg := evictionStatusCfg(eviction) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeEvicting). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonFailed). 
+ WithMessage(fmt.Sprintf("failed to get hypervisor %v", err))) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)) } + + // That is (likely) an eviction for a node that never registered + // so we are good to go + msg := "eviction completed successfully due to expected case of no hypervisor" + logger.FromContext(ctx).Info(msg) + statusCfg := evictionStatusCfg(eviction) + statusCfg.WithOutstandingRamMb(0) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeEvicting). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonSucceeded). + WithMessage(msg)) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)) } + var instances []string if hypervisor.Servers != nil { - uuids := make([]string, len(*hypervisor.Servers)) + instances = make([]string, len(*hypervisor.Servers)) for i, server := range *hypervisor.Servers { - uuids[i] = server.UUID + instances[i] = server.UUID } - eviction.Status.OutstandingInstances = uuids } - // Update status - eviction.Status.HypervisorServiceId = hypervisor.ID - eviction.Status.OutstandingRamMb = int64(hypervisor.MemoryMBUsed) - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypePreflight, - Status: metav1.ConditionTrue, - Message: "Preflight checks passed, hypervisor is disabled and ready for eviction", - Reason: kvmv1.ConditionReasonSucceeded, - }) - return ctrl.Result{}, r.updateStatus(ctx, eviction) + statusCfg := evictionStatusCfg(eviction) + statusCfg.WithHypervisorServiceId(hypervisor.ID) + statusCfg.WithOutstandingRamMb(int64(hypervisor.MemoryMBUsed)) + statusCfg.OutstandingInstances = instances + 
utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypePreflight). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonSucceeded). + WithMessage("Preflight checks passed, hypervisor is disabled and ready for eviction")) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)) } // Tries to handle the NotFound-error by updating the status @@ -265,18 +280,22 @@ func (r *EvictionReconciler) handleNotFound(ctx context.Context, eviction *kvmv1 return err } logger.FromContext(ctx).Info("Instance is gone") - var uuid string - eviction.Status.OutstandingInstances, uuid = popInstance(eviction.Status.OutstandingInstances) + remaining, uuid := popInstance(eviction.Status.OutstandingInstances) if uuid == "" { return nil } - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeMigration, - Status: metav1.ConditionFalse, - Message: fmt.Sprintf("Instance %s is gone", uuid), - Reason: kvmv1.ConditionReasonSucceeded, - }) - return r.updateStatus(ctx, eviction) + + statusCfg := evictionStatusCfg(eviction) + statusCfg.OutstandingInstances = remaining + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeMigration). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonSucceeded). 
+ WithMessage(fmt.Sprintf("Instance %s is gone", uuid))) + return r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)) } func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Eviction) (ctrl.Result, error) { @@ -309,29 +328,27 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic case "ERROR": // Needs manual intervention (or another operator fixes it) // put it at the end of the list (beginning of array) - eviction.Status.OutstandingInstances = moveToBack(eviction.Status.OutstandingInstances) + reordered := moveToBack(eviction.Status.OutstandingInstances) log.Info("error", "faultMessage", vm.Fault.Message) - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeMigration, - Status: metav1.ConditionFalse, - Message: fmt.Sprintf("Migration of instance %s failed: %s", vm.ID, vm.Fault.Message), - Reason: kvmv1.ConditionReasonFailed, - }) - - return ctrl.Result{}, errors.Join(fmt.Errorf("error migrating instance %v", uuid), - r.updateStatus(ctx, eviction)) + + statusCfg := evictionStatusCfg(eviction) + statusCfg.OutstandingInstances = reordered + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeMigration). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonFailed). 
+ WithMessage(fmt.Sprintf("Migration of instance %s failed: %s", vm.ID, vm.Fault.Message))) + applyErr := r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)) + return ctrl.Result{}, errors.Join(fmt.Errorf("error migrating instance %v", uuid), applyErr) } currentHypervisor, _, _ := strings.Cut(vm.HypervisorHostname, ".") if currentHypervisor != eviction.Spec.Hypervisor { log.Info("migrated") - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeMigration, - Status: metav1.ConditionFalse, - Message: fmt.Sprintf("Migration of instance %s finished", vm.ID), - Reason: kvmv1.ConditionReasonSucceeded, - }) // So, it is already off this one, do we need to verify it? if vm.Status == "VERIFY_RESIZE" { @@ -346,27 +363,42 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic } // All done - eviction.Status.OutstandingInstances, _ = popInstance(eviction.Status.OutstandingInstances) - return ctrl.Result{}, r.updateStatus(ctx, eviction) + popped, _ := popInstance(eviction.Status.OutstandingInstances) + + statusCfg := evictionStatusCfg(eviction) + statusCfg.OutstandingInstances = popped + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeMigration). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonSucceeded). + WithMessage(fmt.Sprintf("Migration of instance %s finished", vm.ID))) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)) } if vm.TaskState == "deleting" { //nolint:gocritic // We just have to wait for it to be gone. Try the next one. 
- eviction.Status.OutstandingInstances = moveToBack(eviction.Status.OutstandingInstances) - - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeMigration, - Status: metav1.ConditionFalse, - Message: fmt.Sprintf("Live migration of terminating instance %s skipped", vm.ID), - Reason: kvmv1.ConditionReasonFailed, - }) - if err := r.updateStatus(ctx, eviction); err != nil { + reordered := moveToBack(eviction.Status.OutstandingInstances) + + statusCfg := evictionStatusCfg(eviction) + statusCfg.OutstandingInstances = reordered + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeMigration). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonFailed). + WithMessage(fmt.Sprintf("Live migration of terminating instance %s skipped", vm.ID))) + if err := r.Status().Apply(ctx, + apiv1.Eviction(eviction.Name, eviction.Namespace).WithStatus(statusCfg), + client.ForceOwnership, client.FieldOwner(EvictionControllerName)); err != nil { return ctrl.Result{}, fmt.Errorf("could not update status due to %w", err) } return ctrl.Result{RequeueAfter: defaultPollTime}, nil } else if vm.Status == "ACTIVE" || vm.PowerState == 1 { log.Info("trigger live-migration") - if err := r.liveMigrate(ctx, vm.ID, eviction); err != nil { + if err := r.liveMigrate(ctx, vm.ID, eviction.Spec.Hypervisor); err != nil { if err2 := r.handleNotFound(ctx, eviction, err); err2 != nil { return ctrl.Result{}, err2 } @@ -374,7 +406,7 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic } } else { log.Info("trigger cold-migration") - if err := r.coldMigrate(ctx, vm.ID, eviction); err != nil { + if err := r.coldMigrate(ctx, vm.ID, eviction.Spec.Hypervisor); err != nil { if err2 := r.handleNotFound(ctx, eviction, err); err2 != nil { return ctrl.Result{}, err2 } @@ -388,7 +420,7 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction 
*kvmv1.Evic return ctrl.Result{RequeueAfter: defaultPollTime}, nil } -func (r *EvictionReconciler) liveMigrate(ctx context.Context, uuid string, eviction *kvmv1.Eviction) error { +func (r *EvictionReconciler) liveMigrate(ctx context.Context, uuid, hypervisorName string) error { log := logger.FromContext(ctx) liveMigrateOpts := servers.LiveMigrateOpts{ @@ -397,36 +429,22 @@ func (r *EvictionReconciler) liveMigrate(ctx context.Context, uuid string, evict res := servers.LiveMigrate(ctx, r.computeClient, uuid, liveMigrateOpts) if res.Err != nil { - err := fmt.Errorf("failed to evict VM %s due to %w", uuid, res.Err) - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeMigration, - Status: metav1.ConditionFalse, - Message: err.Error(), - Reason: kvmv1.ConditionReasonFailed, - }) - return err + return fmt.Errorf("failed to evict VM %s due to %w", uuid, res.Err) } - log.Info("Live migrating server", "server", uuid, "source", eviction.Spec.Hypervisor, "X-Openstack-Request-Id", res.Header.Get("X-Openstack-Request-Id")) + log.Info("Live migrating server", "server", uuid, "source", hypervisorName, "X-Openstack-Request-Id", res.Header.Get("X-Openstack-Request-Id")) return nil } -func (r *EvictionReconciler) coldMigrate(ctx context.Context, uuid string, eviction *kvmv1.Eviction) error { +func (r *EvictionReconciler) coldMigrate(ctx context.Context, uuid, hypervisorName string) error { log := logger.FromContext(ctx) res := servers.Migrate(ctx, r.computeClient, uuid) if res.Err != nil { - err := fmt.Errorf("failed to evict stopped server %s due to %w", uuid, res.Err) - meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeMigration, - Status: metav1.ConditionFalse, - Message: err.Error(), - Reason: kvmv1.ConditionReasonFailed, - }) - return err + return fmt.Errorf("failed to evict stopped server %s due to %w", uuid, res.Err) } - log.Info("Cold-migrating server", "server", uuid, "source", 
eviction.Spec.Hypervisor, "X-Openstack-Request-Id", res.Header.Get("X-Openstack-Request-Id")) + log.Info("Cold-migrating server", "server", uuid, "source", hypervisorName, "X-Openstack-Request-Id", res.Header.Get("X-Openstack-Request-Id")) return nil } diff --git a/internal/utils/conditions.go b/internal/utils/conditions.go new file mode 100644 index 0000000..2eef3a2 --- /dev/null +++ b/internal/utils/conditions.go @@ -0,0 +1,116 @@ +/* +SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// ConditionsFromStatus converts a []metav1.Condition (as stored in a status +// subresource) into a []k8sacmetav1.ConditionApplyConfiguration suitable for +// use with server-side apply. All fields including LastTransitionTime are +// preserved verbatim. 
+func ConditionsFromStatus(conditions []metav1.Condition) []k8sacmetav1.ConditionApplyConfiguration {
+	result := make([]k8sacmetav1.ConditionApplyConfiguration, len(conditions))
+	for i := range conditions {
+		c := &conditions[i]
+		result[i] = k8sacmetav1.ConditionApplyConfiguration{
+			Type:               &c.Type,
+			Status:             &c.Status,
+			Reason:             &c.Reason,
+			Message:            &c.Message,
+			LastTransitionTime: &c.LastTransitionTime,
+		}
+	}
+	return result
+}
+
+// SetApplyConfigurationStatusCondition sets the corresponding condition in
+// conditions to newCondition and returns true if the conditions are changed by
+// this call. It mirrors the behaviour of k8s.io/apimachinery/pkg/api/meta.SetStatusCondition:
+//
+// 1. If a condition of the specified type does not exist, newCondition is
+// appended. LastTransitionTime is set to now if not provided.
+// 2. If a condition of the specified type already exists, Status, Reason and
+// Message are updated (ObservedGeneration is NOT propagated — NOTE(review): confirm intended; the taint controller's skip-check compares ObservedGeneration). LastTransitionTime is updated to now (or the provided value)
+// only when Status changes; otherwise the existing LastTransitionTime is
+// preserved.
+func SetApplyConfigurationStatusCondition(conditions *[]k8sacmetav1.ConditionApplyConfiguration, newCondition k8sacmetav1.ConditionApplyConfiguration) (changed bool) { + if conditions == nil { + return false + } + + // Find existing entry by type + var existing *k8sacmetav1.ConditionApplyConfiguration + for i := range *conditions { + if (*conditions)[i].Type != nil && *(*conditions)[i].Type == *newCondition.Type { + existing = &(*conditions)[i] + break + } + } + + if existing == nil { + // New condition: set LastTransitionTime if not provided + if newCondition.LastTransitionTime == nil { + now := metav1.Now() + newCondition.LastTransitionTime = &now + } + *conditions = append(*conditions, newCondition) + return true + } + + // Existing condition: update fields, handle LastTransitionTime + statusChanged := existing.Status == nil || newCondition.Status == nil || + *existing.Status != *newCondition.Status + + if statusChanged { + existing.Status = newCondition.Status + if newCondition.LastTransitionTime != nil { + existing.LastTransitionTime = newCondition.LastTransitionTime + } else { + now := metav1.Now() + existing.LastTransitionTime = &now + } + changed = true + } + // When status is unchanged, LastTransitionTime is intentionally preserved. + + if strChanged(&existing.Reason, newCondition.Reason) { + existing.Reason = newCondition.Reason + changed = true + } + if strChanged(&existing.Message, newCondition.Message) { + existing.Message = newCondition.Message + changed = true + } + + return changed +} + +// strChanged reports whether the pointer value of b differs from the current +// value at a. 
+func strChanged(a **string, b *string) bool { + if *a == nil && b == nil { + return false + } + if *a == nil || b == nil { + return true + } + return **a != *b +} From a65a52da06d589b3c2a6055fb4c3106fe83e3c49 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:39:46 +0200 Subject: [PATCH 02/14] Migrate HypervisorController to SSA status updates Replace PatchHypervisorStatusWithRetry with Status().Apply() for the InternalIP scalar and ConditionTypeTerminating condition. Re-fetch the hypervisor after the status apply so the subsequent spec patch sees a fresh resourceVersion. --- internal/controller/hypervisor_controller.go | 46 +++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/internal/controller/hypervisor_controller.go b/internal/controller/hypervisor_controller.go index 138aa0b..ea95c01 100644 --- a/internal/controller/hypervisor_controller.go +++ b/internal/controller/hypervisor_controller.go @@ -26,10 +26,10 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -37,6 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/global" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -101,40 +102,43 @@ func (hv *HypervisorController) Reconcile(ctx context.Context, req ctrl.Request) // continue with creation } else { // update 
Status if needed - base := hypervisor.DeepCopy() // transfer internal IP + var newInternalIP string for _, address := range node.Status.Addresses { - if address.Type == corev1.NodeInternalIP && hypervisor.Status.InternalIP != address.Address { - hypervisor.Status.InternalIP = address.Address + if address.Type == corev1.NodeInternalIP { + newInternalIP = address.Address break } } // update terminating condition nodeTerminationCondition := FindNodeStatusCondition(node.Status.Conditions, "Terminating") + + // Capture values to apply - only mutate fields this controller owns + statusCfg := apiv1.HypervisorStatus().WithInternalIP(newInternalIP) + statusCfg.Conditions = utils.ConditionsFromStatus(hypervisor.Status.Conditions) + // Node might be terminating, propagate condition to hypervisor if nodeTerminationCondition != nil && nodeTerminationCondition.Status == corev1.ConditionTrue { - // Node might be terminating, propagate condition to hypervisor - meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeTerminating, - Status: metav1.ConditionStatus(nodeTerminationCondition.Status), - Reason: nodeTerminationCondition.Reason, - Message: nodeTerminationCondition.Message, - }) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeTerminating). + WithStatus(metav1.ConditionStatus(nodeTerminationCondition.Status)). + WithReason(nodeTerminationCondition.Reason). 
+ WithMessage(nodeTerminationCondition.Message)) } - if !equality.Semantic.DeepEqual(hypervisor, base) { - // Capture values to apply - only mutate fields this controller owns - newInternalIP := hypervisor.Status.InternalIP - terminatingCondition := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeTerminating) + if err := hv.Status().Apply(ctx, + apiv1.Hypervisor(hypervisor.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(HypervisorControllerName)); err != nil { + return ctrl.Result{}, err + } - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, hv.Client, hypervisor.Name, HypervisorControllerName, func(h *kvmv1.Hypervisor) { - h.Status.InternalIP = newInternalIP - if terminatingCondition != nil { - meta.SetStatusCondition(&h.Status.Conditions, *terminatingCondition) - } - }) + // Re-fetch after status apply so the spec patch has a fresh resourceVersion + if err := hv.Get(ctx, k8sclient.ObjectKeyFromObject(hypervisor), hypervisor); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to re-fetch hypervisor after status apply: %w", err) } + base := hypervisor.DeepCopy() syncLabelsAndAnnotations(nodeLabels, hypervisor, node) if equality.Semantic.DeepEqual(hypervisor, base) { From d796778d96073a3659832252931c15e83702b84c Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:40:04 +0200 Subject: [PATCH 03/14] Migrate HypervisorTaintController to SSA status updates Replace PatchHypervisorStatusWithRetry with Status().Apply() for the ConditionTypeTainted condition. 
--- .../controller/hypervisor_taint_controller.go | 64 +++++++++---------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/internal/controller/hypervisor_taint_controller.go b/internal/controller/hypervisor_taint_controller.go index 7822b34..3fefe5c 100644 --- a/internal/controller/hypervisor_taint_controller.go +++ b/internal/controller/hypervisor_taint_controller.go @@ -20,16 +20,17 @@ package controller import ( "context" - "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/predicate" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -46,52 +47,45 @@ type HypervisorTaintController struct { // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors/status,verbs=get;update;patch func (r *HypervisorTaintController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - hypervisor := &kvmv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: req.Name, - Labels: map[string]string{}, - }, - Spec: kvmv1.HypervisorSpec{ - HighAvailability: true, - InstallCertificate: true, - }, - } - - // Check if hypervisor already exists + hypervisor := &kvmv1.Hypervisor{} if err := r.Get(ctx, req.NamespacedName, hypervisor); err != nil { return ctrl.Result{}, k8sclient.IgnoreNotFound(err) } - before := hypervisor.DeepCopy() + var condStatus metav1.ConditionStatus + var reason, message string if HasKubectlManagedFields(&hypervisor.ObjectMeta) { - meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{ - Type: 
kvmv1.ConditionTypeTainted, - Status: metav1.ConditionTrue, - Reason: "Kubectl", - Message: "⚠️", - ObservedGeneration: hypervisor.Generation, - }) + condStatus = metav1.ConditionTrue + reason = "Kubectl" + message = "⚠️" } else { - meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeTainted, - Status: metav1.ConditionFalse, - Reason: "NoKubectl", - Message: "🟢", - ObservedGeneration: hypervisor.Generation, - }) + condStatus = metav1.ConditionFalse + reason = "NoKubectl" + message = "🟢" } - if equality.Semantic.DeepEqual(hypervisor, before) { + // Skip if already at the desired state + existing := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeTainted) + if existing != nil && existing.Status == condStatus && + existing.Reason == reason && existing.Message == message && + existing.ObservedGeneration == hypervisor.Generation { return ctrl.Result{}, nil } // Only set the Tainted condition this controller owns - taintedCondition := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeTainted) - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hypervisor.Name, HypervisorTaintControllerName, func(h *kvmv1.Hypervisor) { - if taintedCondition != nil { - meta.SetStatusCondition(&h.Status.Conditions, *taintedCondition) - } - }) + statusCfg := apiv1.HypervisorStatus() + statusCfg.Conditions = utils.ConditionsFromStatus(hypervisor.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeTainted). + WithStatus(condStatus). + WithReason(reason). + WithMessage(message). 
+ WithObservedGeneration(hypervisor.Generation)) + + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Hypervisor(hypervisor.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(HypervisorTaintControllerName)) } func (r *HypervisorTaintController) SetupWithManager(mgr ctrl.Manager) error { From 1d102d6dfc5c6d22c30522edf75e7015e7274b13 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:40:14 +0200 Subject: [PATCH 04/14] Migrate HypervisorInstanceHaController to SSA status updates Replace PatchHypervisorStatusWithRetry with Status().Apply() for the ConditionTypeHaEnabled condition. --- .../hypervisor_instance_ha_controller.go | 112 ++++++++---------- 1 file changed, 47 insertions(+), 65 deletions(-) diff --git a/internal/controller/hypervisor_instance_ha_controller.go b/internal/controller/hypervisor_instance_ha_controller.go index 9b622de..aeab9de 100644 --- a/internal/controller/hypervisor_instance_ha_controller.go +++ b/internal/controller/hypervisor_instance_ha_controller.go @@ -21,15 +21,16 @@ import ( "context" "errors" - "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" logger "sigs.k8s.io/controller-runtime/pkg/log" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -60,23 +61,20 @@ func (r *HypervisorInstanceHaController) Reconcile(ctx context.Context, req ctrl return ctrl.Result{}, nil // Onboarding not started, so no hypervisor and nothing to do } - old := hv.DeepCopy() - // Determine if HA should be disabled and the reason evicted := 
meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) testing := onboardingCondition.Status == metav1.ConditionTrue && - onboardingCondition.Reason != kvmv1.ConditionReasonHandover // Onboarding still testing (not yet in Handover phase) + onboardingCondition.Reason != kvmv1.ConditionReasonHandover aborted := onboardingCondition.Status == metav1.ConditionFalse && - onboardingCondition.Reason == kvmv1.ConditionReasonAborted // Onboarding was aborted - shouldDisable := !hv.Spec.HighAvailability || // HA not requested - evicted || // HA not needed as it is empty - testing || // HA not needed for test VMs - aborted // HA not needed when onboarding aborted + onboardingCondition.Reason == kvmv1.ConditionReasonAborted + shouldDisable := !hv.Spec.HighAvailability || evicted || testing || aborted + + var desiredStatus metav1.ConditionStatus + var reason, message string if shouldDisable { + desiredStatus = metav1.ConditionFalse // Determine the reason based on why HA is being disabled - var reason string - var message string switch { case evicted: reason = kvmv1.ConditionReasonHaEvicted @@ -88,73 +86,57 @@ func (r *HypervisorInstanceHaController) Reconcile(ctx context.Context, req ctrl reason = kvmv1.ConditionReasonSucceeded message = "HA disabled per spec" } + } else { + desiredStatus = metav1.ConditionTrue + reason = kvmv1.ConditionReasonSucceeded + message = "HA is enabled" + } - if !meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHaEnabled, - Status: metav1.ConditionFalse, - Message: message, - Reason: reason, - }) { - // Desired state already achieved - return ctrl.Result{}, nil - } + // Skip if already at desired state + existing := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHaEnabled) + if existing != nil && existing.Status == desiredStatus && + existing.Reason == reason && existing.Message == message { + return ctrl.Result{}, nil + } - if err := disableInstanceHA(hv); err != 
nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeHaEnabled, - Status: metav1.ConditionUnknown, - Message: err.Error(), - Reason: kvmv1.ConditionReasonFailed, - } - - patchErr := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) - return ctrl.Result{}, errors.Join(err, patchErr) - } + // Perform the HA enable/disable action + var actionErr error + if shouldDisable { + actionErr = disableInstanceHA(hv) } else { - if !meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHaEnabled, - Status: metav1.ConditionTrue, - Message: "HA is enabled", - Reason: kvmv1.ConditionReasonSucceeded, - }) { - // Desired state already achieved - return ctrl.Result{}, nil - } - - if err := enableInstanceHA(hv); err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeHaEnabled, - Status: metav1.ConditionUnknown, - Message: err.Error(), - Reason: kvmv1.ConditionReasonFailed, - } - - patchErr := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) - return ctrl.Result{}, errors.Join(err, patchErr) - } + actionErr = enableInstanceHA(hv) } - if equality.Semantic.DeepEqual(hv, old) { - return ctrl.Result{}, nil + condStatus := desiredStatus + condReason := reason + condMessage := message + if actionErr != nil { + condStatus = metav1.ConditionUnknown + condReason = kvmv1.ConditionReasonFailed + condMessage = actionErr.Error() } // Only set the HaEnabled condition this controller owns - haCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHaEnabled) - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) { - if haCondition != nil { - 
meta.SetStatusCondition(&h.Status.Conditions, *haCondition) - } - }) + statusCfg := apiv1.HypervisorStatus() + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeHaEnabled). + WithStatus(condStatus). + WithReason(condReason). + WithMessage(condMessage)) + + applyErr := r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(HypervisorInstanceHaControllerName)) + + return ctrl.Result{}, errors.Join(actionErr, applyErr) } // SetupWithManager sets up the controller with the Manager. func (r *HypervisorInstanceHaController) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). Named(HypervisorInstanceHaControllerName). - For(&kvmv1.Hypervisor{}). // trigger the r.Reconcile whenever a hypervisor is created/updated/deleted. + For(&kvmv1.Hypervisor{}). Complete(r) } From ee82beeb6f65d71dd89a08fc0817adbfc7a22769 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:40:23 +0200 Subject: [PATCH 05/14] Migrate HypervisorMaintenanceController to SSA status updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace PatchHypervisorStatusWithRetry with Status().Apply() for ConditionTypeHypervisorDisabled, ConditionTypeEvicting, and the Evicted scalar. ConditionTypeEvicting is removed by omitting it from the apply config — SSA prunes map-list entries no longer claimed by the sole owner. 
--- .../hypervisor_maintenance_controller.go | 183 ++++++++++-------- 1 file changed, 102 insertions(+), 81 deletions(-) diff --git a/internal/controller/hypervisor_maintenance_controller.go b/internal/controller/hypervisor_maintenance_controller.go index c35b54e..1414de8 100644 --- a/internal/controller/hypervisor_maintenance_controller.go +++ b/internal/controller/hypervisor_maintenance_controller.go @@ -24,11 +24,11 @@ import ( "context" "fmt" - "k8s.io/apimachinery/pkg/api/equality" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -38,6 +38,7 @@ import ( "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/services" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -58,7 +59,6 @@ type HypervisorMaintenanceController struct { func (hec *HypervisorMaintenanceController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { hv := &kvmv1.Hypervisor{} if err := hec.Get(ctx, req.NamespacedName, hv); err != nil { - // OnboardingReconciler not found errors, could be deleted return ctrl.Result{}, k8sclient.IgnoreNotFound(err) } @@ -69,149 +69,171 @@ func (hec *HypervisorMaintenanceController) Reconcile(ctx context.Context, req c return ctrl.Result{}, nil } - old := hv.DeepCopy() - - if err := hec.reconcileComputeService(ctx, hv); err != nil { + // Determine desired disabled condition and eviction state + disabledCond, evictingCond, evicted, err := 
hec.reconcileComputeService(ctx, hv) + if err != nil { return ctrl.Result{}, err } - if err := hec.reconcileEviction(ctx, hv); err != nil { + evictingCond, evicted, err = hec.reconcileEviction(ctx, hv, evictingCond, evicted) + if err != nil { return ctrl.Result{}, err } - if equality.Semantic.DeepEqual(hv, old) { - return ctrl.Result{}, nil - } + // Build status apply config: always include conditions this controller owns; + // omit ConditionTypeEvicting when it should be removed (SSA prunes it). + statusCfg := apiv1.HypervisorStatus().WithEvicted(evicted) + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) - // Capture only the fields this controller owns - disabledCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) - evictingCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) - evicted := hv.Status.Evicted + if disabledCond != nil { + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *disabledCond) + } - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, hec.Client, hv.Name, HypervisorMaintenanceControllerName, func(h *kvmv1.Hypervisor) { - if disabledCondition != nil { - meta.SetStatusCondition(&h.Status.Conditions, *disabledCondition) - } - if evictingCondition != nil { - meta.SetStatusCondition(&h.Status.Conditions, *evictingCondition) - } else { - meta.RemoveStatusCondition(&h.Status.Conditions, kvmv1.ConditionTypeEvicting) + if evictingCond != nil { + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *evictingCond) + } else { + // Remove ConditionTypeEvicting by omitting it — SSA prunes sole-owned entries. 
+ filtered := statusCfg.Conditions[:0] + for _, c := range statusCfg.Conditions { + if c.Type == nil || *c.Type != kvmv1.ConditionTypeEvicting { + filtered = append(filtered, c) + } } - h.Status.Evicted = evicted - }) + statusCfg.Conditions = filtered + } + + return ctrl.Result{}, hec.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(HypervisorMaintenanceControllerName)) } -func (hec *HypervisorMaintenanceController) reconcileComputeService(ctx context.Context, hv *kvmv1.Hypervisor) error { +// reconcileComputeService enables/disables the nova-compute service based on +// hv.Spec.Maintenance. Returns the desired HypervisorDisabled condition (nil if +// service ID is unset) and the initial evicting/evicted state. +func (hec *HypervisorMaintenanceController) reconcileComputeService(ctx context.Context, hv *kvmv1.Hypervisor) ( + disabledCond *k8sacmetav1.ConditionApplyConfiguration, + evictingCond *k8sacmetav1.ConditionApplyConfiguration, + evicted bool, + err error, +) { log := logger.FromContext(ctx) serviceId := hv.Status.ServiceID - // We can only do something here, if there is a service to begin with. - // The onboarding should take care of that if serviceId == "" { - return nil + // We can only do something here, if there is a service to begin with. + // The onboarding should take care of that. + return nil, nil, false, nil } switch hv.Spec.Maintenance { case kvmv1.MaintenanceUnset: - // Enable the compute service (in case we haven't done so already) - if !meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorDisabled, - Status: metav1.ConditionFalse, - Message: "Hypervisor is enabled", - Reason: kvmv1.ConditionReasonSucceeded, - }) { - // Spec matches status - return nil + cond := k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeHypervisorDisabled). + WithStatus(metav1.ConditionFalse). + WithMessage("Hypervisor is enabled"). 
+ WithReason(kvmv1.ConditionReasonSucceeded) + + existing := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) + if existing != nil && existing.Status == metav1.ConditionFalse { + // Already enabled, nothing to do + return cond, nil, false, nil } - // We need to enable the host as per spec enableService := services.UpdateOpts{Status: services.ServiceEnabled} + // We need to enable the host as per spec log.Info("Enabling hypervisor", "id", serviceId) - _, err := services.Update(ctx, hec.computeClient, serviceId, enableService).Extract() - if err != nil { - return fmt.Errorf("failed to enable hypervisor due to %w", err) + if _, err := services.Update(ctx, hec.computeClient, serviceId, enableService).Extract(); err != nil { + return nil, nil, false, fmt.Errorf("failed to enable hypervisor due to %w", err) } + return cond, nil, false, nil + case kvmv1.MaintenanceManual, kvmv1.MaintenanceAuto, kvmv1.MaintenanceHA, kvmv1.MaintenanceTermination: - // Disable the compute service: + // Disable the compute service. // Also in case of HA, as it doesn't hurt to disable it twice, and this // allows us to enable the service again, when the maintenance field is // cleared in the case above. - if !meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorDisabled, - Status: metav1.ConditionTrue, - Message: "Hypervisor is disabled", - Reason: kvmv1.ConditionReasonSucceeded, - }) { - // Spec matches status - return nil + cond := k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeHypervisorDisabled). + WithStatus(metav1.ConditionTrue). + WithMessage("Hypervisor is disabled"). 
+ WithReason(kvmv1.ConditionReasonSucceeded) + + existing := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) + if existing != nil && existing.Status == metav1.ConditionTrue { + // Already disabled, nothing to do + return cond, nil, false, nil } - // We need to disable the host as per spec - enableService := services.UpdateOpts{ + disableService := services.UpdateOpts{ Status: services.ServiceDisabled, DisabledReason: "Hypervisor CRD: spec.maintenance=" + hv.Spec.Maintenance, } + // We need to disable the host as per spec log.Info("Disabling hypervisor", "id", serviceId) - _, err := services.Update(ctx, hec.computeClient, serviceId, enableService).Extract() - if err != nil { - return fmt.Errorf("failed to disable hypervisor due to %w", err) + if _, err := services.Update(ctx, hec.computeClient, serviceId, disableService).Extract(); err != nil { + return nil, nil, false, fmt.Errorf("failed to disable hypervisor due to %w", err) } + return cond, nil, false, nil } - return nil + return nil, nil, false, nil } -func (hec *HypervisorMaintenanceController) reconcileEviction(ctx context.Context, hv *kvmv1.Hypervisor) error { +// reconcileEviction creates/deletes the Eviction CR and returns the desired +// ConditionTypeEvicting apply configuration (nil means remove the condition). +func (hec *HypervisorMaintenanceController) reconcileEviction( + ctx context.Context, + hv *kvmv1.Hypervisor, + evictingCond *k8sacmetav1.ConditionApplyConfiguration, + evicted bool, +) (*k8sacmetav1.ConditionApplyConfiguration, bool, error) { eviction := &kvmv1.Eviction{ - ObjectMeta: metav1.ObjectMeta{ - Name: hv.Name, - }, + ObjectMeta: metav1.ObjectMeta{Name: hv.Name}, } switch hv.Spec.Maintenance { case kvmv1.MaintenanceUnset: // Avoid deleting the eviction over and over. 
- if hv.Status.Evicted || meta.RemoveStatusCondition(&hv.Status.Conditions, kvmv1.ConditionTypeEvicting) { - err := k8sclient.IgnoreNotFound(hec.Delete(ctx, eviction)) - hv.Status.Evicted = false - return err + if !hv.Status.Evicted && meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) == nil { + return nil, false, nil } - return nil + err := k8sclient.IgnoreNotFound(hec.Delete(ctx, eviction)) + return nil, false, err // nil evictingCond → condition omitted from apply → SSA prunes it + case kvmv1.MaintenanceManual, kvmv1.MaintenanceAuto, kvmv1.MaintenanceTermination: // In case of "ha", the host gets emptied from the HA service if cond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeEvicting); cond != nil { if cond.Reason == kvmv1.ConditionReasonSucceeded { // We are done here, no need to look at the eviction any more - return nil + return evictingCond, evicted, nil } } + status, err := hec.ensureEviction(ctx, eviction, hv) if err != nil { - return err + return evictingCond, evicted, err } - var reason, message string + var reason, message string if status == metav1.ConditionFalse { message = "Evicted" reason = kvmv1.ConditionReasonSucceeded - hv.Status.Evicted = true + evicted = true } else { message = "Evicting" reason = kvmv1.ConditionReasonRunning - hv.Status.Evicted = false + evicted = false } - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeEvicting, - Status: status, - Reason: reason, - Message: message, - }) - - return nil + cond := k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeEvicting). + WithStatus(status). + WithReason(reason). 
+ WithMessage(message) + return cond, evicted, nil } - return nil + return evictingCond, evicted, nil } func (hec *HypervisorMaintenanceController) ensureEviction(ctx context.Context, eviction *kvmv1.Eviction, hypervisor *kvmv1.Hypervisor) (metav1.ConditionStatus, error) { @@ -240,9 +262,8 @@ func (hec *HypervisorMaintenanceController) ensureEviction(ctx context.Context, // check if we are still evicting (defaulting to yes) if meta.IsStatusConditionFalse(eviction.Status.Conditions, kvmv1.ConditionTypeEvicting) { return metav1.ConditionFalse, nil - } else { - return metav1.ConditionTrue, nil } + return metav1.ConditionTrue, nil } // SetupWithManager sets up the controller with the Manager. @@ -258,6 +279,6 @@ func (hec *HypervisorMaintenanceController) SetupWithManager(mgr ctrl.Manager) e return ctrl.NewControllerManagedBy(mgr). Named(HypervisorMaintenanceControllerName). For(&kvmv1.Hypervisor{}). - Owns(&kvmv1.Eviction{}). // trigger Reconcile whenever an Own-ed eviction is created/updated/deleted + Owns(&kvmv1.Eviction{}). Complete(hec) } From 5b4f924adb749d1327aa98bd5e37d02e01dab6ed Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:40:34 +0200 Subject: [PATCH 06/14] Migrate OnboardingController to SSA status updates Replace PatchHypervisorStatusWithRetry with Status().Apply(). The Nova ID lookup is inlined into Reconcile and combined with the initial condition in a single apply, avoiding two applies from the same field manager in one reconcile cycle which would cause SSA to prune the IDs. HypervisorID and ServiceID are included in every subsequent apply to retain ownership. 
--- internal/controller/onboarding_controller.go | 264 ++++++++----------- 1 file changed, 112 insertions(+), 152 deletions(-) diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go index 7e9fed7..3c0abd6 100644 --- a/internal/controller/onboarding_controller.go +++ b/internal/controller/onboarding_controller.go @@ -26,11 +26,11 @@ import ( "time" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -45,6 +45,7 @@ import ( "github.com/gophercloud/utils/v2/openstack/clientconfig" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -99,28 +100,30 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) // We bail here out, because the openstack api is not the best to poll if hv.Status.HypervisorID == "" || hv.Status.ServiceID == "" { - if err := r.ensureNovaProperties(ctx, hv); err != nil { + hypervisorID, serviceID, err := r.lookupNovaProperties(ctx, hv) + if err != nil { if errors.Is(err, errRequeue) { return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } return ctrl.Result{}, err } + statusCfg := apiv1.HypervisorStatus(). + WithHypervisorID(hypervisorID). + WithServiceID(serviceID) + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). 
+ WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonInitial). + WithMessage("Initial onboarding")) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) } // check condition reason status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) - if status == nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonInitial, - Message: "Initial onboarding", - } - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) - } - switch status.Reason { case kvmv1.ConditionReasonInitial: return ctrl.Result{}, r.initialOnboarding(ctx, hv) @@ -138,6 +141,19 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) } } +// applyOnboardingCondition applies a single onboarding condition via SSA, +// carrying all existing conditions and scalar fields forward. +func (r *OnboardingController) applyOnboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, cond k8sacmetav1.ConditionApplyConfiguration) error { + statusCfg := apiv1.HypervisorStatus(). + WithHypervisorID(hv.Status.HypervisorID). 
+ WithServiceID(hv.Status.ServiceID) + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, cond) + return r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) +} + func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hypervisor, computeHost, message string) error { status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) // Never onboarded @@ -145,42 +161,28 @@ func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hy return nil } - base := hv.DeepCopy() - - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonAborted, - Message: message, - }) - - if equality.Semantic.DeepEqual(hv, base) { - // Already aborted + // Already aborted with the same message — nothing to do + if status.Status == metav1.ConditionFalse && + status.Reason == kvmv1.ConditionReasonAborted && + status.Message == message { return nil } - condition := *meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if err := r.deleteTestServers(ctx, computeHost); err != nil { - condition = metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, // No cleanup, so we are still "onboarding" - Reason: kvmv1.ConditionReasonAborted, - Message: err.Error(), - } - - meta.SetStatusCondition(&hv.Status.Conditions, condition) - if equality.Semantic.DeepEqual(hv, base) { - return err - } - - return errors.Join(err, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - })) - } - - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, 
OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return errors.Join(err, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). // No cleanup, so we are still "onboarding" + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(err.Error()))) + } + + return r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(message)) } func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1.Hypervisor) error { @@ -190,7 +192,6 @@ func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1. } // Wait for aggregates controller to apply the desired state (zone and test aggregate) - // Extract aggregate names for comparison currentAggregateNames := make([]string, len(hv.Status.Aggregates)) for i, agg := range hv.Status.Aggregates { currentAggregateNames[i] = agg.Name @@ -212,15 +213,12 @@ func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1. return result.Err } - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: "Running onboarding tests", - } - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). 
+ WithMessage("Running onboarding tests")) } func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervisor, host string) (ctrl.Result, error) { @@ -237,21 +235,17 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis id := server.ID server, err = servers.Get(ctx, r.testComputeClient, id).Extract() if err != nil { - // should not happened log.Error(err, "failed to get test instance, instance vanished", "id", id) return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } // Set condition back to testing - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: "Server ended up in error state: " + server.Fault.Message, - } - if err = utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { + if err = r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage("Server ended up in error state: "+server.Fault.Message)); err != nil { return ctrl.Result{}, err } @@ -261,37 +255,32 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil + case "ACTIVE": consoleOutput, err := servers. ShowConsoleOutput(ctx, r.testComputeClient, server.ID, servers.ShowConsoleOutputOpts{Length: 11}). 
Extract() if err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("could not get console output %v", err), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("could not get console output %v", err))); err2 != nil { + return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } if !strings.Contains(consoleOutput, server.Name) { if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). 
+ WithMessage(fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt))); err2 != nil { + return ctrl.Result{}, err2 } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { if !gophercloud.ResponseCodeIs(err, http.StatusNotFound) { @@ -303,28 +292,25 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("failed to terminate instance %v", err), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). 
+ WithMessage(fmt.Sprintf("failed to terminate instance %v", err))); err2 != nil { + return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } return r.completeOnboarding(ctx, host, hv) + default: return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } } func (r *OnboardingController) completeOnboarding(ctx context.Context, host string, hv *kvmv1.Hypervisor) (ctrl.Result, error) { - // Check if we're in the RemovingTestAggregate phase onboardingCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if onboardingCondition != nil && onboardingCondition.Reason == kvmv1.ConditionReasonHandover { // We're waiting for aggregates and traits controllers to sync @@ -340,44 +326,32 @@ func (r *OnboardingController) completeOnboarding(ctx context.Context, host stri return ctrl.Result{}, nil } - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "Onboarding completed", - } - - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return ctrl.Result{}, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonSucceeded). 
+ WithMessage("Onboarding completed")) } // First time in completeOnboarding - clean up and prepare for aggregate sync - err := r.deleteTestServers(ctx, host) - if err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, // No cleanup, so we are still "onboarding" - Reason: kvmv1.ConditionReasonAborted, - Message: err.Error(), - } - return ctrl.Result{}, errors.Join(err, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - })) + if err := r.deleteTestServers(ctx, host); err != nil { + return ctrl.Result{}, errors.Join(err, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). // No cleanup, so we are still "onboarding" + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(err.Error()))) } // Mark onboarding as almost complete, triggers other controllers to do their part - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonHandover, - Message: "Waiting for other controllers to take over", - } - // Patch status to signal aggregates controller - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return ctrl.Result{}, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonHandover). 
+ WithMessage("Waiting for other controllers to take over")) } func (r *OnboardingController) deleteTestServers(ctx context.Context, host string) error { @@ -409,10 +383,10 @@ func (r *OnboardingController) deleteTestServers(ctx context.Context, host strin return errors.Join(errs...) } -func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvmv1.Hypervisor) error { +func (r *OnboardingController) lookupNovaProperties(ctx context.Context, hv *kvmv1.Hypervisor) (hypervisorID, serviceID string, err error) { hypervisorAddress := hv.Labels[corev1.LabelHostname] if hypervisorAddress == "" { - return errRequeue + return "", "", errRequeue } shortHypervisorAddress := strings.SplitN(hypervisorAddress, ".", 2)[0] @@ -421,41 +395,27 @@ func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvm hypervisorPages, err := hypervisors.List(r.computeClient, hypervisorQuery).AllPages(ctx) if err != nil { if gophercloud.ResponseCodeIs(err, http.StatusNotFound) { - return errRequeue + return "", "", errRequeue } - return err + return "", "", err } hs, err := hypervisors.ExtractHypervisors(hypervisorPages) if err != nil { - return err + return "", "", err } if len(hs) < 1 { - return errRequeue + return "", "", errRequeue } - var found = false - var myHypervisor hypervisors.Hypervisor for _, h := range hs { - short := strings.SplitN(h.HypervisorHostname, ".", 2)[0] - if short == shortHypervisorAddress { - myHypervisor = h - found = true - break + if strings.SplitN(h.HypervisorHostname, ".", 2)[0] == shortHypervisorAddress { + return h.ID, h.Service.ID, nil } } - if !found { - return fmt.Errorf("could not find exact match for %v", shortHypervisorAddress) - } - - hypervisorID := myHypervisor.ID - serviceID := myHypervisor.Service.ID - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - h.Status.HypervisorID = hypervisorID - h.Status.ServiceID = serviceID - }) + return "", 
"", fmt.Errorf("could not find exact match for %v", shortHypervisorAddress) } func (r *OnboardingController) createOrGetTestServer(ctx context.Context, zone, computeHost string, nodeUid types.UID) (*servers.Server, error) { @@ -476,9 +436,9 @@ func (r *OnboardingController) createOrGetTestServer(ctx context.Context, zone, } log := logger.FromContext(ctx) + var foundServer *servers.Server // Cleanup all other server with the same test prefix, except for the exact match // as the cleanup after onboarding may leak resources - var foundServer *servers.Server for _, server := range serverList { // The query is a substring search, we are looking for a prefix if !strings.HasPrefix(server.Name, serverPrefix) { From de6dcd3b5f1fcb99e3b7caa13b86ed5b13ec0344 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:40:41 +0200 Subject: [PATCH 07/14] Migrate HypervisorOffboardingReconciler to SSA status updates Replace PatchHypervisorStatusWithRetry with Status().Apply() for the ConditionTypeOffboarded condition. 
--- internal/controller/offboarding_controller.go | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/internal/controller/offboarding_controller.go b/internal/controller/offboarding_controller.go index d838471..1aec00c 100644 --- a/internal/controller/offboarding_controller.go +++ b/internal/controller/offboarding_controller.go @@ -29,11 +29,13 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" logger "sigs.k8s.io/controller-runtime/pkg/log" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -146,15 +148,21 @@ func (r *HypervisorOffboardingReconciler) Reconcile(ctx context.Context, req ctr return ctrl.Result{}, r.markOffboarded(ctx, hv) } +func (r *HypervisorOffboardingReconciler) applyStatus(ctx context.Context, hv *kvmv1.Hypervisor, cond *k8sacmetav1.ConditionApplyConfiguration) error { + statusCfg := apiv1.HypervisorStatus() + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *cond) + return r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(OffboardingControllerName)) +} + func (r *HypervisorOffboardingReconciler) setOffboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) { - err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OffboardingControllerName, func(h *kvmv1.Hypervisor) { - 
meta.SetStatusCondition(&h.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeOffboarded, - Status: metav1.ConditionFalse, - Reason: "Offboarding", - Message: message, - }) - }) + err := r.applyStatus(ctx, hv, k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOffboarded). + WithStatus(metav1.ConditionFalse). + WithReason("Offboarding"). + WithMessage(message)) if err != nil { return ctrl.Result{}, fmt.Errorf("cannot update hypervisor status due to %w", err) } @@ -162,14 +170,11 @@ func (r *HypervisorOffboardingReconciler) setOffboardingCondition(ctx context.Co } func (r *HypervisorOffboardingReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error { - err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OffboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeOffboarded, - Status: metav1.ConditionTrue, - Reason: "Offboarded", - Message: "Offboarding successful", - }) - }) + err := r.applyStatus(ctx, hv, k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOffboarded). + WithStatus(metav1.ConditionTrue). + WithReason("Offboarded"). + WithMessage("Offboarding successful")) if err != nil { return fmt.Errorf("cannot update hypervisor status due to %w", err) } From cf737d7b5dfab93f44340d22f529c0c026d52c7c Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:40:50 +0200 Subject: [PATCH 08/14] Migrate AggregatesController to SSA status updates Replace PatchHypervisorStatusWithRetry with Status().Apply() for ConditionTypeAggregatesUpdated and the Aggregates slice. The error path includes the current Aggregates in the apply to prevent SSA from pruning previously-set values on a transient OpenStack error. Clearing aggregates to an empty slice uses a targeted merge patch to work around the omitempty limitation in the generated apply configuration type. 
--- internal/controller/aggregates_controller.go | 86 ++++++++++++++++---- 1 file changed, 70 insertions(+), 16 deletions(-) diff --git a/internal/controller/aggregates_controller.go b/internal/controller/aggregates_controller.go index 7490a4a..5670851 100644 --- a/internal/controller/aggregates_controller.go +++ b/internal/controller/aggregates_controller.go @@ -19,6 +19,7 @@ package controller import ( "context" + "encoding/json" "errors" "fmt" @@ -26,6 +27,8 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -33,6 +36,7 @@ import ( "github.com/gophercloud/gophercloud/v2" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -76,17 +80,30 @@ func (ac *AggregatesController) Reconcile(ctx context.Context, req ctrl.Request) // Apply aggregates to OpenStack and update status aggregates, err := openstack.ApplyAggregates(ctx, ac.computeClient, hv.Name, desiredAggregateNames) if err != nil { - // Set error condition with retry on conflict - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeAggregatesUpdated, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonFailed, - Message: fmt.Errorf("failed to apply aggregates: %w", err).Error(), + // Set error condition, preserving current aggregate ownership + statusCfg := apiv1.HypervisorStatus() + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + 
*k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeAggregatesUpdated). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonFailed). + WithMessage(fmt.Errorf("failed to apply aggregates: %w", err).Error())) + if len(hv.Status.Aggregates) > 0 { + aggCfgs := make([]apiv1.AggregateApplyConfiguration, len(hv.Status.Aggregates)) + for i, agg := range hv.Status.Aggregates { + a := apiv1.Aggregate().WithName(agg.Name).WithUUID(agg.UUID) + if len(agg.Metadata) > 0 { + a.WithMetadata(agg.Metadata) + } + aggCfgs[i] = *a + } + statusCfg.Aggregates = aggCfgs } - if err2 := utils.PatchHypervisorStatusWithRetry(ctx, ac.Client, hv.Name, AggregatesControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err2 != nil { + if err2 := ac.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(AggregatesControllerName)); err2 != nil { return ctrl.Result{}, errors.Join(err, err2) } return ctrl.Result{}, err @@ -106,15 +123,52 @@ func (ac *AggregatesController) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } - // Patch status with retry on conflict - err := utils.PatchHypervisorStatusWithRetry(ctx, ac.Client, hv.Name, AggregatesControllerName, func(h *kvmv1.Hypervisor) { - if aggregatesChanged { - h.Status.Aggregates = newAggregates + statusCfg := apiv1.HypervisorStatus() + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(desiredCondition.Type). + WithStatus(desiredCondition.Status). + WithReason(desiredCondition.Reason). 
+ WithMessage(desiredCondition.Message)) + + if aggregatesChanged && len(newAggregates) > 0 { + aggCfgs := make([]apiv1.AggregateApplyConfiguration, len(newAggregates)) + for i, agg := range newAggregates { + a := apiv1.Aggregate().WithName(agg.Name).WithUUID(agg.UUID) + if len(agg.Metadata) > 0 { + a.WithMetadata(agg.Metadata) + } + aggCfgs[i] = *a + } + statusCfg.Aggregates = aggCfgs + } + + if err := ac.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(AggregatesControllerName)); err != nil { + return ctrl.Result{}, err + } + + // The Aggregates field uses omitempty in the generated apply config, so an + // empty slice cannot be sent via SSA. Use a targeted merge patch to clear it. + if aggregatesChanged && len(newAggregates) == 0 { + patch, err := json.Marshal(map[string]any{ + "status": map[string]any{ + "aggregates": []kvmv1.Aggregate{}, + }, + }) + if err != nil { + return ctrl.Result{}, err } - meta.SetStatusCondition(&h.Status.Conditions, desiredCondition) - }) + fresh := &kvmv1.Hypervisor{} + if err := ac.Get(ctx, k8sclient.ObjectKey{Name: hv.Name}, fresh); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, ac.Status().Patch(ctx, fresh, k8sclient.RawPatch(types.MergePatchType, patch)) + } - return ctrl.Result{}, err + return ctrl.Result{}, nil } // determineDesiredState returns the desired aggregates and the corresponding condition From d06196984669e5b80a2ed64c0d15a92ba20abcd1 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:40:59 +0200 Subject: [PATCH 09/14] Migrate TraitsController to SSA status updates Replace PatchHypervisorStatusWithRetry with Status().Apply() for ConditionTypeTraitsUpdated and the Traits slice. Error paths pass the current hv.Status.Traits to retain ownership and avoid SSA releasing it on transient placement API failures. 
--- internal/controller/traits_controller.go | 42 ++++++++++++++---------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/internal/controller/traits_controller.go b/internal/controller/traits_controller.go index 6da55b8..8925bb8 100644 --- a/internal/controller/traits_controller.go +++ b/internal/controller/traits_controller.go @@ -27,6 +27,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -34,6 +35,7 @@ import ( "github.com/gophercloud/gophercloud/v2/openstack/placement/v1/resourceproviders" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -103,11 +105,8 @@ func (tc *TraitsController) Reconcile(ctx context.Context, req ctrl.Request) (ct // fetch current traits, to ensure we don't add duplicates current, err := resourceproviders.GetTraits(ctx, tc.serviceClient, hv.Status.HypervisorID).Extract() if err != nil { - condition := getTraitCondition(err, "Failed to get current traits from placement") - patchErr := utils.PatchHypervisorStatusWithRetry(ctx, tc.Client, hv.Name, TraitsControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) - return ctrl.Result{}, errors.Join(err, patchErr) + return ctrl.Result{}, errors.Join(err, + tc.applyTraitsStatus(ctx, hv, hv.Status.Traits, getTraitCondition(err, "Failed to get current traits from placement"))) } var targetTraits []string @@ -131,22 +130,31 @@ func (tc *TraitsController) Reconcile(ctx context.Context, req ctrl.Request) (ct ResourceProviderGeneration: 
current.ResourceProviderGeneration, Traits: targetTraits, }) - err = result.Err - if err != nil { - condition := getTraitCondition(err, "Failed to update traits in placement") - patchErr := utils.PatchHypervisorStatusWithRetry(ctx, tc.Client, hv.Name, TraitsControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) - return ctrl.Result{}, errors.Join(err, patchErr) + if result.Err != nil { + return ctrl.Result{}, errors.Join(result.Err, + tc.applyTraitsStatus(ctx, hv, hv.Status.Traits, getTraitCondition(result.Err, "Failed to update traits in placement"))) } } // update status unconditionally, since we want always to propagate the current traits - err = utils.PatchHypervisorStatusWithRetry(ctx, tc.Client, hv.Name, TraitsControllerName, func(h *kvmv1.Hypervisor) { - h.Status.Traits = targetTraits - meta.SetStatusCondition(&h.Status.Conditions, getTraitCondition(nil, "Traits successfully updated")) - }) - return ctrl.Result{}, err + return ctrl.Result{}, tc.applyTraitsStatus(ctx, hv, targetTraits, getTraitCondition(nil, "Traits successfully updated")) +} + +func (tc *TraitsController) applyTraitsStatus(ctx context.Context, hv *kvmv1.Hypervisor, traits []string, cond metav1.Condition) error { + statusCfg := apiv1.HypervisorStatus() + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(cond.Type). + WithStatus(cond.Status). + WithReason(cond.Reason). + WithMessage(cond.Message)) + if traits != nil { + statusCfg.WithTraits(traits...) 
+ } + return tc.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(TraitsControllerName)) } // getTraitCondition creates a Condition object for trait updates From b9256bdd3e6263ad2aaf1618a107e4ac3ee3234f Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 5 May 2026 17:41:07 +0200 Subject: [PATCH 10/14] Migrate ready.Controller to SSA status updates; remove PatchHypervisorStatusWithRetry Replace the last caller of PatchHypervisorStatusWithRetry with Status().Apply() for ConditionTypeReady. With no remaining callers, PatchHypervisorStatusWithRetry and StatusPatchBackoff are removed. --- internal/controller/ready/controller.go | 23 ++++++----- internal/utils/status_patch.go | 55 ------------------------- 2 files changed, 13 insertions(+), 65 deletions(-) delete mode 100644 internal/utils/status_patch.go diff --git a/internal/controller/ready/controller.go b/internal/controller/ready/controller.go index 813d583..29293ba 100644 --- a/internal/controller/ready/controller.go +++ b/internal/controller/ready/controller.go @@ -24,6 +24,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -32,6 +33,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -104,20 +106,21 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu return ctrl.Result{}, k8sclient.IgnoreNotFound(err) } - base := hv.DeepCopy() - // Compute Ready condition based on other conditions 
readyCondition := ComputeReadyCondition(hv) - meta.SetStatusCondition(&hv.Status.Conditions, readyCondition) - - if equality.Semantic.DeepEqual(hv.Status, base.Status) { - return ctrl.Result{}, nil - } log.Info("Updating Ready condition", "status", readyCondition.Status, "reason", readyCondition.Reason) - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, req.Name, ControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, readyCondition) - }) + statusCfg := apiv1.HypervisorStatus() + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(readyCondition.Type). + WithStatus(readyCondition.Status). + WithReason(readyCondition.Reason). + WithMessage(readyCondition.Message)) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(ControllerName)) } // ComputeReadyCondition determines the Ready condition based on other conditions diff --git a/internal/utils/status_patch.go b/internal/utils/status_patch.go deleted file mode 100644 index e353f87..0000000 --- a/internal/utils/status_patch.go +++ /dev/null @@ -1,55 +0,0 @@ -/* -SPDX-FileCopyrightText: Copyright 2025 SAP SE or an SAP affiliate company and cobaltcore-dev contributors -SPDX-License-Identifier: Apache-2.0 - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package utils - -import ( - "context" - "time" - - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/util/retry" - k8sclient "sigs.k8s.io/controller-runtime/pkg/client" - - kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" -) - -// StatusPatchBackoff is a retry backoff for status patches that may conflict -// with other controllers. Uses exponential backoff with more retries than the -// default to handle high contention scenarios. -var StatusPatchBackoff = wait.Backoff{ - Steps: 10, - Duration: 50 * time.Millisecond, - Factor: 2.0, - Jitter: 0.2, -} - -// PatchHypervisorStatusWithRetry patches hypervisor status with retry on conflict. -// The updateFn receives a fresh copy of the hypervisor and should apply status changes to it. -// It re-fetches the resource before each retry attempt to get the latest resourceVersion. -func PatchHypervisorStatusWithRetry(ctx context.Context, c k8sclient.Client, name, fieldOwner string, updateFn func(*kvmv1.Hypervisor)) error { - return retry.RetryOnConflict(StatusPatchBackoff, func() error { - hv := &kvmv1.Hypervisor{} - if err := c.Get(ctx, k8sclient.ObjectKey{Name: name}, hv); err != nil { - return err - } - base := hv.DeepCopy() - updateFn(hv) - return c.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(fieldOwner)) - }) -} From b06e1f08b987971f7f11e6eb9f6813bf5bce8f7c Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Wed, 6 May 2026 11:10:55 +0200 Subject: [PATCH 11/14] Migrate HypervisorMaintenanceController Eviction creation to SSA Replace Create + SetControllerReference with Apply for the Eviction CR. The owner reference and labels are set in the apply configuration metadata, and SSA handles the upsert. A Get is still performed after the apply to read the current eviction status conditions. 
--- .../hypervisor_maintenance_controller.go | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/internal/controller/hypervisor_maintenance_controller.go b/internal/controller/hypervisor_maintenance_controller.go index 1414de8..0fea962 100644 --- a/internal/controller/hypervisor_maintenance_controller.go +++ b/internal/controller/hypervisor_maintenance_controller.go @@ -24,14 +24,12 @@ import ( "context" "fmt" - k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" logger "sigs.k8s.io/controller-runtime/pkg/log" "github.com/gophercloud/gophercloud/v2" @@ -238,25 +236,39 @@ func (hec *HypervisorMaintenanceController) reconcileEviction( func (hec *HypervisorMaintenanceController) ensureEviction(ctx context.Context, eviction *kvmv1.Eviction, hypervisor *kvmv1.Hypervisor) (metav1.ConditionStatus, error) { log := logger.FromContext(ctx) - if err := hec.Get(ctx, k8sclient.ObjectKeyFromObject(eviction), eviction); err != nil { - if !k8serrors.IsNotFound(err) { - return metav1.ConditionUnknown, fmt.Errorf("failed to get eviction due to %w", err) - } - if err := controllerutil.SetControllerReference(hypervisor, eviction, hec.Scheme); err != nil { - return metav1.ConditionUnknown, err - } - log.Info("Creating new eviction", "name", eviction.Name) - eviction.Spec = kvmv1.EvictionSpec{ - Hypervisor: hypervisor.Name, - Reason: "openstack-hypervisor-operator maintenance", + + // Build labels to transport from hypervisor (e.g. 
label-selector, if set) + evictionLabels := make(map[string]string) + for _, label := range transferLabels { + if v, ok := hypervisor.Labels[label]; ok { + evictionLabels[label] = v } + } - // This also transports the label-selector, if set - transportLabels(&hypervisor.ObjectMeta, &eviction.ObjectMeta) + ownerRef := k8sacmetav1.OwnerReference(). + WithAPIVersion(kvmv1.GroupVersion.String()). + WithKind("Hypervisor"). + WithName(hypervisor.Name). + WithUID(hypervisor.UID). + WithController(true). + WithBlockOwnerDeletion(true) + + evictionApplyCfg := apiv1.Eviction(eviction.Name, eviction.Namespace). + WithLabels(evictionLabels). + WithOwnerReferences(ownerRef). + WithSpec(apiv1.EvictionSpec(). + WithHypervisor(hypervisor.Name). + WithReason("openstack-hypervisor-operator maintenance")) + + log.Info("Applying eviction", "name", eviction.Name) + if err := hec.Apply(ctx, evictionApplyCfg, + k8sclient.ForceOwnership, k8sclient.FieldOwner(HypervisorMaintenanceControllerName)); err != nil { + return metav1.ConditionUnknown, fmt.Errorf("failed to apply eviction due to %w", err) + } - if err = hec.Create(ctx, eviction); err != nil { - return metav1.ConditionUnknown, fmt.Errorf("failed to create eviction due to %w", err) - } + // Re-fetch to read current eviction status + if err := hec.Get(ctx, k8sclient.ObjectKeyFromObject(eviction), eviction); err != nil { + return metav1.ConditionUnknown, fmt.Errorf("failed to get eviction status due to %w", err) } // check if we are still evicting (defaulting to yes) From dfdffec6f269be0bbbabe6384b43f8d6c3e244ef Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Wed, 6 May 2026 11:10:55 +0200 Subject: [PATCH 12/14] Migrate NodeCertificateController Certificate creation to SSA Replace CreateOrUpdate + SetOwnerReference with Apply for the cert-manager Certificate CR. The owner reference is set in the apply configuration metadata. The retry-on-conflict loop is no longer needed since SSA handles concurrent updates correctly. 
The controller now applies the full certificate spec on every reconcile so IP/DNS changes are picked up immediately. --- .../controller/node_certificate_controller.go | 142 ++++++++---------- 1 file changed, 60 insertions(+), 82 deletions(-) diff --git a/internal/controller/node_certificate_controller.go b/internal/controller/node_certificate_controller.go index b8da375..8287c48 100644 --- a/internal/controller/node_certificate_controller.go +++ b/internal/controller/node_certificate_controller.go @@ -24,14 +24,14 @@ import ( "time" cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" - cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + cmapplyv1 "github.com/cert-manager/cert-manager/pkg/client/applyconfigurations/certmanager/v1" + cmapplymetav1 "github.com/cert-manager/cert-manager/pkg/client/applyconfigurations/meta/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/util/retry" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" logger "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" ) @@ -59,95 +59,77 @@ func (r *NodeCertificateController) ensureCertificate(ctx context.Context, node secretName, certName := getSecretAndCertName(node.Name) - certificate := &cmapi.Certificate{ - TypeMeta: metav1.TypeMeta{ - Kind: cmapi.CertificateKind, - APIVersion: cmapi.SchemeGroupVersion.String(), - }, - ObjectMeta: metav1.ObjectMeta{ - Name: certName, - Namespace: r.namespace, - }, - } - - update, err := controllerutil.CreateOrUpdate(ctx, r.Client, certificate, func() error { - if err := controllerutil.SetOwnerReference(node, certificate, r.Scheme); err != nil { - return err - } - - ipAddressSet := make(map[string]bool) - dnsNameSet := make(map[string]bool) - - 
dnsNameSet[computeHost] = true + ipAddressSet := make(map[string]bool) + dnsNameSet := make(map[string]bool) - for _, addr := range node.Status.Addresses { - if addr.Address == "" { - continue - } + dnsNameSet[computeHost] = true - switch addr.Type { - case corev1.NodeHostName, corev1.NodeInternalDNS, corev1.NodeExternalDNS: - dnsNameSet[addr.Address] = true - case corev1.NodeInternalIP, corev1.NodeExternalIP: - ipAddressSet[addr.Address] = true - } + for _, addr := range node.Status.Addresses { + if addr.Address == "" { + continue } - ipAddresses := make([]string, 0, len(ipAddressSet)) - for k := range ipAddressSet { - ipAddresses = append(ipAddresses, k) - } - - slices.Sort(ipAddresses) - - dnsNames := make([]string, 0, len(dnsNameSet)) - for k := range dnsNameSet { - dnsNames = append(dnsNames, k) + switch addr.Type { + case corev1.NodeHostName, corev1.NodeInternalDNS, corev1.NodeExternalDNS: + dnsNameSet[addr.Address] = true + case corev1.NodeInternalIP, corev1.NodeExternalIP: + ipAddressSet[addr.Address] = true } + } - slices.Sort(dnsNames) + ipAddresses := make([]string, 0, len(ipAddressSet)) + for k := range ipAddressSet { + ipAddresses = append(ipAddresses, k) + } + slices.Sort(ipAddresses) - certificate.Spec = cmapi.CertificateSpec{ - SecretName: secretName, - PrivateKey: &cmapi.CertificatePrivateKey{ - Algorithm: cmapi.RSAKeyAlgorithm, - Encoding: cmapi.PKCS1, - Size: 4096, - }, + dnsNames := make([]string, 0, len(dnsNameSet)) + for k := range dnsNameSet { + dnsNames = append(dnsNames, k) + } + slices.Sort(dnsNames) + + ownerRef := k8sacmetav1.OwnerReference(). + WithAPIVersion(corev1.SchemeGroupVersion.String()). + WithKind("Node"). + WithName(node.Name). + WithUID(node.UID) + + certApplyCfg := cmapplyv1.Certificate(certName, r.namespace). + WithOwnerReferences(ownerRef). + WithSpec(cmapplyv1.CertificateSpec(). + WithSecretName(secretName). + WithPrivateKey(cmapplyv1.CertificatePrivateKey(). + WithAlgorithm(cmapi.RSAKeyAlgorithm). 
+ WithEncoding(cmapi.PKCS1). + WithSize(4096)). // Matching the CA/Browser Forum's maximum duration for 2029 - Duration: &metav1.Duration{Duration: 47 * 24 * time.Hour}, - RenewBefore: &metav1.Duration{Duration: 37 * 24 * time.Hour}, - IsCA: false, - Usages: []cmapi.KeyUsage{ + WithDuration(metav1.Duration{Duration: 47 * 24 * time.Hour}). + WithRenewBefore(metav1.Duration{Duration: 37 * 24 * time.Hour}). + WithIsCA(false). + WithUsages( cmapi.UsageServerAuth, cmapi.UsageClientAuth, cmapi.UsageCertSign, // Really? cmapi.UsageDigitalSignature, cmapi.UsageKeyEncipherment, - }, - Subject: &cmapi.X509Subject{ - Organizations: []string{"nova"}, - }, - CommonName: computeHost, - DNSNames: dnsNames, - IPAddresses: ipAddresses, - IssuerRef: cmmeta.IssuerReference{ - Name: r.issuerName, - Kind: cmapi.IssuerKind, - Group: cmapi.SchemeGroupVersion.Group, - }, - } - return nil - }) - - if err != nil { + ). + WithSubject(cmapplyv1.X509Subject(). + WithOrganizations("nova")). + WithCommonName(computeHost). + WithDNSNames(dnsNames...). + WithIPAddresses(ipAddresses...). + WithIssuerRef(cmapplymetav1.IssuerReference(). + WithName(r.issuerName). + WithKind(cmapi.IssuerKind). 
+ WithGroup(cmapi.SchemeGroupVersion.Group))) + + if err := r.Apply(ctx, certApplyCfg, + k8sclient.ForceOwnership, k8sclient.FieldOwner(NodeCertificateControllerName)); err != nil { return err } - if update != controllerutil.OperationResultNone { - log.Info(fmt.Sprintf("Certificate %s %s", certName, update)) - } - + log.Info("Applied Certificate " + certName) return nil } @@ -166,12 +148,8 @@ func (r *NodeCertificateController) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, k8sclient.IgnoreNotFound(err) } - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - return r.ensureCertificate(ctx, node, node.Name) - }) - - if err != nil { - return ctrl.Result{}, fmt.Errorf("could not create certificate: %w", err) + if err := r.ensureCertificate(ctx, node, node.Name); err != nil { + return ctrl.Result{}, fmt.Errorf("could not apply certificate: %w", err) } return ctrl.Result{}, nil From 993a78ebf862feb3dc4493665e8ff5bded613b9d Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Wed, 6 May 2026 12:14:24 +0200 Subject: [PATCH 13/14] Refactor HypervisorMaintenanceController: pass statusCfg into sub-functions With SSA the status config is built once in Reconcile and passed to reconcileComputeService and reconcileEviction which mutate it directly. This eliminates the intermediate condition return values and makes the single-apply-per-reconcile explicit. The early-return guard for already-enabled/disabled state is inlined into each branch. 
--- .../hypervisor_maintenance_controller.go | 163 +++++++----------- 1 file changed, 67 insertions(+), 96 deletions(-) diff --git a/internal/controller/hypervisor_maintenance_controller.go b/internal/controller/hypervisor_maintenance_controller.go index 0fea962..fbba802 100644 --- a/internal/controller/hypervisor_maintenance_controller.go +++ b/internal/controller/hypervisor_maintenance_controller.go @@ -67,37 +67,16 @@ func (hec *HypervisorMaintenanceController) Reconcile(ctx context.Context, req c return ctrl.Result{}, nil } - // Determine desired disabled condition and eviction state - disabledCond, evictingCond, evicted, err := hec.reconcileComputeService(ctx, hv) - if err != nil { - return ctrl.Result{}, err - } - - evictingCond, evicted, err = hec.reconcileEviction(ctx, hv, evictingCond, evicted) - if err != nil { - return ctrl.Result{}, err - } - - // Build status apply config: always include conditions this controller owns; - // omit ConditionTypeEvicting when it should be removed (SSA prunes it). - statusCfg := apiv1.HypervisorStatus().WithEvicted(evicted) + // Build status apply config upfront; sub-functions mutate it directly. + statusCfg := apiv1.HypervisorStatus().WithEvicted(hv.Status.Evicted) statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) - if disabledCond != nil { - utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *disabledCond) + if err := hec.reconcileComputeService(ctx, hv, statusCfg); err != nil { + return ctrl.Result{}, err } - if evictingCond != nil { - utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *evictingCond) - } else { - // Remove ConditionTypeEvicting by omitting it — SSA prunes sole-owned entries. 
- filtered := statusCfg.Conditions[:0] - for _, c := range statusCfg.Conditions { - if c.Type == nil || *c.Type != kvmv1.ConditionTypeEvicting { - filtered = append(filtered, c) - } - } - statusCfg.Conditions = filtered + if err := hec.reconcileEviction(ctx, hv, statusCfg); err != nil { + return ctrl.Result{}, err } return ctrl.Result{}, hec.Status().Apply(ctx, @@ -106,85 +85,67 @@ func (hec *HypervisorMaintenanceController) Reconcile(ctx context.Context, req c } // reconcileComputeService enables/disables the nova-compute service based on -// hv.Spec.Maintenance. Returns the desired HypervisorDisabled condition (nil if -// service ID is unset) and the initial evicting/evicted state. -func (hec *HypervisorMaintenanceController) reconcileComputeService(ctx context.Context, hv *kvmv1.Hypervisor) ( - disabledCond *k8sacmetav1.ConditionApplyConfiguration, - evictingCond *k8sacmetav1.ConditionApplyConfiguration, - evicted bool, - err error, -) { +// hv.Spec.Maintenance and sets the HypervisorDisabled condition on statusCfg. +func (hec *HypervisorMaintenanceController) reconcileComputeService(ctx context.Context, hv *kvmv1.Hypervisor, statusCfg *apiv1.HypervisorStatusApplyConfiguration) error { log := logger.FromContext(ctx) serviceId := hv.Status.ServiceID if serviceId == "" { // We can only do something here, if there is a service to begin with. // The onboarding should take care of that. - return nil, nil, false, nil + return nil } switch hv.Spec.Maintenance { case kvmv1.MaintenanceUnset: - cond := k8sacmetav1.Condition(). - WithType(kvmv1.ConditionTypeHypervisorDisabled). - WithStatus(metav1.ConditionFalse). - WithMessage("Hypervisor is enabled"). 
- WithReason(kvmv1.ConditionReasonSucceeded) - existing := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) - if existing != nil && existing.Status == metav1.ConditionFalse { - // Already enabled, nothing to do - return cond, nil, false, nil - } - - enableService := services.UpdateOpts{Status: services.ServiceEnabled} - // We need to enable the host as per spec - log.Info("Enabling hypervisor", "id", serviceId) - if _, err := services.Update(ctx, hec.computeClient, serviceId, enableService).Extract(); err != nil { - return nil, nil, false, fmt.Errorf("failed to enable hypervisor due to %w", err) + if existing == nil || existing.Status != metav1.ConditionFalse { + // We need to enable the host as per spec + enableService := services.UpdateOpts{Status: services.ServiceEnabled} + log.Info("Enabling hypervisor", "id", serviceId) + if _, err := services.Update(ctx, hec.computeClient, serviceId, enableService).Extract(); err != nil { + return fmt.Errorf("failed to enable hypervisor due to %w", err) + } } - return cond, nil, false, nil + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeHypervisorDisabled). + WithStatus(metav1.ConditionFalse). + WithMessage("Hypervisor is enabled"). + WithReason(kvmv1.ConditionReasonSucceeded)) case kvmv1.MaintenanceManual, kvmv1.MaintenanceAuto, kvmv1.MaintenanceHA, kvmv1.MaintenanceTermination: // Disable the compute service. // Also in case of HA, as it doesn't hurt to disable it twice, and this // allows us to enable the service again, when the maintenance field is // cleared in the case above. - cond := k8sacmetav1.Condition(). - WithType(kvmv1.ConditionTypeHypervisorDisabled). - WithStatus(metav1.ConditionTrue). - WithMessage("Hypervisor is disabled"). 
- WithReason(kvmv1.ConditionReasonSucceeded) - existing := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) - if existing != nil && existing.Status == metav1.ConditionTrue { - // Already disabled, nothing to do - return cond, nil, false, nil - } - - disableService := services.UpdateOpts{ - Status: services.ServiceDisabled, - DisabledReason: "Hypervisor CRD: spec.maintenance=" + hv.Spec.Maintenance, - } - // We need to disable the host as per spec - log.Info("Disabling hypervisor", "id", serviceId) - if _, err := services.Update(ctx, hec.computeClient, serviceId, disableService).Extract(); err != nil { - return nil, nil, false, fmt.Errorf("failed to disable hypervisor due to %w", err) + if existing == nil || existing.Status != metav1.ConditionTrue { + disableService := services.UpdateOpts{ + Status: services.ServiceDisabled, + DisabledReason: "Hypervisor CRD: spec.maintenance=" + hv.Spec.Maintenance, + } + // We need to disable the host as per spec + log.Info("Disabling hypervisor", "id", serviceId) + if _, err := services.Update(ctx, hec.computeClient, serviceId, disableService).Extract(); err != nil { + return fmt.Errorf("failed to disable hypervisor due to %w", err) + } } - return cond, nil, false, nil + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeHypervisorDisabled). + WithStatus(metav1.ConditionTrue). + WithMessage("Hypervisor is disabled"). + WithReason(kvmv1.ConditionReasonSucceeded)) } - return nil, nil, false, nil + return nil } -// reconcileEviction creates/deletes the Eviction CR and returns the desired -// ConditionTypeEvicting apply configuration (nil means remove the condition). 
-func (hec *HypervisorMaintenanceController) reconcileEviction( - ctx context.Context, - hv *kvmv1.Hypervisor, - evictingCond *k8sacmetav1.ConditionApplyConfiguration, - evicted bool, -) (*k8sacmetav1.ConditionApplyConfiguration, bool, error) { +// reconcileEviction creates/deletes the Eviction CR and sets the ConditionTypeEvicting +// condition and Evicted scalar on statusCfg. When eviction should be removed, the +// condition entry is filtered out so SSA prunes it. +func (hec *HypervisorMaintenanceController) reconcileEviction(ctx context.Context, hv *kvmv1.Hypervisor, statusCfg *apiv1.HypervisorStatusApplyConfiguration) error { eviction := &kvmv1.Eviction{ ObjectMeta: metav1.ObjectMeta{Name: hv.Name}, } @@ -193,45 +154,55 @@ func (hec *HypervisorMaintenanceController) reconcileEviction( case kvmv1.MaintenanceUnset: // Avoid deleting the eviction over and over. if !hv.Status.Evicted && meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) == nil { - return nil, false, nil + return nil + } + if err := k8sclient.IgnoreNotFound(hec.Delete(ctx, eviction)); err != nil { + return err } - err := k8sclient.IgnoreNotFound(hec.Delete(ctx, eviction)) - return nil, false, err // nil evictingCond → condition omitted from apply → SSA prunes it + // Remove ConditionTypeEvicting by omitting it — SSA prunes sole-owned entries. 
+ filtered := statusCfg.Conditions[:0] + for _, c := range statusCfg.Conditions { + if c.Type == nil || *c.Type != kvmv1.ConditionTypeEvicting { + filtered = append(filtered, c) + } + } + statusCfg.Conditions = filtered + statusCfg.WithEvicted(false) case kvmv1.MaintenanceManual, kvmv1.MaintenanceAuto, kvmv1.MaintenanceTermination: // In case of "ha", the host gets emptied from the HA service if cond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeEvicting); cond != nil { if cond.Reason == kvmv1.ConditionReasonSucceeded { // We are done here, no need to look at the eviction any more - return evictingCond, evicted, nil + return nil } } status, err := hec.ensureEviction(ctx, eviction, hv) if err != nil { - return evictingCond, evicted, err + return err } var reason, message string if status == metav1.ConditionFalse { message = "Evicted" reason = kvmv1.ConditionReasonSucceeded - evicted = true + statusCfg.WithEvicted(true) } else { message = "Evicting" reason = kvmv1.ConditionReasonRunning - evicted = false + statusCfg.WithEvicted(false) } - cond := k8sacmetav1.Condition(). - WithType(kvmv1.ConditionTypeEvicting). - WithStatus(status). - WithReason(reason). - WithMessage(message) - return cond, evicted, nil + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeEvicting). + WithStatus(status). + WithReason(reason). 
+ WithMessage(message)) } - return evictingCond, evicted, nil + return nil } func (hec *HypervisorMaintenanceController) ensureEviction(ctx context.Context, eviction *kvmv1.Eviction, hypervisor *kvmv1.Hypervisor) (metav1.ConditionStatus, error) { From 88bba8f9e51606ff1ed027b7ae45d6aea27689d6 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Wed, 6 May 2026 12:14:24 +0200 Subject: [PATCH 14/14] Refactor OnboardingController: statusCfg passed through reconcile chain With SSA the status config is built once at the top of Reconcile and threaded through to all sub-functions (abortOnboarding, initialOnboarding, smokeTest, completeOnboarding) which mutate it directly. An apply closure is also passed so each function calls apply() when it has determined the desired condition, rather than building and applying the config itself. This removes the applyOnboardingCondition helper and makes the flow fully declarative: compute desired state, then apply once. --- internal/controller/onboarding_controller.go | 117 +++++++++---------- 1 file changed, 54 insertions(+), 63 deletions(-) diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go index 3c0abd6..00e4a03 100644 --- a/internal/controller/onboarding_controller.go +++ b/internal/controller/onboarding_controller.go @@ -88,14 +88,27 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) computeHost := hv.Name + // Build the status apply config upfront; sub-functions mutate it. + // HypervisorID and ServiceID are always retained so they are never pruned. + statusCfg := apiv1.HypervisorStatus(). + WithHypervisorID(hv.Status.HypervisorID). 
+ WithServiceID(hv.Status.ServiceID) + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + + apply := func() error { + return r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) + } + // check if lifecycle management is enabled if !hv.Spec.LifecycleEnabled { - return ctrl.Result{}, r.abortOnboarding(ctx, hv, computeHost, "Aborted due to LifecycleEnabled being false") + return ctrl.Result{}, r.abortOnboarding(ctx, hv, computeHost, "Aborted due to LifecycleEnabled being false", statusCfg, apply) } // check if hv is terminating if hv.Spec.Maintenance == kvmv1.MaintenanceTermination { - return ctrl.Result{}, r.abortOnboarding(ctx, hv, computeHost, "Aborted due to MaintenanceTermination") + return ctrl.Result{}, r.abortOnboarding(ctx, hv, computeHost, "Aborted due to MaintenanceTermination", statusCfg, apply) } // We bail here out, because the openstack api is not the best to poll @@ -107,54 +120,36 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) } return ctrl.Result{}, err } - statusCfg := apiv1.HypervisorStatus(). - WithHypervisorID(hypervisorID). - WithServiceID(serviceID) - statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + statusCfg.WithHypervisorID(hypervisorID).WithServiceID(serviceID) utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *k8sacmetav1.Condition(). WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). WithReason(kvmv1.ConditionReasonInitial). 
WithMessage("Initial onboarding")) - return ctrl.Result{}, r.Status().Apply(ctx, - apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), - k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) + return ctrl.Result{}, apply() } // check condition reason status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) switch status.Reason { case kvmv1.ConditionReasonInitial: - return ctrl.Result{}, r.initialOnboarding(ctx, hv) + return ctrl.Result{}, r.initialOnboarding(ctx, hv, statusCfg, apply) case kvmv1.ConditionReasonTesting: if hv.Spec.SkipTests { - return r.completeOnboarding(ctx, computeHost, hv) + return r.completeOnboarding(ctx, computeHost, hv, statusCfg, apply) } else { - return r.smokeTest(ctx, hv, computeHost) + return r.smokeTest(ctx, hv, computeHost, statusCfg, apply) } case kvmv1.ConditionReasonHandover: - return r.completeOnboarding(ctx, computeHost, hv) + return r.completeOnboarding(ctx, computeHost, hv, statusCfg, apply) default: // Nothing to be done return ctrl.Result{}, nil } } -// applyOnboardingCondition applies a single onboarding condition via SSA, -// carrying all existing conditions and scalar fields forward. -func (r *OnboardingController) applyOnboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, cond k8sacmetav1.ConditionApplyConfiguration) error { - statusCfg := apiv1.HypervisorStatus(). - WithHypervisorID(hv.Status.HypervisorID). 
- WithServiceID(hv.Status.ServiceID) - statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) - utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, cond) - return r.Status().Apply(ctx, - apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), - k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) -} - -func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hypervisor, computeHost, message string) error { +func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hypervisor, computeHost, message string, statusCfg *apiv1.HypervisorStatusApplyConfiguration, apply func() error) error { status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) // Never onboarded if status == nil { @@ -169,23 +164,25 @@ func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hy } if err := r.deleteTestServers(ctx, computeHost); err != nil { - return errors.Join(err, r.applyOnboardingCondition(ctx, hv, + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *k8sacmetav1.Condition(). WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). // No cleanup, so we are still "onboarding" WithReason(kvmv1.ConditionReasonAborted). - WithMessage(err.Error()))) + WithMessage(err.Error())) + return errors.Join(err, apply()) } - return r.applyOnboardingCondition(ctx, hv, + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *k8sacmetav1.Condition(). WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionFalse). WithReason(kvmv1.ConditionReasonAborted). 
WithMessage(message)) + return apply() } -func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1.Hypervisor) error { +func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1.Hypervisor, statusCfg *apiv1.HypervisorStatusApplyConfiguration, apply func() error) error { zone, found := hv.Labels[corev1.LabelTopologyZone] if !found || zone == "" { return fmt.Errorf("cannot find availability-zone label %v on node", corev1.LabelTopologyZone) @@ -213,15 +210,16 @@ func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1. return result.Err } - return r.applyOnboardingCondition(ctx, hv, + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *k8sacmetav1.Condition(). WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). WithReason(kvmv1.ConditionReasonTesting). WithMessage("Running onboarding tests")) + return apply() } -func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervisor, host string) (ctrl.Result, error) { +func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervisor, host string, statusCfg *apiv1.HypervisorStatusApplyConfiguration, apply func() error) (ctrl.Result, error) { log := logger.FromContext(ctx) zone := hv.Labels[corev1.LabelTopologyZone] server, err := r.createOrGetTestServer(ctx, zone, host, hv.UID) @@ -229,6 +227,16 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis return ctrl.Result{}, fmt.Errorf("failed to create or get test instance: %w", err) } + setTestingCondition := func(message string) error { + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). 
+ WithMessage(message)) + return apply() + } + switch server.Status { case "ERROR": // servers.List doesn't provide the fault field, so fetch the server again @@ -240,12 +248,7 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } // Set condition back to testing - if err = r.applyOnboardingCondition(ctx, hv, - *k8sacmetav1.Condition(). - WithType(kvmv1.ConditionTypeOnboarding). - WithStatus(metav1.ConditionTrue). - WithReason(kvmv1.ConditionReasonTesting). - WithMessage("Server ended up in error state: "+server.Fault.Message)); err != nil { + if err = setTestingCondition("Server ended up in error state: " + server.Fault.Message); err != nil { return ctrl.Result{}, err } @@ -261,12 +264,7 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis ShowConsoleOutput(ctx, r.testComputeClient, server.ID, servers.ShowConsoleOutputOpts{Length: 11}). Extract() if err != nil { - if err2 := r.applyOnboardingCondition(ctx, hv, - *k8sacmetav1.Condition(). - WithType(kvmv1.ConditionTypeOnboarding). - WithStatus(metav1.ConditionTrue). - WithReason(kvmv1.ConditionReasonTesting). - WithMessage(fmt.Sprintf("could not get console output %v", err))); err2 != nil { + if err2 := setTestingCondition(fmt.Sprintf("could not get console output %v", err)); err2 != nil { return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil @@ -274,12 +272,7 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis if !strings.Contains(consoleOutput, server.Name) { if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) { - if err2 := r.applyOnboardingCondition(ctx, hv, - *k8sacmetav1.Condition(). - WithType(kvmv1.ConditionTypeOnboarding). - WithStatus(metav1.ConditionTrue). - WithReason(kvmv1.ConditionReasonTesting). 
- WithMessage(fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt))); err2 != nil { + if err2 := setTestingCondition(fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt)); err2 != nil { return ctrl.Result{}, err2 } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { @@ -292,25 +285,20 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { - if err2 := r.applyOnboardingCondition(ctx, hv, - *k8sacmetav1.Condition(). - WithType(kvmv1.ConditionTypeOnboarding). - WithStatus(metav1.ConditionTrue). - WithReason(kvmv1.ConditionReasonTesting). - WithMessage(fmt.Sprintf("failed to terminate instance %v", err))); err2 != nil { + if err2 := setTestingCondition(fmt.Sprintf("failed to terminate instance %v", err)); err2 != nil { return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } - return r.completeOnboarding(ctx, host, hv) + return r.completeOnboarding(ctx, host, hv, statusCfg, apply) default: return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } } -func (r *OnboardingController) completeOnboarding(ctx context.Context, host string, hv *kvmv1.Hypervisor) (ctrl.Result, error) { +func (r *OnboardingController) completeOnboarding(ctx context.Context, host string, hv *kvmv1.Hypervisor, statusCfg *apiv1.HypervisorStatusApplyConfiguration, apply func() error) (ctrl.Result, error) { onboardingCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if onboardingCondition != nil && onboardingCondition.Reason == kvmv1.ConditionReasonHandover { // We're waiting for aggregates and traits controllers to sync @@ -326,32 +314,35 @@ func (r *OnboardingController) completeOnboarding(ctx context.Context, host stri return ctrl.Result{}, nil } - return ctrl.Result{}, r.applyOnboardingCondition(ctx, hv, + 
utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *k8sacmetav1.Condition(). WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionFalse). WithReason(kvmv1.ConditionReasonSucceeded). WithMessage("Onboarding completed")) + return ctrl.Result{}, apply() } // First time in completeOnboarding - clean up and prepare for aggregate sync if err := r.deleteTestServers(ctx, host); err != nil { - return ctrl.Result{}, errors.Join(err, r.applyOnboardingCondition(ctx, hv, + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *k8sacmetav1.Condition(). WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). // No cleanup, so we are still "onboarding" WithReason(kvmv1.ConditionReasonAborted). - WithMessage(err.Error()))) + WithMessage(err.Error())) + return ctrl.Result{}, errors.Join(err, apply()) } // Mark onboarding as almost complete, triggers other controllers to do their part // Patch status to signal aggregates controller - return ctrl.Result{}, r.applyOnboardingCondition(ctx, hv, + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, *k8sacmetav1.Condition(). WithType(kvmv1.ConditionTypeOnboarding). WithStatus(metav1.ConditionTrue). WithReason(kvmv1.ConditionReasonHandover). WithMessage("Waiting for other controllers to take over")) + return ctrl.Result{}, apply() } func (r *OnboardingController) deleteTestServers(ctx context.Context, host string) error {