From 7b0485741e440fb215e4105e90d78cb13a820939 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Wed, 22 Apr 2026 16:43:27 +0200
Subject: [PATCH 01/15] initial idea

---
 api/v1alpha1/flavor_group_capacity_types.go   | 113 ++++
 api/v1alpha1/zz_generated.deepcopy.go         |  97 ++++
 cmd/manager/main.go                           |  24 +
 .../reservations/capacity/config.go           |  49 ++
 .../reservations/capacity/controller.go       | 258 +++++++++
 .../reservations/capacity/controller_test.go  | 499 ++++++++++++++++++
 .../reservations/capacity/metrics.go          | 104 ++++
 7 files changed, 1144 insertions(+)
 create mode 100644 api/v1alpha1/flavor_group_capacity_types.go
 create mode 100644 internal/scheduling/reservations/capacity/config.go
 create mode 100644 internal/scheduling/reservations/capacity/controller.go
 create mode 100644 internal/scheduling/reservations/capacity/controller_test.go
 create mode 100644 internal/scheduling/reservations/capacity/metrics.go

diff --git a/api/v1alpha1/flavor_group_capacity_types.go b/api/v1alpha1/flavor_group_capacity_types.go
new file mode 100644
index 000000000..7911e1d09
--- /dev/null
+++ b/api/v1alpha1/flavor_group_capacity_types.go
@@ -0,0 +1,113 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+const (
+	// FlavorGroupCapacityConditionFresh indicates the status data is up-to-date.
+	FlavorGroupCapacityConditionFresh = "Fresh"
+)
+
+// FlavorGroupCapacitySpec defines the desired state of FlavorGroupCapacity.
+type FlavorGroupCapacitySpec struct {
+	// FlavorGroup is the name of the flavor group (e.g. "2101").
+	// +kubebuilder:validation:Required
+	FlavorGroup string `json:"flavorGroup"`
+
+	// AvailabilityZone is the OpenStack AZ this capacity data covers (e.g. "qa-de-1a").
+	// +kubebuilder:validation:Required
+	AvailabilityZone string `json:"availabilityZone"`
+}
+
+// FlavorGroupCapacityStatus defines the observed state of FlavorGroupCapacity.
+type FlavorGroupCapacityStatus struct {
+	// TotalCapacity is the total schedulable slots in an empty-datacenter scenario.
+	// Computed as sum of floor(EffectiveCapacity.Memory / smallestFlavorMemory) across
+	// all hosts eligible for this flavor group (empty-state scheduler probe).
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Minimum=0
+	TotalCapacity int64 `json:"totalCapacity,omitempty"`
+
+	// TotalHosts is the number of hosts eligible for this flavor group in the empty-state probe.
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Minimum=0
+	TotalHosts int64 `json:"totalHosts,omitempty"`
+
+	// TotalPlaceable is the schedulable slots remaining given current VM allocations.
+	// Computed from the current-state scheduler probe.
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Minimum=0
+	TotalPlaceable int64 `json:"totalPlaceable,omitempty"`
+
+	// PlaceableHosts is the number of hosts still able to accept a new smallest-flavor VM.
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Minimum=0
+	PlaceableHosts int64 `json:"placeableHosts,omitempty"`
+
+	// TotalInstances is the total number of VM instances running on hypervisors in this AZ,
+	// derived from Hypervisor CRD Status.Instances (not filtered by flavor group).
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Minimum=0
+	TotalInstances int64 `json:"totalInstances,omitempty"`
+
+	// CommittedCapacity is the sum of AcceptedAmount across Ready=True CommittedResource CRDs.
+	// TODO(BLI #337): populate once CommittedResource CRD exists.
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Minimum=0
+	CommittedCapacity int64 `json:"committedCapacity,omitempty"`
+
+	// LastReconcileAt is the timestamp of the last successful reconcile.
+	// +kubebuilder:validation:Optional
+	LastReconcileAt metav1.Time `json:"lastReconcileAt,omitempty"`
+
+	// Conditions represent the current state of the FlavorGroupCapacity.
+	// The Fresh condition indicates whether the status data is up-to-date.
+	// +kubebuilder:validation:Optional
+	// +patchStrategy=merge
+	// +patchMergeKey=type
+	Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:scope=Cluster
+// +kubebuilder:printcolumn:name="FlavorGroup",type="string",JSONPath=".spec.flavorGroup"
+// +kubebuilder:printcolumn:name="AZ",type="string",JSONPath=".spec.availabilityZone"
+// +kubebuilder:printcolumn:name="TotalCapacity",type="integer",JSONPath=".status.totalCapacity"
+// +kubebuilder:printcolumn:name="TotalPlaceable",type="integer",JSONPath=".status.totalPlaceable"
+// +kubebuilder:printcolumn:name="TotalHosts",type="integer",JSONPath=".status.totalHosts"
+// +kubebuilder:printcolumn:name="LastReconcile",type="date",JSONPath=".status.lastReconcileAt"
+// +kubebuilder:printcolumn:name="Fresh",type="string",JSONPath=".status.conditions[?(@.type=='Fresh')].status"
+
+// FlavorGroupCapacity caches pre-computed capacity data for one flavor group in one AZ.
+// One CRD exists per (flavor group × AZ) pair, updated by the capacity controller on a fixed interval.
+// The capacity API reads these CRDs instead of probing the scheduler on each request.
+type FlavorGroupCapacity struct {
+	metav1.TypeMeta `json:",inline"`
+
+	// +optional
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	// +required
+	Spec FlavorGroupCapacitySpec `json:"spec"`
+
+	// +optional
+	Status FlavorGroupCapacityStatus `json:"status,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// FlavorGroupCapacityList contains a list of FlavorGroupCapacity.
+type FlavorGroupCapacityList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []FlavorGroupCapacity `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&FlavorGroupCapacity{}, &FlavorGroupCapacityList{})
+}
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 778c91710..5322a7d1f 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -587,6 +587,103 @@ func (in *FailoverReservationStatus) DeepCopy() *FailoverReservationStatus {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FlavorGroupCapacity) DeepCopyInto(out *FlavorGroupCapacity) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	out.Spec = in.Spec
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlavorGroupCapacity.
+func (in *FlavorGroupCapacity) DeepCopy() *FlavorGroupCapacity {
+	if in == nil {
+		return nil
+	}
+	out := new(FlavorGroupCapacity)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *FlavorGroupCapacity) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FlavorGroupCapacityList) DeepCopyInto(out *FlavorGroupCapacityList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]FlavorGroupCapacity, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlavorGroupCapacityList.
+func (in *FlavorGroupCapacityList) DeepCopy() *FlavorGroupCapacityList {
+	if in == nil {
+		return nil
+	}
+	out := new(FlavorGroupCapacityList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *FlavorGroupCapacityList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FlavorGroupCapacitySpec) DeepCopyInto(out *FlavorGroupCapacitySpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlavorGroupCapacitySpec.
+func (in *FlavorGroupCapacitySpec) DeepCopy() *FlavorGroupCapacitySpec {
+	if in == nil {
+		return nil
+	}
+	out := new(FlavorGroupCapacitySpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FlavorGroupCapacityStatus) DeepCopyInto(out *FlavorGroupCapacityStatus) {
+	*out = *in
+	in.LastReconcileAt.DeepCopyInto(&out.LastReconcileAt)
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]metav1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlavorGroupCapacityStatus.
+func (in *FlavorGroupCapacityStatus) DeepCopy() *FlavorGroupCapacityStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(FlavorGroupCapacityStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *FilterSpec) DeepCopyInto(out *FilterSpec) {
 	*out = *in
diff --git a/cmd/manager/main.go b/cmd/manager/main.go
index 4c390f5a8..abe42317c 100644
--- a/cmd/manager/main.go
+++ b/cmd/manager/main.go
@@ -56,6 +56,7 @@ import (
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/nova"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/pods"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
+	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/capacity"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments"
 	commitmentsapi "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments/api"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/failover"
@@ -677,6 +678,29 @@ func main() {
 			"maxVMsToProcess", failoverConfig.MaxVMsToProcess,
 			"vmSelectionRotationInterval", failoverConfig.VMSelectionRotationInterval)
 	}
+	if slices.Contains(mainConfig.EnabledControllers, "capacity-controller") {
+		setupLog.Info("enabling controller", "controller", "capacity-controller")
+		capacityConfig := conf.GetConfigOrDie[capacity.Config]()
+		capacityConfig.ApplyDefaults()
+
+		capacityMonitor := capacity.NewMonitor(multiclusterClient)
+		if err := metrics.Registry.Register(&capacityMonitor); err != nil {
+			setupLog.Error(err, "failed to register capacity monitor metrics, continuing without metrics")
+		}
+
+		capacityController := capacity.NewController(multiclusterClient, capacityConfig)
+		if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
+			return capacityController.Start(ctx)
+		})); err != nil {
+			setupLog.Error(err, "unable to add capacity controller to manager")
+			os.Exit(1)
+		}
+		setupLog.Info("capacity-controller registered",
+			"schedulerURL", capacityConfig.SchedulerURL,
+			"reconcileInterval", capacityConfig.ReconcileInterval,
+			"totalPipeline", capacityConfig.TotalPipeline,
+			"placeablePipeline", capacityConfig.PlaceablePipeline)
+	}
 
 	// +kubebuilder:scaffold:builder
 
diff --git a/internal/scheduling/reservations/capacity/config.go b/internal/scheduling/reservations/capacity/config.go
new file mode 100644
index 000000000..2940f32e8
--- /dev/null
+++ b/internal/scheduling/reservations/capacity/config.go
@@ -0,0 +1,49 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package capacity
+
+import "time"
+
+// Config holds configuration for the capacity controller; ApplyDefaults fills unset fields from DefaultConfig.
+type Config struct {
+	// ReconcileInterval is how often the controller probes the scheduler and updates CRDs.
+	ReconcileInterval time.Duration `json:"capacityReconcileInterval"` // NOTE(review): encoding/json reads a Duration as integer nanoseconds, not "5m" — confirm the config loader
+
+	// TotalPipeline is the scheduler pipeline used for the empty-state probe.
+	// This pipeline should ignore current VM allocations (e.g. kvm-report-capacity).
+	TotalPipeline string `json:"capacityTotalPipeline"`
+
+	// PlaceablePipeline is the scheduler pipeline used for the current-state probe.
+	// This pipeline considers current VM allocations to determine remaining placement capacity.
+	PlaceablePipeline string `json:"capacityPlaceablePipeline"`
+
+	// SchedulerURL is the endpoint of the nova external scheduler.
+	SchedulerURL string `json:"schedulerURL"`
+}
+
+// ApplyDefaults replaces empty strings and a non-positive ReconcileInterval with the DefaultConfig values.
+func (c *Config) ApplyDefaults() {
+	defaults := DefaultConfig()
+	if c.ReconcileInterval <= 0 { // a negative interval would re-arm the reconcile timer to fire at once
+		c.ReconcileInterval = defaults.ReconcileInterval
+	}
+	if c.TotalPipeline == "" {
+		c.TotalPipeline = defaults.TotalPipeline
+	}
+	if c.PlaceablePipeline == "" {
+		c.PlaceablePipeline = defaults.PlaceablePipeline
+	}
+	if c.SchedulerURL == "" {
+		c.SchedulerURL = defaults.SchedulerURL
+	}
+}
+
+// DefaultConfig returns the built-in settings that ApplyDefaults uses for unset fields.
+func DefaultConfig() (cfg Config) {
+	cfg.ReconcileInterval = 5 * time.Minute
+	cfg.TotalPipeline = "kvm-report-capacity"
+	cfg.PlaceablePipeline = "kvm-general-purpose-load-balancing"
+	cfg.SchedulerURL = "http://localhost:8080/scheduler/nova/external"
+	return cfg
+}
diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
new file mode 100644
index 000000000..4ff2eadc4
--- /dev/null
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -0,0 +1,258 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package capacity
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+	"time"
+
+	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+	"github.com/google/uuid"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute"
+	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
+)
+
+var log = ctrl.Log.WithName("capacity-controller").WithValues("module", "capacity")
+
+// Controller reconciles FlavorGroupCapacity CRDs on a fixed interval.
+// For each (flavor group × AZ) pair it runs two scheduler probes and updates the CRD status.
+type Controller struct {
+	client          client.Client
+	schedulerClient *reservations.SchedulerClient
+	config          Config
+}
+
+func NewController(c client.Client, config Config) *Controller {
+	return &Controller{
+		client:          c,
+		schedulerClient: reservations.NewSchedulerClient(config.SchedulerURL),
+		config:          config,
+	}
+}
+
+// Start reconciles once immediately and then periodically until ctx is cancelled. Implements manager.Runnable.
+func (c *Controller) Start(ctx context.Context) error {
+	timer := time.NewTimer(0) // fire immediately on start
+	defer timer.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return nil // cancellation is the normal shutdown path, not an error
+		case <-timer.C:
+			if err := c.reconcileAll(ctx); err != nil {
+				log.Error(err, "reconcile cycle failed") // logged only; the loop keeps running
+			}
+			timer.Reset(c.config.ReconcileInterval) // re-armed after the cycle, so the interval is the gap between cycles
+		}
+	}
+}
+
+// reconcileAll reconciles every (flavor group × AZ) pair; only the two listing calls can make it return an error.
+func (c *Controller) reconcileAll(ctx context.Context) error {
+	knowledge := &reservations.FlavorGroupKnowledgeClient{Client: c.client}
+	flavorGroups, err := knowledge.GetAllFlavorGroups(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("failed to get flavor groups: %w", err)
+	}
+
+	var hvList hv1.HypervisorList
+	if err := c.client.List(ctx, &hvList); err != nil {
+		return fmt.Errorf("failed to list hypervisors: %w", err)
+	}
+
+	hvByName := make(map[string]hv1.Hypervisor, len(hvList.Items)) // name lookup for hosts returned by the scheduler
+	for _, hv := range hvList.Items {
+		hvByName[hv.Name] = hv
+	}
+
+	azs := availabilityZones(hvList.Items) // the AZ set is derived from the hypervisors' zone labels
+
+	for groupName, groupData := range flavorGroups {
+		for _, az := range azs {
+			if err := c.reconcileOne(ctx, groupName, groupData, az, hvByName, hvList.Items); err != nil {
+				log.Error(err, "failed to reconcile flavor group capacity",
+					"flavorGroup", groupName, "az", az)
+				// Continue with other pairs rather than aborting the whole cycle.
+			}
+		}
+	}
+	return nil // per-pair failures were logged above and are not propagated
+}
+
+// reconcileOne updates the FlavorGroupCapacity CRD for one (group × AZ) pair, creating it if
+// missing. Results of a failed probe are not written, so the last known-good values are kept.
+func (c *Controller) reconcileOne(
+	ctx context.Context,
+	groupName string,
+	groupData compute.FlavorGroupFeature,
+	az string,
+	hvByName map[string]hv1.Hypervisor,
+	allHVs []hv1.Hypervisor,
+) error {
+	smallestFlavor := groupData.SmallestFlavor
+	smallestFlavorBytes := int64(smallestFlavor.MemoryMB) * 1024 * 1024 //nolint:gosec
+	if smallestFlavorBytes <= 0 {
+		return fmt.Errorf("smallest flavor %q has invalid memory %d MB", smallestFlavor.Name, smallestFlavor.MemoryMB)
+	}
+
+	// Empty-state probe: scheduler ignores all current VM allocations.
+	totalCapacity, totalHosts, totalErr := c.probeScheduler(ctx, smallestFlavor, az, c.config.TotalPipeline, hvByName, smallestFlavorBytes)
+
+	// Current-state probe: scheduler considers current VM allocations.
+	totalPlaceable, placeableHosts, placeableErr := c.probeScheduler(ctx, smallestFlavor, az, c.config.PlaceablePipeline, hvByName, smallestFlavorBytes)
+
+	// TODO(BLI #337): populate CommittedCapacity from Ready=True CommittedResource CRDs.
+	var committedCapacity int64
+
+	crdName := crdNameFor(groupName, az)
+	var existing v1alpha1.FlavorGroupCapacity
+	err := c.client.Get(ctx, types.NamespacedName{Name: crdName}, &existing)
+	if apierrors.IsNotFound(err) {
+		existing = v1alpha1.FlavorGroupCapacity{
+			ObjectMeta: metav1.ObjectMeta{Name: crdName},
+			Spec: v1alpha1.FlavorGroupCapacitySpec{
+				FlavorGroup:      groupName,
+				AvailabilityZone: az,
+			},
+		}
+		if createErr := c.client.Create(ctx, &existing); createErr != nil {
+			return fmt.Errorf("failed to create FlavorGroupCapacity %s: %w", crdName, createErr)
+		}
+	} else if err != nil {
+		return fmt.Errorf("failed to get FlavorGroupCapacity %s: %w", crdName, err)
+	}
+
+	patch := client.MergeFrom(existing.DeepCopy())
+	if totalErr == nil {
+		existing.Status.TotalCapacity = totalCapacity
+		existing.Status.TotalHosts = totalHosts
+	}
+	if placeableErr == nil {
+		existing.Status.TotalPlaceable = totalPlaceable
+		existing.Status.PlaceableHosts = placeableHosts
+	}
+	existing.Status.TotalInstances = countInstancesInAZ(allHVs, az)
+	existing.Status.CommittedCapacity = committedCapacity
+
+	freshCondition := metav1.Condition{
+		Type:               v1alpha1.FlavorGroupCapacityConditionFresh,
+		Status:             metav1.ConditionFalse,
+		Reason:             "ReconcileFailed",
+		ObservedGeneration: existing.Generation,
+	}
+	switch {
+	case totalErr != nil:
+		freshCondition.Message = fmt.Sprintf("empty-state probe failed: %v", totalErr)
+	case placeableErr != nil:
+		freshCondition.Message = fmt.Sprintf("current-state probe failed: %v", placeableErr)
+	default:
+		// Both probes succeeded: only now does this count as a successful reconcile.
+		existing.Status.LastReconcileAt = metav1.Now()
+		freshCondition.Status = metav1.ConditionTrue
+		freshCondition.Reason = "ReconcileSucceeded"
+		freshCondition.Message = "capacity data is up-to-date"
+	}
+	meta.SetStatusCondition(&existing.Status.Conditions, freshCondition)
+
+	if patchErr := c.client.Status().Patch(ctx, &existing, patch); patchErr != nil {
+		return fmt.Errorf("failed to patch FlavorGroupCapacity %s status: %w", crdName, patchErr)
+	}
+	return nil
+}
+
+// probeScheduler calls the scheduler with the given pipeline and returns capacity + host count.
+// hosts counts every returned host; capacity sums floor(memory capacity / smallestFlavorBytes)
+// over the returned hosts present in hvByName, using Status.Capacity when EffectiveCapacity is unset.
+// NOTE(review): the current-state probe uses this same full-capacity formula — confirm intended.
+func (c *Controller) probeScheduler(
+	ctx context.Context,
+	flavor compute.FlavorInGroup,
+	az, pipeline string,
+	hvByName map[string]hv1.Hypervisor,
+	smallestFlavorBytes int64,
+) (capacity int64, hosts int64, err error) {
+	resp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{
+		InstanceUUID:     uuid.New().String(),
+		ProjectID:        "cortex-capacity-probe",
+		FlavorName:       flavor.Name,
+		MemoryMB:         flavor.MemoryMB,
+		VCPUs:            flavor.VCPUs,
+		FlavorExtraSpecs: flavor.ExtraSpecs,
+		AvailabilityZone: az,
+		Pipeline:         pipeline,
+	})
+	if err != nil {
+		return 0, 0, fmt.Errorf("scheduler call failed (pipeline=%s): %w", pipeline, err)
+	}
+
+	hosts = int64(len(resp.Hosts)) //nolint:gosec
+	for _, hostName := range resp.Hosts {
+		hv, ok := hvByName[hostName]
+		if !ok {
+			continue
+		}
+		effectiveCap := hv.Status.EffectiveCapacity
+		if effectiveCap == nil {
+			effectiveCap = hv.Status.Capacity
+		}
+		memCap, ok := effectiveCap[hv1.ResourceMemory] // indexing a nil map is safe and yields ok == false
+		if !ok {
+			continue
+		}
+		if capBytes := memCap.Value(); capBytes > 0 {
+			capacity += capBytes / smallestFlavorBytes
+		}
+	}
+	return capacity, hosts, nil
+}
+
+// availabilityZones returns the distinct, non-empty zone-label values of hvs in ascending order.
+func availabilityZones(hvs []hv1.Hypervisor) []string {
+	seen := make(map[string]struct{})
+	zones := make([]string, 0)
+	for i := range hvs {
+		zone := hvs[i].Labels["topology.kubernetes.io/zone"]
+		if _, dup := seen[zone]; dup || zone == "" {
+			continue
+		}
+		seen[zone] = struct{}{}
+		zones = append(zones, zone)
+	}
+	sort.Strings(zones)
+	return zones
+}
+
+// countInstancesInAZ adds up len(Status.Instances) over the hypervisors whose
+// "topology.kubernetes.io/zone" label equals az.
+func countInstancesInAZ(hvs []hv1.Hypervisor, az string) int64 {
+	var sum int64
+	for i := range hvs {
+		if hvs[i].Labels["topology.kubernetes.io/zone"] == az {
+			sum += int64(len(hvs[i].Status.Instances)) //nolint:gosec
+		}
+	}
+	return sum
+}
+
+// crdNameFor derives the FlavorGroupCapacity object name for a (flavorGroup, az) pair.
+// The two parts are joined with a dash and lowercased, and every underscore or dot is
+// turned into a dash. Other characters are passed through unchanged, so the result is
+// only a valid DNS subdomain name if the inputs contain nothing else that is invalid.
+func crdNameFor(flavorGroup, az string) string {
+	toDash := strings.NewReplacer("_", "-", ".", "-")
+	joined := strings.ToLower(flavorGroup + "-" + az)
+	return toDash.Replace(joined)
+}
diff --git a/internal/scheduling/reservations/capacity/controller_test.go b/internal/scheduling/reservations/capacity/controller_test.go
new file mode 100644
index 000000000..3f3ceb6f0
--- /dev/null
+++ b/internal/scheduling/reservations/capacity/controller_test.go
@@ -0,0 +1,499 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package capacity
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"sort"
+	"testing"
+
+	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	schedulerapi "github.com/cobaltcore-dev/cortex/api/external/nova"
+	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute"
+	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
+)
+
+// newTestScheme returns a runtime.Scheme with all required types registered.
+func newTestScheme(t *testing.T) *runtime.Scheme {
+	t.Helper()
+	s := runtime.NewScheme()
+	if err := v1alpha1.AddToScheme(s); err != nil {
+		t.Fatalf("failed to add v1alpha1 scheme: %v", err)
+	}
+	if err := hv1.AddToScheme(s); err != nil {
+		t.Fatalf("failed to add hypervisor scheme: %v", err)
+	}
+	return s
+}
+
+// newFlavorGroupKnowledge creates a ready Knowledge CRD with a single flavor group.
+func newFlavorGroupKnowledge(t *testing.T, groupName string, smallestMemoryMB uint64) *v1alpha1.Knowledge {
+	t.Helper()
+	features := []compute.FlavorGroupFeature{
+		{
+			Name: groupName,
+			SmallestFlavor: compute.FlavorInGroup{
+				Name:       groupName + "-small",
+				MemoryMB:   smallestMemoryMB,
+				VCPUs:      2,
+				ExtraSpecs: map[string]string{"hw:cpu_policy": "dedicated"},
+			},
+		},
+	}
+	raw, err := v1alpha1.BoxFeatureList(features)
+	if err != nil {
+		t.Fatalf("failed to box features: %v", err)
+	}
+	return &v1alpha1.Knowledge{
+		ObjectMeta: metav1.ObjectMeta{Name: "flavor-groups"},
+		Spec: v1alpha1.KnowledgeSpec{
+			SchedulingDomain: v1alpha1.SchedulingDomainNova,
+			Extractor:        v1alpha1.KnowledgeExtractorSpec{Name: "flavor_groups"},
+		},
+		Status: v1alpha1.KnowledgeStatus{
+			Raw: raw,
+			Conditions: []metav1.Condition{
+				{
+					Type:   v1alpha1.KnowledgeConditionReady,
+					Status: metav1.ConditionTrue,
+					Reason: "ExtractorSucceeded",
+				},
+			},
+		},
+	}
+}
+
+// newHypervisor creates a Hypervisor CRD with a topology AZ label and effective capacity.
+func newHypervisor(name, az string, memoryBytes int64, instanceIDs ...string) *hv1.Hypervisor {
+	hv := &hv1.Hypervisor{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   name,
+			Labels: map[string]string{"topology.kubernetes.io/zone": az},
+		},
+	}
+	if memoryBytes > 0 {
+		qty := resource.NewQuantity(memoryBytes, resource.BinarySI)
+		hv.Status.EffectiveCapacity = map[hv1.ResourceName]resource.Quantity{
+			hv1.ResourceMemory: *qty,
+		}
+	}
+	for _, id := range instanceIDs {
+		hv.Status.Instances = append(hv.Status.Instances, hv1.Instance{ID: id})
+	}
+	return hv
+}
+
+// newMockSchedulerServer creates an httptest server that always returns the given host list.
+func newMockSchedulerServer(t *testing.T, hosts []string) *httptest.Server {
+	t.Helper()
+	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		resp := schedulerapi.ExternalSchedulerResponse{Hosts: hosts}
+		if err := json.NewEncoder(w).Encode(resp); err != nil {
+			t.Errorf("mock scheduler: failed to encode response: %v", err)
+		}
+	}))
+}
+
+// --- unit tests for pure helper functions ---
+
+func TestCrdNameFor(t *testing.T) {
+	tests := []struct {
+		group, az, want string
+	}{
+		{"2101", "qa-de-1a", "2101-qa-de-1a"},
+		{"My_Group", "eu.west.1", "my-group-eu-west-1"},
+		{"G", "AZ_1", "g-az-1"},
+	}
+	for _, tt := range tests {
+		got := crdNameFor(tt.group, tt.az)
+		if got != tt.want {
+			t.Errorf("crdNameFor(%q, %q) = %q, want %q", tt.group, tt.az, got, tt.want)
+		}
+	}
+}
+
+func TestAvailabilityZones(t *testing.T) {
+	hvs := []hv1.Hypervisor{
+		*newHypervisor("h1", "az-a", 0),
+		*newHypervisor("h2", "az-b", 0),
+		*newHypervisor("h3", "az-a", 0),             // duplicate
+		{ObjectMeta: metav1.ObjectMeta{Name: "h4"}}, // no label
+	}
+	got := availabilityZones(hvs)
+	want := []string{"az-a", "az-b"}
+	if len(got) != len(want) || !sort.StringsAreSorted(got) {
+		t.Fatalf("availabilityZones() = %v, want %v", got, want)
+	}
+	// got is compared exactly as returned (no re-sorting), so an ordering regression fails the test.
+	for i := range want {
+		if got[i] != want[i] {
+			t.Errorf("availabilityZones()[%d] = %q, want %q", i, got[i], want[i])
+		}
+	}
+}
+
+func TestCountInstancesInAZ(t *testing.T) {
+	hvs := []hv1.Hypervisor{
+		*newHypervisor("h1", "az-a", 0, "vm1", "vm2"),
+		*newHypervisor("h2", "az-a", 0, "vm3"),
+		*newHypervisor("h3", "az-b", 0, "vm4"),
+	}
+	if got := countInstancesInAZ(hvs, "az-a"); got != 3 {
+		t.Errorf("countInstancesInAZ(az-a) = %d, want 3", got)
+	}
+	if got := countInstancesInAZ(hvs, "az-b"); got != 1 {
+		t.Errorf("countInstancesInAZ(az-b) = %d, want 1", got)
+	}
+	if got := countInstancesInAZ(hvs, "az-c"); got != 0 {
+		t.Errorf("countInstancesInAZ(az-c) = %d, want 0", got)
+	}
+}
+
+// --- integration-style tests for reconcileOne ---
+
+func TestReconcileOne_CreatesCRD(t *testing.T) {
+	const (
+		groupName = "2101"
+		az        = "qa-de-1a"
+		memMB     = 4096 // 4 GiB
+		memBytes  = int64(memMB) * 1024 * 1024
+	)
+
+	scheme := newTestScheme(t)
+	hv := newHypervisor("host-1", az, memBytes, "vm1")
+	knowledge := newFlavorGroupKnowledge(t, groupName, memMB)
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(knowledge, hv).
+		WithStatusSubresource(&v1alpha1.FlavorGroupCapacity{}, &v1alpha1.Knowledge{}).
+		Build()
+
+	// Both probes return host-1 so capacity = floor(4GiB/4GiB) = 1
+	schedulerServer := newMockSchedulerServer(t, []string{"host-1"})
+	defer schedulerServer.Close()
+
+	ctrl := NewController(fakeClient, Config{
+		SchedulerURL:      schedulerServer.URL,
+		TotalPipeline:     "kvm-report-capacity",
+		PlaceablePipeline: "kvm-general-purpose",
+	})
+
+	groupData := compute.FlavorGroupFeature{
+		SmallestFlavor: compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB},
+	}
+	hvByName := map[string]hv1.Hypervisor{"host-1": *hv}
+
+	if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}); err != nil {
+		t.Fatalf("reconcileOne failed: %v", err)
+	}
+
+	// Verify CRD was created with correct status
+	var crd v1alpha1.FlavorGroupCapacity
+	if err := fakeClient.Get(context.Background(), types.NamespacedName{Name: crdNameFor(groupName, az)}, &crd); err != nil {
+		t.Fatalf("failed to get CRD: %v", err)
+	}
+	if crd.Status.TotalCapacity != 1 {
+		t.Errorf("TotalCapacity = %d, want 1", crd.Status.TotalCapacity)
+	}
+	if crd.Status.TotalHosts != 1 {
+		t.Errorf("TotalHosts = %d, want 1", crd.Status.TotalHosts)
+	}
+	if crd.Status.TotalInstances != 1 {
+		t.Errorf("TotalInstances = %d, want 1", crd.Status.TotalInstances)
+	}
+	if crd.Status.TotalPlaceable != 1 {
+		t.Errorf("TotalPlaceable = %d, want 1", crd.Status.TotalPlaceable)
+	}
+}
+
+func TestReconcileOne_SetsFreshConditionFalseOnSchedulerError(t *testing.T) {
+	const (
+		groupName = "2101"
+		az        = "qa-de-1a"
+		memMB     = 2048
+	)
+
+	scheme := newTestScheme(t)
+	knowledge := newFlavorGroupKnowledge(t, groupName, memMB)
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(knowledge).
+		WithStatusSubresource(&v1alpha1.FlavorGroupCapacity{}, &v1alpha1.Knowledge{}).
+		Build()
+
+	// Scheduler returns 500 to simulate error
+	failServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+	}))
+	defer failServer.Close()
+
+	ctrl := NewController(fakeClient, Config{
+		SchedulerURL:      failServer.URL,
+		TotalPipeline:     "kvm-report-capacity",
+		PlaceablePipeline: "kvm-general-purpose",
+	})
+
+	groupData := compute.FlavorGroupFeature{
+		SmallestFlavor: compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB},
+	}
+
+	// reconcileOne returns no error itself (it continues on probe failure), but sets Fresh=False
+	if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, map[string]hv1.Hypervisor{}, []hv1.Hypervisor{}); err != nil {
+		t.Fatalf("reconcileOne failed: %v", err)
+	}
+
+	var crd v1alpha1.FlavorGroupCapacity
+	if err := fakeClient.Get(context.Background(), types.NamespacedName{Name: crdNameFor(groupName, az)}, &crd); err != nil {
+		t.Fatalf("failed to get CRD: %v", err)
+	}
+
+	var freshStatus metav1.ConditionStatus
+	for _, c := range crd.Status.Conditions {
+		if c.Type == v1alpha1.FlavorGroupCapacityConditionFresh {
+			freshStatus = c.Status
+		}
+	}
+	if freshStatus != metav1.ConditionFalse {
+		t.Errorf("Fresh condition = %q, want %q", freshStatus, metav1.ConditionFalse)
+	}
+}
+
+func TestReconcileOne_IdempotentUpdate(t *testing.T) {
+	const (
+		groupName = "2101"
+		az        = "qa-de-1a"
+		memMB     = 2048
+		memBytes  = int64(memMB) * 1024 * 1024
+	)
+
+	scheme := newTestScheme(t)
+	hv := newHypervisor("host-1", az, memBytes)
+	knowledge := newFlavorGroupKnowledge(t, groupName, memMB)
+	crdName := crdNameFor(groupName, az)
+
+	// Pre-create the CRD (spec only, empty status) so that reconcileOne exercises the update path, not the create path.
+	existing := &v1alpha1.FlavorGroupCapacity{
+		ObjectMeta: metav1.ObjectMeta{Name: crdName},
+		Spec: v1alpha1.FlavorGroupCapacitySpec{
+			FlavorGroup:      groupName,
+			AvailabilityZone: az,
+		},
+	}
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(knowledge, hv, existing).
+		WithStatusSubresource(&v1alpha1.FlavorGroupCapacity{}, &v1alpha1.Knowledge{}).
+		Build()
+
+	schedulerServer := newMockSchedulerServer(t, []string{"host-1"})
+	defer schedulerServer.Close()
+
+	ctrl := NewController(fakeClient, Config{
+		SchedulerURL:      schedulerServer.URL,
+		TotalPipeline:     "kvm-report-capacity",
+		PlaceablePipeline: "kvm-general-purpose",
+	})
+
+	groupData := compute.FlavorGroupFeature{
+		SmallestFlavor: compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB},
+	}
+	hvByName := map[string]hv1.Hypervisor{"host-1": *hv}
+
+	// First call: runs against the pre-created CRD.
+	if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}); err != nil {
+		t.Fatalf("first reconcileOne failed: %v", err)
+	}
+	// Second call with identical inputs: must not error on the already-reconciled CRD.
+	if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, hvByName, []hv1.Hypervisor{*hv}); err != nil {
+		t.Fatalf("second reconcileOne failed: %v", err)
+	}
+
+	var crd v1alpha1.FlavorGroupCapacity
+	if err := fakeClient.Get(context.Background(), types.NamespacedName{Name: crdName}, &crd); err != nil {
+		t.Fatalf("failed to get CRD: %v", err)
+	}
+	if crd.Status.TotalCapacity != 1 { // presumably one slot: host-1 is created with memBytes, i.e. exactly one memMB flavor — confirm against newHypervisor
+		t.Errorf("TotalCapacity = %d, want 1", crd.Status.TotalCapacity)
+	}
+}
+
+func TestReconcileAll_SkipsGroupsWithNoAZs(t *testing.T) {
+	scheme := newTestScheme(t)
+	knowledge := newFlavorGroupKnowledge(t, "2101", 2048)
+
+	// No Hypervisor objects are registered, so no AZs can be derived: reconcileAll must succeed and create no CRDs.
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(knowledge).
+		WithStatusSubresource(&v1alpha1.FlavorGroupCapacity{}, &v1alpha1.Knowledge{}).
+		Build()
+
+	ctrl := NewController(fakeClient, Config{
+		SchedulerURL:      "http://localhost:9999", // unreachable; not called
+		TotalPipeline:     "kvm-report-capacity",
+		PlaceablePipeline: "kvm-general-purpose",
+	})
+
+	if err := ctrl.reconcileAll(context.Background()); err != nil {
+		t.Errorf("reconcileAll with no hypervisors returned error: %v", err)
+	}
+
+	var list v1alpha1.FlavorGroupCapacityList // listed without filters: any FlavorGroupCapacity object at all fails the test
+	if err := fakeClient.List(context.Background(), &list); err != nil {
+		t.Fatalf("failed to list CRDs: %v", err)
+	}
+	if len(list.Items) != 0 {
+		t.Errorf("expected 0 CRDs, got %d", len(list.Items))
+	}
+}
+
+func TestProbeScheduler_CapacityCalculation(t *testing.T) {
+	const memMB = 4096
+	const memBytes = int64(memMB) * 1024 * 1024
+
+	scheme := newTestScheme(t)
+	hv1Obj := newHypervisor("host-1", "az-a", memBytes)
+	hv2Obj := newHypervisor("host-2", "az-a", memBytes*2) // 2x memory
+
+	fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() // empty client: the hypervisors reach probeScheduler via hvByName only
+
+	// Mock scheduler reports both hosts.
+	srv := newMockSchedulerServer(t, []string{"host-1", "host-2"})
+	defer srv.Close()
+
+	c := NewController(fakeClient, Config{SchedulerURL: srv.URL}) // pipelines left unset; the pipeline name is passed to probeScheduler directly
+	hvByName := map[string]hv1.Hypervisor{
+		"host-1": *hv1Obj,
+		"host-2": *hv2Obj,
+	}
+	flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB}
+
+	capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName, memBytes)
+	if err != nil {
+		t.Fatalf("probeScheduler failed: %v", err)
+	}
+	if hosts != 2 {
+		t.Errorf("hosts = %d, want 2", hosts)
+	}
+	// Expected slots: host-1 = 1 (4GiB/4GiB), host-2 = 2 (8GiB/4GiB), summed to 3.
+	if capacity != 3 {
+		t.Errorf("capacity = %d, want 3", capacity)
+	}
+}
+
+func TestReconcileAll_MultipleGroupsAndAZs(t *testing.T) {
+	scheme := newTestScheme(t)
+
+	const memMB = 2048
+	const memBytes = int64(memMB) * 1024 * 1024
+
+	// Two AZs with one hypervisor each. Only one flavor group ("2101") is registered, so only the per-AZ fan-out is exercised.
+	hv1Obj := newHypervisor("h1", "az-a", memBytes)
+	hv2Obj := newHypervisor("h2", "az-b", memBytes)
+	knowledge := newFlavorGroupKnowledge(t, "2101", memMB)
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(knowledge, hv1Obj, hv2Obj).
+		WithStatusSubresource(&v1alpha1.FlavorGroupCapacity{}, &v1alpha1.Knowledge{}).
+		Build()
+
+	srv := newMockSchedulerServer(t, []string{}) // scheduler reports no hosts; only the number of created CRDs is asserted below
+	defer srv.Close()
+
+	c := NewController(fakeClient, Config{
+		SchedulerURL:      srv.URL,
+		TotalPipeline:     "kvm-report-capacity",
+		PlaceablePipeline: "kvm-general-purpose",
+	})
+
+	if err := c.reconcileAll(context.Background()); err != nil {
+		t.Fatalf("reconcileAll failed: %v", err)
+	}
+
+	// Expect one CRD per AZ for the single group
+	var list v1alpha1.FlavorGroupCapacityList
+	if err := fakeClient.List(context.Background(), &list); err != nil {
+		t.Fatalf("failed to list CRDs: %v", err)
+	}
+	if len(list.Items) != 2 {
+		names := make([]string, len(list.Items)) // collected only to make the failure message readable
+		for i, item := range list.Items {
+			names[i] = item.Name
+		}
+		t.Errorf("expected 2 CRDs (one per AZ), got %d: %v", len(list.Items), names)
+	}
+}
+
+func TestReconcileAll_FlavorGroupsKnowledgeNotReady(t *testing.T) {
+	scheme := newTestScheme(t)
+
+	// The flavor-groups Knowledge CRD exists, but its Ready condition is False.
+	knowledge := &v1alpha1.Knowledge{
+		ObjectMeta: metav1.ObjectMeta{Name: "flavor-groups"},
+		Spec: v1alpha1.KnowledgeSpec{
+			SchedulingDomain: v1alpha1.SchedulingDomainNova,
+			Extractor:        v1alpha1.KnowledgeExtractorSpec{Name: "flavor_groups"},
+		},
+		Status: v1alpha1.KnowledgeStatus{
+			Conditions: []metav1.Condition{
+				{
+					Type:   v1alpha1.KnowledgeConditionReady,
+					Status: metav1.ConditionFalse,
+					Reason: "NotReady",
+				},
+			},
+		},
+	}
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(knowledge).
+		WithStatusSubresource(&v1alpha1.Knowledge{}).
+		Build()
+
+	c := NewController(fakeClient, Config{
+		SchedulerURL:      "http://localhost:9999", // presumably never contacted, since reconcileAll should fail first — confirm
+		TotalPipeline:     "kvm-report-capacity",
+		PlaceablePipeline: "kvm-general-purpose",
+	})
+
+	// reconcileAll must return an error when the knowledge is not ready; only err != nil is checked, not its content.
+	if err := c.reconcileAll(context.Background()); err == nil {
+		t.Error("reconcileAll should fail when flavor groups knowledge is not ready")
+	}
+}
+
+func TestReconcileOne_ZeroMemoryFlavorReturnsError(t *testing.T) {
+	scheme := newTestScheme(t)
+	fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build()
+	c := NewController(fakeClient, Config{}) // empty Config: no scheduler URL or pipelines are configured
+
+	groupData := compute.FlavorGroupFeature{
+		SmallestFlavor: compute.FlavorInGroup{Name: "bad-flavor", MemoryMB: 0}, // MemoryMB: 0 is the invalid input under test
+	}
+	err := c.reconcileOne(context.Background(), "2101", groupData, "az-a", nil, nil) // no hypervisors are passed
+	if err == nil {
+		t.Error("expected error for zero-memory flavor")
+	}
+}
+
+// TestPackageLogVar makes no assertions: it only references the reservations package, presumably as a
+// compile-time check that its package-level log variable does not collide with the one in this package.
+func TestPackageLogVar(t *testing.T) {
+	_ = reservations.NewSchedulerClient("http://localhost")
+}
diff --git a/internal/scheduling/reservations/capacity/metrics.go b/internal/scheduling/reservations/capacity/metrics.go
new file mode 100644
index 000000000..698d0ab9e
--- /dev/null
+++ b/internal/scheduling/reservations/capacity/metrics.go
@@ -0,0 +1,104 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package capacity
+
+import (
+	"context"
+
+	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+	"github.com/prometheus/client_golang/prometheus"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+var capacityLabels = []string{"flavor_group", "az"} // label names shared by every gauge vector created in NewMonitor
+
+// Monitor provides Prometheus metrics for FlavorGroupCapacity CRDs.
+// It implements prometheus.Collector (on *Monitor) and reads CRD status on each Collect call.
+type Monitor struct {
+	client            client.Client        // used by Collect to list FlavorGroupCapacity objects
+	totalCapacity     *prometheus.GaugeVec // set from Status.TotalCapacity
+	totalPlaceable    *prometheus.GaugeVec // set from Status.TotalPlaceable
+	totalHosts        *prometheus.GaugeVec // set from Status.TotalHosts
+	placeableHosts    *prometheus.GaugeVec // set from Status.PlaceableHosts
+	totalInstances    *prometheus.GaugeVec // set from Status.TotalInstances
+	committedCapacity *prometheus.GaugeVec // set from Status.CommittedCapacity
+}
+
+// NewMonitor creates a Monitor that reads FlavorGroupCapacity CRDs through c. Describe and Collect have pointer receivers, so register a pointer to the returned value.
+func NewMonitor(c client.Client) Monitor {
+	return Monitor{
+		client: c,
+		totalCapacity: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_committed_resource_capacity_total", // NOTE(review): Prometheus naming guidance reserves the "_total" suffix for counters; this is a gauge
+			Help: "Total schedulable slots in an empty-datacenter scenario per flavor group and AZ.",
+		}, capacityLabels),
+		totalPlaceable: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_committed_resource_capacity_placeable",
+			Help: "Schedulable slots remaining given current VM allocations per flavor group and AZ.",
+		}, capacityLabels),
+		totalHosts: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_committed_resource_capacity_hosts_total", // NOTE(review): same "_total" suffix concern as above
+			Help: "Number of hosts eligible for this flavor group in the empty-state probe.",
+		}, capacityLabels),
+		placeableHosts: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_committed_resource_capacity_hosts_placeable",
+			Help: "Number of hosts still able to accept a new smallest-flavor VM.",
+		}, capacityLabels),
+		totalInstances: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_committed_resource_capacity_instances",
+			Help: "Total VM instances running on hypervisors in this AZ (not filtered by flavor group).",
+		}, capacityLabels),
+		committedCapacity: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "cortex_committed_resource_capacity_committed",
+			Help: "Sum of AcceptedAmount across Ready CommittedResource CRDs for this flavor group and AZ.",
+		}, capacityLabels),
+	}
+}
+
+// Describe implements prometheus.Collector by forwarding the descriptors of all six gauge vectors to ch.
+func (m *Monitor) Describe(ch chan<- *prometheus.Desc) {
+	m.totalCapacity.Describe(ch)
+	m.totalPlaceable.Describe(ch)
+	m.totalHosts.Describe(ch)
+	m.placeableHosts.Describe(ch)
+	m.totalInstances.Describe(ch)
+	m.committedCapacity.Describe(ch)
+}
+
+// Collect implements prometheus.Collector: it lists all FlavorGroupCapacity CRDs, rebuilds the gauges from their status and sends them to ch.
+func (m *Monitor) Collect(ch chan<- prometheus.Metric) {
+	var list v1alpha1.FlavorGroupCapacityList
+	if err := m.client.List(context.Background(), &list); err != nil { // NOTE(review): context.Background() carries no deadline for this List call
+		log.Error(err, "failed to list FlavorGroupCapacity CRDs for metrics")
+		return // nothing is sent to ch for this Collect call when the list fails
+	}
+
+	// Reset all gauges so deleted CRDs don't linger. NOTE(review): Reset/Set/Collect below is not guarded against concurrent Collect calls — confirm this is safe.
+	m.totalCapacity.Reset()
+	m.totalPlaceable.Reset()
+	m.totalHosts.Reset()
+	m.placeableHosts.Reset()
+	m.totalInstances.Reset()
+	m.committedCapacity.Reset()
+
+	for _, c := range list.Items {
+		labels := prometheus.Labels{ // keys match capacityLabels
+			"flavor_group": c.Spec.FlavorGroup,
+			"az":           c.Spec.AvailabilityZone,
+		}
+		m.totalCapacity.With(labels).Set(float64(c.Status.TotalCapacity))
+		m.totalPlaceable.With(labels).Set(float64(c.Status.TotalPlaceable))
+		m.totalHosts.With(labels).Set(float64(c.Status.TotalHosts))
+		m.placeableHosts.With(labels).Set(float64(c.Status.PlaceableHosts))
+		m.totalInstances.With(labels).Set(float64(c.Status.TotalInstances))
+		m.committedCapacity.With(labels).Set(float64(c.Status.CommittedCapacity))
+	}
+
+	m.totalCapacity.Collect(ch) // emit the freshly populated series of each vector
+	m.totalPlaceable.Collect(ch)
+	m.totalHosts.Collect(ch)
+	m.placeableHosts.Collect(ch)
+	m.totalInstances.Collect(ch)
+	m.committedCapacity.Collect(ch)
+}

From 3411dd01f89917bf908112a6869e678044237be8 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 28 Apr 2026 14:22:18 +0200
Subject: [PATCH 02/15] helm and rbac

---
 helm/bundles/cortex-nova/values.yaml          |   1 +
 .../cortex.cloud_flavorgroupcapacities.yaml   | 166 ++++++++++++++++++
 helm/library/cortex/templates/rbac/role.yaml  |   3 +
 3 files changed, 170 insertions(+)
 create mode 100644 helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml

diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml
index c40849739..f709bea5f 100644
--- a/helm/bundles/cortex-nova/values.yaml
+++ b/helm/bundles/cortex-nova/values.yaml
@@ -133,6 +133,7 @@ cortex-scheduling-controllers:
       - hypervisor-overcommit-controller
       - committed-resource-reservations-controller
       - failover-reservations-controller
+      - capacity-controller
     enabledTasks:
       - nova-history-cleanup-task
     # If true, the external scheduler API will limit the list of hosts in its
diff --git a/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml b/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
new file mode 100644
index 000000000..a3bc11fbe
--- /dev/null
+++ b/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
@@ -0,0 +1,166 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.20.1
+  name: flavorgroupcapacities.cortex.cloud
+spec:
+  group: cortex.cloud
+  names:
+    kind: FlavorGroupCapacity
+    listKind: FlavorGroupCapacityList
+    plural: flavorgroupcapacities
+    singular: flavorgroupcapacity
+  scope: Cluster
+  versions:
+  - additionalPrinterColumns:
+    - jsonPath: .spec.flavorGroup
+      name: FlavorGroup
+      type: string
+    - jsonPath: .spec.availabilityZone
+      name: AZ
+      type: string
+    - jsonPath: .status.totalCapacity
+      name: TotalCapacity
+      type: integer
+    - jsonPath: .status.totalPlaceable
+      name: TotalPlaceable
+      type: integer
+    - jsonPath: .status.totalHosts
+      name: TotalHosts
+      type: integer
+    - jsonPath: .status.lastReconcileAt
+      name: LastReconcile
+      type: date
+    - jsonPath: .status.conditions[?(@.type=='Ready')].status
+      name: Ready
+      type: string
+    name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: |-
+          FlavorGroupCapacity caches pre-computed capacity data for one flavor group in one AZ.
+          One CRD exists per (flavor group × AZ) pair, updated by the capacity controller on a fixed interval.
+          The capacity API reads these CRDs instead of probing the scheduler on each request.
+        properties:
+          apiVersion:
+            description: APIVersion defines the versioned schema of this representation of an object.
+            type: string
+          kind:
+            description: Kind is a string value representing the REST resource this object represents.
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: FlavorGroupCapacitySpec defines the desired state of FlavorGroupCapacity.
+            properties:
+              availabilityZone:
+                description: AvailabilityZone is the OpenStack AZ this capacity data covers (e.g. "qa-de-1a").
+                type: string
+              flavorGroup:
+                description: FlavorGroup is the name of the flavor group (e.g. "2101").
+                type: string
+            required:
+            - availabilityZone
+            - flavorGroup
+            type: object
+          status:
+            description: FlavorGroupCapacityStatus defines the observed state of FlavorGroupCapacity.
+            properties:
+              committedCapacity:
+                description: CommittedCapacity is the sum of AcceptedAmount across Ready=True CommittedResource CRDs.
+                format: int64
+                minimum: 0
+                type: integer
+              conditions:
+                description: |-
+                  Conditions represent the current state of the FlavorGroupCapacity.
+                  The Ready condition indicates whether the status data is up-to-date.
+                items:
+                  description: Condition contains details for one aspect of the current state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: lastTransitionTime is the last time the condition transitioned from one status to another.
+                      format: date-time
+                      type: string
+                    message:
+                      description: message is a human readable message indicating details about the transition.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: observedGeneration represents the .metadata.generation that the condition was set based upon.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+                x-kubernetes-list-map-keys:
+                - type
+                x-kubernetes-list-type: map
+              lastReconcileAt:
+                description: LastReconcileAt is the timestamp of the last successful reconcile.
+                format: date-time
+                type: string
+              placeableHosts:
+                description: PlaceableHosts is the number of hosts still able to accept a new smallest-flavor VM.
+                format: int64
+                minimum: 0
+                type: integer
+              totalCapacity:
+                description: |-
+                  TotalCapacity is the total schedulable slots in an empty-datacenter scenario.
+                  Computed as sum of floor(EffectiveCapacity.Memory / smallestFlavorMemory) across
+                  all hosts eligible for this flavor group (empty-state scheduler probe).
+                format: int64
+                minimum: 0
+                type: integer
+              totalHosts:
+                description: TotalHosts is the number of hosts eligible for this flavor group in the empty-state probe.
+                format: int64
+                minimum: 0
+                type: integer
+              totalInstances:
+                description: |-
+                  TotalInstances is the total number of VM instances running on hypervisors in this AZ,
+                  derived from Hypervisor CRD Status.Instances (not filtered by flavor group).
+                format: int64
+                minimum: 0
+                type: integer
+              totalPlaceable:
+                description: TotalPlaceable is the schedulable slots remaining given current VM allocations.
+                format: int64
+                minimum: 0
+                type: integer
+            type: object
+        required:
+        - spec
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/helm/library/cortex/templates/rbac/role.yaml b/helm/library/cortex/templates/rbac/role.yaml
index ea75c6897..6b89a2e33 100644
--- a/helm/library/cortex/templates/rbac/role.yaml
+++ b/helm/library/cortex/templates/rbac/role.yaml
@@ -14,6 +14,7 @@ rules:
   - datasources
   - reservations
   - committedresources
+  - flavorgroupcapacities
   - decisions
   - deschedulings
   - pipelines
@@ -34,6 +35,7 @@ rules:
   - datasources/finalizers
   - reservations/finalizers
   - committedresources/finalizers
+  - flavorgroupcapacities/finalizers
   - decisions/finalizers
   - deschedulings/finalizers
   - pipelines/finalizers
@@ -48,6 +50,7 @@ rules:
   - datasources/status
   - reservations/status
   - committedresources/status
+  - flavorgroupcapacities/status
   - decisions/status
   - deschedulings/status
   - pipelines/status

From fc28ac8210a9a4be84ae27d380bae23eeae7175f Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 28 Apr 2026 14:23:11 +0200
Subject: [PATCH 03/15] adjusting to CommittedResource CRD

---
 api/v1alpha1/flavor_group_capacity_types.go   | 25 +++----
 api/v1alpha1/zz_generated.deepcopy.go         |  2 +-
 .../reservations/capacity/controller.go       | 43 +++++++++++-
 .../reservations/capacity/controller_test.go  | 68 +++++++++++++++++--
 4 files changed, 114 insertions(+), 24 deletions(-)

diff --git a/api/v1alpha1/flavor_group_capacity_types.go b/api/v1alpha1/flavor_group_capacity_types.go
index 7911e1d09..edd04ca90 100644
--- a/api/v1alpha1/flavor_group_capacity_types.go
+++ b/api/v1alpha1/flavor_group_capacity_types.go
@@ -8,8 +8,8 @@ import (
 )
 
 const (
-	// FlavorGroupCapacityConditionFresh indicates the status data is up-to-date.
-	FlavorGroupCapacityConditionFresh = "Fresh"
+	// FlavorGroupCapacityConditionReady indicates the status data is up-to-date.
+	FlavorGroupCapacityConditionReady = "Ready"
 )
 
 // FlavorGroupCapacitySpec defines the desired state of FlavorGroupCapacity.
@@ -29,46 +29,36 @@ type FlavorGroupCapacityStatus struct {
 	// Computed as sum of floor(EffectiveCapacity.Memory / smallestFlavorMemory) across
 	// all hosts eligible for this flavor group (empty-state scheduler probe).
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Minimum=0
 	TotalCapacity int64 `json:"totalCapacity,omitempty"`
 
 	// TotalHosts is the number of hosts eligible for this flavor group in the empty-state probe.
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Minimum=0
 	TotalHosts int64 `json:"totalHosts,omitempty"`
 
 	// TotalPlaceable is the schedulable slots remaining given current VM allocations.
 	// Computed from the current-state scheduler probe.
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Minimum=0
 	TotalPlaceable int64 `json:"totalPlaceable,omitempty"`
 
 	// PlaceableHosts is the number of hosts still able to accept a new smallest-flavor VM.
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Minimum=0
 	PlaceableHosts int64 `json:"placeableHosts,omitempty"`
 
 	// TotalInstances is the total number of VM instances running on hypervisors in this AZ,
 	// derived from Hypervisor CRD Status.Instances (not filtered by flavor group).
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Minimum=0
 	TotalInstances int64 `json:"totalInstances,omitempty"`
 
 	// CommittedCapacity is the sum of AcceptedAmount across Ready=True CommittedResource CRDs.
-	// TODO(BLI #337): populate once CommittedResource CRD exists.
 	// +kubebuilder:validation:Optional
-	// +kubebuilder:validation:Minimum=0
 	CommittedCapacity int64 `json:"committedCapacity,omitempty"`
 
 	// LastReconcileAt is the timestamp of the last successful reconcile.
 	// +kubebuilder:validation:Optional
 	LastReconcileAt metav1.Time `json:"lastReconcileAt,omitempty"`
 
-	// Conditions represent the current state of the FlavorGroupCapacity.
-	// The Fresh condition indicates whether the status data is up-to-date.
+	// The current status conditions of the FlavorGroupCapacity.
 	// +kubebuilder:validation:Optional
-	// +patchStrategy=merge
-	// +patchMergeKey=type
 	Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
 }
 
@@ -81,7 +71,7 @@ type FlavorGroupCapacityStatus struct {
 // +kubebuilder:printcolumn:name="TotalPlaceable",type="integer",JSONPath=".status.totalPlaceable"
 // +kubebuilder:printcolumn:name="TotalHosts",type="integer",JSONPath=".status.totalHosts"
 // +kubebuilder:printcolumn:name="LastReconcile",type="date",JSONPath=".status.lastReconcileAt"
-// +kubebuilder:printcolumn:name="Fresh",type="string",JSONPath=".status.conditions[?(@.type=='Fresh')].status"
+// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status"
 
 // FlavorGroupCapacity caches pre-computed capacity data for one flavor group in one AZ.
 // One CRD exists per (flavor group × AZ) pair, updated by the capacity controller on a fixed interval.
@@ -89,14 +79,17 @@ type FlavorGroupCapacityStatus struct {
 type FlavorGroupCapacity struct {
 	metav1.TypeMeta `json:",inline"`
 
+	// metadata is a standard object metadata
 	// +optional
-	metav1.ObjectMeta `json:"metadata,omitempty"`
+	metav1.ObjectMeta `json:"metadata,omitempty,omitzero"`
 
+	// spec defines the desired state of FlavorGroupCapacity
 	// +required
 	Spec FlavorGroupCapacitySpec `json:"spec"`
 
+	// status defines the observed state of FlavorGroupCapacity
 	// +optional
-	Status FlavorGroupCapacityStatus `json:"status,omitempty"`
+	Status FlavorGroupCapacityStatus `json:"status,omitempty,omitzero"`
 }
 
 // +kubebuilder:object:root=true
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index be5683460..be8791809 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -807,7 +807,7 @@ func (in *FlavorGroupCapacityStatus) DeepCopyInto(out *FlavorGroupCapacityStatus
 	in.LastReconcileAt.DeepCopyInto(&out.LastReconcileAt)
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
-		*out = make([]metav1.Condition, len(*in))
+		*out = make([]v1.Condition, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
index 4ff2eadc4..00853904d 100644
--- a/internal/scheduling/reservations/capacity/controller.go
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -116,8 +116,11 @@ func (c *Controller) reconcileOne(
 	// Count total instances on hypervisors in this AZ.
 	totalInstances := countInstancesInAZ(allHVs, az)
 
-	// TODO(BLI #337): populate CommittedCapacity from Ready=True CommittedResource CRDs.
-	var committedCapacity int64
+	committedCapacity, committedErr := c.sumCommittedCapacity(ctx, groupName, az, smallestFlavorBytes)
+	if committedErr != nil {
+		log.Error(committedErr, "failed to sum committed capacity", "flavorGroup", groupName, "az", az)
+		committedCapacity = 0
+	}
 
 	crdName := crdNameFor(groupName, az)
 	fresh := totalErr == nil && placeableErr == nil
@@ -149,7 +152,7 @@ func (c *Controller) reconcileOne(
 	existing.Status.LastReconcileAt = metav1.Now()
 
 	freshCondition := metav1.Condition{
-		Type:               v1alpha1.FlavorGroupCapacityConditionFresh,
+		Type:               v1alpha1.FlavorGroupCapacityConditionReady,
 		ObservedGeneration: existing.Generation,
 	}
 	if fresh {
@@ -219,6 +222,40 @@ func (c *Controller) probeScheduler(
 	return capacity, hosts, nil
 }
 
+// sumCommittedCapacity sums, in smallest-flavor slots, Status.AcceptedAmount (or Spec.Amount
+// if unset) of the guaranteed/confirmed memory CommittedResource CRDs for (groupName, az).
+// It returns an error if smallestFlavorBytes is not positive or if the CRD list call fails.
+func (c *Controller) sumCommittedCapacity(ctx context.Context, groupName, az string, smallestFlavorBytes int64) (int64, error) {
+	if smallestFlavorBytes <= 0 { // guards the integer division below against a zero or negative divisor
+		return 0, fmt.Errorf("invalid smallest flavor size: %d bytes", smallestFlavorBytes)
+	}
+	var list v1alpha1.CommittedResourceList
+	if err := c.client.List(ctx, &list); err != nil {
+		return 0, fmt.Errorf("failed to list CommittedResources: %w", err)
+	}
+
+	var total int64
+	for _, cr := range list.Items {
+		if cr.Spec.FlavorGroupName != groupName || cr.Spec.AvailabilityZone != az {
+			continue
+		}
+		if cr.Spec.ResourceType != v1alpha1.CommittedResourceTypeMemory {
+			continue
+		}
+		if cr.Spec.State != v1alpha1.CommitmentStatusGuaranteed && cr.Spec.State != v1alpha1.CommitmentStatusConfirmed {
+			continue
+		}
+		amount := cr.Spec.Amount
+		if cr.Status.AcceptedAmount != nil {
+			amount = *cr.Status.AcceptedAmount
+		}
+		if committed := amount.Value(); committed > 0 {
+			total += committed / smallestFlavorBytes // floored to whole slots per CommittedResource
+		}
+	}
+	return total, nil
+}
+
 // availabilityZones returns a sorted, deduplicated list of AZs from Hypervisor CRD labels.
 func availabilityZones(hvs []hv1.Hypervisor) []string {
 	azSet := make(map[string]struct{})
diff --git a/internal/scheduling/reservations/capacity/controller_test.go b/internal/scheduling/reservations/capacity/controller_test.go
index 3f3ceb6f0..7f9ae2fb3 100644
--- a/internal/scheduling/reservations/capacity/controller_test.go
+++ b/internal/scheduling/reservations/capacity/controller_test.go
@@ -16,6 +16,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 
 	schedulerapi "github.com/cobaltcore-dev/cortex/api/external/nova"
@@ -218,7 +219,7 @@ func TestReconcileOne_CreatesCRD(t *testing.T) {
 	}
 }
 
-func TestReconcileOne_SetsFreshConditionFalseOnSchedulerError(t *testing.T) {
+func TestReconcileOne_SetsReadyConditionFalseOnSchedulerError(t *testing.T) {
 	const (
 		groupName = "2101"
 		az        = "qa-de-1a"
@@ -250,7 +251,7 @@ func TestReconcileOne_SetsFreshConditionFalseOnSchedulerError(t *testing.T) {
 		SmallestFlavor: compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB},
 	}
 
-	// reconcileOne returns no error itself (it continues on probe failure), but sets Fresh=False
+	// reconcileOne returns no error itself (it continues on probe failure), but sets Ready=False
 	if err := ctrl.reconcileOne(context.Background(), groupName, groupData, az, map[string]hv1.Hypervisor{}, []hv1.Hypervisor{}); err != nil {
 		t.Fatalf("reconcileOne failed: %v", err)
 	}
@@ -262,12 +263,12 @@ func TestReconcileOne_SetsFreshConditionFalseOnSchedulerError(t *testing.T) {
 
 	var freshStatus metav1.ConditionStatus
 	for _, c := range crd.Status.Conditions {
-		if c.Type == v1alpha1.FlavorGroupCapacityConditionFresh {
+		if c.Type == v1alpha1.FlavorGroupCapacityConditionReady {
 			freshStatus = c.Status
 		}
 	}
 	if freshStatus != metav1.ConditionFalse {
-		t.Errorf("Fresh condition = %q, want %q", freshStatus, metav1.ConditionFalse)
+		t.Errorf("Ready condition = %q, want %q", freshStatus, metav1.ConditionFalse)
 	}
 }
 
@@ -497,3 +498,62 @@ func TestReconcileOne_ZeroMemoryFlavorReturnsError(t *testing.T) {
 func TestPackageLogVar(t *testing.T) {
 	_ = reservations.NewSchedulerClient("http://localhost")
 }
+
+func TestSumCommittedCapacity(t *testing.T) {
+	const (
+		groupName    = "2101"
+		az           = "qa-de-1a"
+		memMB        = 4096
+		memBytes     = int64(memMB) * 1024 * 1024
+	)
+
+	newCR := func(name, group, zone string, state v1alpha1.CommitmentStatus, resType v1alpha1.CommittedResourceType, amount string, acceptedAmount string) *v1alpha1.CommittedResource {
+		qty := resource.MustParse(amount)
+		cr := &v1alpha1.CommittedResource{
+			ObjectMeta: metav1.ObjectMeta{Name: name},
+			Spec: v1alpha1.CommittedResourceSpec{
+				FlavorGroupName:  group,
+				AvailabilityZone: zone,
+				State:            state,
+				ResourceType:     resType,
+				Amount:           qty,
+			},
+		}
+		if acceptedAmount != "" {
+			accepted := resource.MustParse(acceptedAmount)
+			cr.Status.AcceptedAmount = &accepted
+		}
+		return cr
+	}
+
+	scheme := newTestScheme(t)
+	objects := []client.Object{
+		// Should count: confirmed, memory, right group+AZ, AcceptedAmount set
+		newCR("cr1", groupName, az, v1alpha1.CommitmentStatusConfirmed, v1alpha1.CommittedResourceTypeMemory, "8Gi", "8Gi"),
+		// Should count: guaranteed, memory, right group+AZ, no AcceptedAmount → falls back to Spec.Amount
+		newCR("cr2", groupName, az, v1alpha1.CommitmentStatusGuaranteed, v1alpha1.CommittedResourceTypeMemory, "4Gi", ""),
+		// Should NOT count: wrong state
+		newCR("cr3", groupName, az, v1alpha1.CommitmentStatusPlanned, v1alpha1.CommittedResourceTypeMemory, "4Gi", ""),
+		// Should NOT count: wrong resource type
+		newCR("cr4", groupName, az, v1alpha1.CommitmentStatusConfirmed, v1alpha1.CommittedResourceTypeCores, "4Gi", ""),
+		// Should NOT count: wrong AZ
+		newCR("cr5", groupName, "other-az", v1alpha1.CommitmentStatusConfirmed, v1alpha1.CommittedResourceTypeMemory, "4Gi", ""),
+		// Should NOT count: wrong flavor group
+		newCR("cr6", "other-group", az, v1alpha1.CommitmentStatusConfirmed, v1alpha1.CommittedResourceTypeMemory, "4Gi", ""),
+	}
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(objects...).
+		Build()
+
+	c := NewController(fakeClient, Config{})
+	// smallestFlavorBytes = 4GiB → cr1 = 8GiB/4GiB = 2 slots, cr2 = 4GiB/4GiB = 1 slot → total = 3
+	got, err := c.sumCommittedCapacity(context.Background(), groupName, az, memBytes)
+	if err != nil {
+		t.Fatalf("sumCommittedCapacity failed: %v", err)
+	}
+	if got != 3 {
+		t.Errorf("sumCommittedCapacity = %d, want 3", got)
+	}
+}

From 742d604e463f784e9732f586a17db8608f0fd3f1 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 28 Apr 2026 15:34:28 +0200
Subject: [PATCH 04/15] capacity: merge probeScheduler result types; use flavor group 2152 in multi-AZ test

---
 internal/scheduling/reservations/capacity/controller.go      | 2 +-
 internal/scheduling/reservations/capacity/controller_test.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
index 00853904d..e5e8e352e 100644
--- a/internal/scheduling/reservations/capacity/controller.go
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -183,7 +183,7 @@ func (c *Controller) probeScheduler(
 	az, pipeline string,
 	hvByName map[string]hv1.Hypervisor,
 	smallestFlavorBytes int64,
-) (capacity int64, hosts int64, err error) {
+) (capacity, hosts int64, err error) {
 	resp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{
 		InstanceUUID:     uuid.New().String(),
 		ProjectID:        "cortex-capacity-probe",
diff --git a/internal/scheduling/reservations/capacity/controller_test.go b/internal/scheduling/reservations/capacity/controller_test.go
index 7f9ae2fb3..ea8686c7e 100644
--- a/internal/scheduling/reservations/capacity/controller_test.go
+++ b/internal/scheduling/reservations/capacity/controller_test.go
@@ -405,7 +405,7 @@ func TestReconcileAll_MultipleGroupsAndAZs(t *testing.T) {
 	// Two AZs, two hypervisors
 	hv1Obj := newHypervisor("h1", "az-a", memBytes)
 	hv2Obj := newHypervisor("h2", "az-b", memBytes)
-	knowledge := newFlavorGroupKnowledge(t, "2101", memMB)
+	knowledge := newFlavorGroupKnowledge(t, "2152", memMB)
 
 	fakeClient := fake.NewClientBuilder().
 		WithScheme(scheme).

From b8057cb496c5e3a42c0facd5d4bac4657b19777a Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 28 Apr 2026 15:44:10 +0200
Subject: [PATCH 05/15] regenerate CRDs and deepcopy (make crds deepcopy) and apply lint fixes

---
 api/v1alpha1/zz_generated.deepcopy.go         | 44 ++++++------
 .../cortex.cloud_flavorgroupcapacities.yaml   | 72 ++++++++++++-------
 .../reservations/capacity/controller.go       |  6 +-
 .../reservations/capacity/controller_test.go  | 20 +++---
 4 files changed, 81 insertions(+), 61 deletions(-)

diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index be8791809..f995ec9be 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -727,6 +727,28 @@ func (in *FailoverReservationStatus) DeepCopy() *FailoverReservationStatus {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FilterSpec) DeepCopyInto(out *FilterSpec) {
+	*out = *in
+	if in.Params != nil {
+		in, out := &in.Params, &out.Params
+		*out = make(Parameters, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FilterSpec.
+func (in *FilterSpec) DeepCopy() *FilterSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(FilterSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *FlavorGroupCapacity) DeepCopyInto(out *FlavorGroupCapacity) {
 	*out = *in
@@ -824,28 +846,6 @@ func (in *FlavorGroupCapacityStatus) DeepCopy() *FlavorGroupCapacityStatus {
 	return out
 }
 
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *FilterSpec) DeepCopyInto(out *FilterSpec) {
-	*out = *in
-	if in.Params != nil {
-		in, out := &in.Params, &out.Params
-		*out = make(Parameters, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FilterSpec.
-func (in *FilterSpec) DeepCopy() *FilterSpec {
-	if in == nil {
-		return nil
-	}
-	out := new(FilterSpec)
-	in.DeepCopyInto(out)
-	return out
-}
-
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *History) DeepCopyInto(out *History) {
 	*out = *in
diff --git a/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml b/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
index a3bc11fbe..efa690a12 100644
--- a/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
+++ b/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
@@ -45,18 +45,28 @@ spec:
           The capacity API reads these CRDs instead of probing the scheduler on each request.
         properties:
           apiVersion:
-            description: APIVersion defines the versioned schema of this representation of an object.
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
             type: string
           kind:
-            description: Kind is a string value representing the REST resource this object represents.
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
             type: string
           metadata:
             type: object
           spec:
-            description: FlavorGroupCapacitySpec defines the desired state of FlavorGroupCapacity.
+            description: spec defines the desired state of FlavorGroupCapacity
             properties:
               availabilityZone:
-                description: AvailabilityZone is the OpenStack AZ this capacity data covers (e.g. "qa-de-1a").
+                description: AvailabilityZone is the OpenStack AZ this capacity data
+                  covers (e.g. "qa-de-1a").
                 type: string
               flavorGroup:
                 description: FlavorGroup is the name of the flavor group (e.g. "2101").
@@ -66,35 +76,46 @@ spec:
             - flavorGroup
             type: object
           status:
-            description: FlavorGroupCapacityStatus defines the observed state of FlavorGroupCapacity.
+            description: status defines the observed state of FlavorGroupCapacity
             properties:
               committedCapacity:
-                description: CommittedCapacity is the sum of AcceptedAmount across Ready=True CommittedResource CRDs.
+                description: CommittedCapacity is the sum of AcceptedAmount across
+                  Ready=True CommittedResource CRDs.
                 format: int64
-                minimum: 0
                 type: integer
               conditions:
-                description: |-
-                  Conditions represent the current state of the FlavorGroupCapacity.
-                  The Ready condition indicates whether the status data is up-to-date.
+                description: The current status conditions of the FlavorGroupCapacity.
                 items:
-                  description: Condition contains details for one aspect of the current state of this API Resource.
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
                   properties:
                     lastTransitionTime:
-                      description: lastTransitionTime is the last time the condition transitioned from one status to another.
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
                       format: date-time
                       type: string
                     message:
-                      description: message is a human readable message indicating details about the transition.
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
                       maxLength: 32768
                       type: string
                     observedGeneration:
-                      description: observedGeneration represents the .metadata.generation that the condition was set based upon.
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
                       format: int64
                       minimum: 0
                       type: integer
                     reason:
-                      description: reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
                       maxLength: 1024
                       minLength: 1
                       pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
@@ -119,17 +140,15 @@ spec:
                   - type
                   type: object
                 type: array
-                x-kubernetes-list-map-keys:
-                - type
-                x-kubernetes-list-type: map
               lastReconcileAt:
-                description: LastReconcileAt is the timestamp of the last successful reconcile.
+                description: LastReconcileAt is the timestamp of the last successful
+                  reconcile.
                 format: date-time
                 type: string
               placeableHosts:
-                description: PlaceableHosts is the number of hosts still able to accept a new smallest-flavor VM.
+                description: PlaceableHosts is the number of hosts still able to accept
+                  a new smallest-flavor VM.
                 format: int64
-                minimum: 0
                 type: integer
               totalCapacity:
                 description: |-
@@ -137,24 +156,23 @@ spec:
                   Computed as sum of floor(EffectiveCapacity.Memory / smallestFlavorMemory) across
                   all hosts eligible for this flavor group (empty-state scheduler probe).
                 format: int64
-                minimum: 0
                 type: integer
               totalHosts:
-                description: TotalHosts is the number of hosts eligible for this flavor group in the empty-state probe.
+                description: TotalHosts is the number of hosts eligible for this flavor
+                  group in the empty-state probe.
                 format: int64
-                minimum: 0
                 type: integer
               totalInstances:
                 description: |-
                   TotalInstances is the total number of VM instances running on hypervisors in this AZ,
                   derived from Hypervisor CRD Status.Instances (not filtered by flavor group).
                 format: int64
-                minimum: 0
                 type: integer
               totalPlaceable:
-                description: TotalPlaceable is the schedulable slots remaining given current VM allocations.
+                description: |-
+                  TotalPlaceable is the schedulable slots remaining given current VM allocations.
+                  Computed from the current-state scheduler probe.
                 format: int64
-                minimum: 0
                 type: integer
             type: object
         required:
diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
index e5e8e352e..c6496227a 100644
--- a/internal/scheduling/reservations/capacity/controller.go
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -101,6 +101,7 @@ func (c *Controller) reconcileOne(
 	hvByName map[string]hv1.Hypervisor,
 	allHVs []hv1.Hypervisor,
 ) error {
+
 	smallestFlavor := groupData.SmallestFlavor
 	smallestFlavorBytes := int64(smallestFlavor.MemoryMB) * 1024 * 1024 //nolint:gosec
 	if smallestFlavorBytes <= 0 {
@@ -184,6 +185,7 @@ func (c *Controller) probeScheduler(
 	hvByName map[string]hv1.Hypervisor,
 	smallestFlavorBytes int64,
 ) (capacity, hosts int64, err error) {
+
 	resp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{
 		InstanceUUID:     uuid.New().String(),
 		ProjectID:        "cortex-capacity-probe",
@@ -198,7 +200,7 @@ func (c *Controller) probeScheduler(
 		return 0, 0, fmt.Errorf("scheduler call failed (pipeline=%s): %w", pipeline, err)
 	}
 
-	hosts = int64(len(resp.Hosts)) //nolint:gosec
+	hosts = int64(len(resp.Hosts))
 	for _, hostName := range resp.Hosts {
 		hv, ok := hvByName[hostName]
 		if !ok {
@@ -279,7 +281,7 @@ func countInstancesInAZ(hvs []hv1.Hypervisor, az string) int64 {
 		if hv.Labels["topology.kubernetes.io/zone"] != az {
 			continue
 		}
-		total += int64(len(hv.Status.Instances)) //nolint:gosec
+		total += int64(len(hv.Status.Instances))
 	}
 	return total
 }
diff --git a/internal/scheduling/reservations/capacity/controller_test.go b/internal/scheduling/reservations/capacity/controller_test.go
index ea8686c7e..c75b3e5c7 100644
--- a/internal/scheduling/reservations/capacity/controller_test.go
+++ b/internal/scheduling/reservations/capacity/controller_test.go
@@ -128,8 +128,8 @@ func TestAvailabilityZones(t *testing.T) {
 	hvs := []hv1.Hypervisor{
 		*newHypervisor("h1", "az-a", 0),
 		*newHypervisor("h2", "az-b", 0),
-		*newHypervisor("h3", "az-a", 0), // duplicate
-		{ObjectMeta: metav1.ObjectMeta{Name: "h4"}},  // no label
+		*newHypervisor("h3", "az-a", 0),             // duplicate
+		{ObjectMeta: metav1.ObjectMeta{Name: "h4"}}, // no label
 	}
 	got := availabilityZones(hvs)
 	want := []string{"az-a", "az-b"}
@@ -165,10 +165,10 @@ func TestCountInstancesInAZ(t *testing.T) {
 
 func TestReconcileOne_CreatesCRD(t *testing.T) {
 	const (
-		groupName    = "2101"
-		az           = "qa-de-1a"
-		memMB        = 4096                      // 4 GiB
-		memBytes     = int64(memMB) * 1024 * 1024
+		groupName = "2101"
+		az        = "qa-de-1a"
+		memMB     = 4096 // 4 GiB
+		memBytes  = int64(memMB) * 1024 * 1024
 	)
 
 	scheme := newTestScheme(t)
@@ -501,10 +501,10 @@ func TestPackageLogVar(t *testing.T) {
 
 func TestSumCommittedCapacity(t *testing.T) {
 	const (
-		groupName    = "2101"
-		az           = "qa-de-1a"
-		memMB        = 4096
-		memBytes     = int64(memMB) * 1024 * 1024
+		groupName = "2101"
+		az        = "qa-de-1a"
+		memMB     = 4096
+		memBytes  = int64(memMB) * 1024 * 1024
 	)
 
 	newCR := func(name, group, zone string, state v1alpha1.CommitmentStatus, resType v1alpha1.CommittedResourceType, amount string, acceptedAmount string) *v1alpha1.CommittedResource {

From 19f3e9dc4467bb9c20b113041055fd75f4b3b966 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 28 Apr 2026 15:57:36 +0200
Subject: [PATCH 06/15] replace "2101" with "hana-v2" as the example flavor group name

---
 api/v1alpha1/flavor_group_capacity_types.go          |  2 +-
 .../crds/cortex.cloud_flavorgroupcapacities.yaml     |  2 +-
 .../reservations/capacity/controller_test.go         | 12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/api/v1alpha1/flavor_group_capacity_types.go b/api/v1alpha1/flavor_group_capacity_types.go
index edd04ca90..0ad70f701 100644
--- a/api/v1alpha1/flavor_group_capacity_types.go
+++ b/api/v1alpha1/flavor_group_capacity_types.go
@@ -14,7 +14,7 @@ const (
 
 // FlavorGroupCapacitySpec defines the desired state of FlavorGroupCapacity.
 type FlavorGroupCapacitySpec struct {
-	// FlavorGroup is the name of the flavor group (e.g. "2101").
+	// FlavorGroup is the name of the flavor group (e.g. "hana-v2").
 	// +kubebuilder:validation:Required
 	FlavorGroup string `json:"flavorGroup"`
 
diff --git a/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml b/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
index efa690a12..b41418895 100644
--- a/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
+++ b/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
@@ -69,7 +69,7 @@ spec:
                   covers (e.g. "qa-de-1a").
                 type: string
               flavorGroup:
-                description: FlavorGroup is the name of the flavor group (e.g. "2101").
+                description: FlavorGroup is the name of the flavor group (e.g. "hana-v2").
                 type: string
             required:
             - availabilityZone
diff --git a/internal/scheduling/reservations/capacity/controller_test.go b/internal/scheduling/reservations/capacity/controller_test.go
index c75b3e5c7..8d453ec33 100644
--- a/internal/scheduling/reservations/capacity/controller_test.go
+++ b/internal/scheduling/reservations/capacity/controller_test.go
@@ -165,7 +165,7 @@ func TestCountInstancesInAZ(t *testing.T) {
 
 func TestReconcileOne_CreatesCRD(t *testing.T) {
 	const (
-		groupName = "2101"
+		groupName = "hana-v2"
 		az        = "qa-de-1a"
 		memMB     = 4096 // 4 GiB
 		memBytes  = int64(memMB) * 1024 * 1024
@@ -221,7 +221,7 @@ func TestReconcileOne_CreatesCRD(t *testing.T) {
 
 func TestReconcileOne_SetsReadyConditionFalseOnSchedulerError(t *testing.T) {
 	const (
-		groupName = "2101"
+		groupName = "hana-v2"
 		az        = "qa-de-1a"
 		memMB     = 2048
 	)
@@ -274,7 +274,7 @@ func TestReconcileOne_SetsReadyConditionFalseOnSchedulerError(t *testing.T) {
 
 func TestReconcileOne_IdempotentUpdate(t *testing.T) {
 	const (
-		groupName = "2101"
+		groupName = "hana-v2"
 		az        = "qa-de-1a"
 		memMB     = 2048
 		memBytes  = int64(memMB) * 1024 * 1024
@@ -334,7 +334,7 @@ func TestReconcileOne_IdempotentUpdate(t *testing.T) {
 
 func TestReconcileAll_SkipsGroupsWithNoAZs(t *testing.T) {
 	scheme := newTestScheme(t)
-	knowledge := newFlavorGroupKnowledge(t, "2101", 2048)
+	knowledge := newFlavorGroupKnowledge(t, "hana-v2", 2048)
 
 	// No hypervisors → no AZs → reconcileAll returns without error
 	fakeClient := fake.NewClientBuilder().
@@ -487,7 +487,7 @@ func TestReconcileOne_ZeroMemoryFlavorReturnsError(t *testing.T) {
 	groupData := compute.FlavorGroupFeature{
 		SmallestFlavor: compute.FlavorInGroup{Name: "bad-flavor", MemoryMB: 0},
 	}
-	err := c.reconcileOne(context.Background(), "2101", groupData, "az-a", nil, nil)
+	err := c.reconcileOne(context.Background(), "hana-v2", groupData, "az-a", nil, nil)
 	if err == nil {
 		t.Error("expected error for zero-memory flavor")
 	}
@@ -501,7 +501,7 @@ func TestPackageLogVar(t *testing.T) {
 
 func TestSumCommittedCapacity(t *testing.T) {
 	const (
-		groupName = "2101"
+		groupName = "hana-v2"
 		az        = "qa-de-1a"
 		memMB     = 4096
 		memBytes  = int64(memMB) * 1024 * 1024

From 6436aa51d04da0a0bb0bb94ce07f32a0603c3822 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 28 Apr 2026 16:37:28 +0200
Subject: [PATCH 07/15] add kvm-report-capacity pipeline and ignoreAllocations filter option (pulled over from old branch)

---
 .../cortex-nova/templates/pipelines_kvm.yaml  | 40 +++++++++++++++++++
 helm/bundles/cortex-nova/values.yaml          |  6 +++
 .../filters/filter_has_enough_capacity.go     | 28 ++++++++-----
 3 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml
index 561d9fc3c..8078c069b 100644
--- a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml
+++ b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml
@@ -557,4 +557,44 @@ spec:
         VM is allocated get a higher weight, encouraging placement on
         pre-reserved failover capacity. For non-evacuation requests, this
         weigher has no effect.
+---
+apiVersion: cortex.cloud/v1alpha1
+kind: Pipeline
+metadata:
+  name: kvm-report-capacity
+spec:
+  schedulingDomain: nova
+  description: |
+    This pipeline is used by the capacity controller to determine the
+    theoretical maximum capacity of each flavor group per availability zone,
+    as if all hosts were completely empty. It ignores current VM allocations
+    and all reservation blockings so that only raw hardware capacity is
+    considered.
+  type: filter-weigher
+  createDecisions: false
+  # Fetch all placement candidates, ignoring nova's preselection.
+  ignorePreselection: true
+  filters:
+    - name: filter_correct_az
+      description: |
+        Restricts host candidates to the requested availability zone.
+    - name: filter_has_enough_capacity
+      description: |
+        Filters hosts that cannot fit the flavor based on raw hardware capacity.
+        VM allocations and all reservation types are ignored to represent an
+        empty datacenter scenario.
+      params:
+        - {key: ignoreAllocations, boolValue: true}
+        - {key: ignoredReservationTypes, stringListValue: ["CommittedResourceReservation", "FailoverReservation"]}
+    - name: filter_has_requested_traits
+      description: |
+        Ensures hosts have the hardware traits required by the flavor.
+    - name: filter_capabilities
+      description: |
+        Ensures hosts meet the compute capabilities required by the flavor
+        extra specs (e.g., architecture, maxphysaddr bits).
+    - name: filter_status_conditions
+      description: |
+        Excludes hosts that are not ready or are disabled.
+  weighers: []
 {{- end }}
diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml
index f709bea5f..e08f3f1d4 100644
--- a/helm/bundles/cortex-nova/values.yaml
+++ b/helm/bundles/cortex-nova/values.yaml
@@ -134,6 +134,12 @@ cortex-scheduling-controllers:
       - committed-resource-reservations-controller
       - failover-reservations-controller
       - capacity-controller
+    # Pipeline used for the empty-state capacity probe (ignores allocations and reservations).
+    capacityTotalPipeline: "kvm-report-capacity"
+    # Pipeline used for the current-state capacity probe (considers current VM allocations).
+    capacityPlaceablePipeline: "kvm-general-purpose-load-balancing"
+    # How often the capacity controller re-runs its scheduler probes.
+    capacityReconcileInterval: 5m
     enabledTasks:
       - nova-history-cleanup-task
     # If true, the external scheduler API will limit the list of hosts in its
diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
index e6956609a..2ceb4944f 100644
--- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
+++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
@@ -26,6 +26,10 @@ type FilterHasEnoughCapacityOpts struct {
 	// When a reservation type is in this list, its capacity is not blocked.
 	// Default: empty (all reservation types are considered)
 	IgnoredReservationTypes []v1alpha1.ReservationType `json:"ignoredReservationTypes,omitempty"`
+
+	// IgnoreAllocations skips subtracting current VM allocations from host capacity.
+	// When true, only raw hardware capacity is considered (empty datacenter scenario).
+	IgnoreAllocations bool `json:"ignoreAllocations,omitempty"`
 }
 
 func (FilterHasEnoughCapacityOpts) Validate() error { return nil }
@@ -71,18 +75,20 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
 			freeResourcesByHost[hv.Name] = hv.Status.EffectiveCapacity
 		}
 
-		// Subtract allocated resources.
-		for resourceName, allocated := range hv.Status.Allocation {
-			free, ok := freeResourcesByHost[hv.Name][resourceName]
-			if !ok {
-				traceLog.Error(
-					"hypervisor with allocation for unknown resource",
-					"host", hv.Name, "resource", resourceName,
-				)
-				continue
+		// Subtract allocated resources (skip when ignoring allocations for empty-datacenter capacity queries).
+		if !s.Options.IgnoreAllocations {
+			for resourceName, allocated := range hv.Status.Allocation {
+				free, ok := freeResourcesByHost[hv.Name][resourceName]
+				if !ok {
+					traceLog.Error(
+						"hypervisor with allocation for unknown resource",
+						"host", hv.Name, "resource", resourceName,
+					)
+					continue
+				}
+				free.Sub(allocated)
+				freeResourcesByHost[hv.Name][resourceName] = free
 			}
-			free.Sub(allocated)
-			freeResourcesByHost[hv.Name][resourceName] = free
 		}
 	}
 

From 393a6c309c4e7d4e0099bb424f678805bad1d526 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 5 May 2026 10:27:59 +0200
Subject: [PATCH 08/15] capacity: keep previous status fields when a scheduler probe fails

---
 .../reservations/capacity/controller.go          | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
index c6496227a..24767065b 100644
--- a/internal/scheduling/reservations/capacity/controller.go
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -144,12 +144,16 @@ func (c *Controller) reconcileOne(
 	}
 
 	patch := client.MergeFrom(existing.DeepCopy())
-	existing.Status.TotalCapacity = totalCapacity
-	existing.Status.TotalHosts = totalHosts
-	existing.Status.TotalPlaceable = totalPlaceable
-	existing.Status.PlaceableHosts = placeableHosts
-	existing.Status.TotalInstances = totalInstances
-	existing.Status.CommittedCapacity = committedCapacity
+	if totalErr == nil {
+		existing.Status.TotalCapacity = totalCapacity
+		existing.Status.TotalHosts = totalHosts
+		existing.Status.TotalInstances = totalInstances
+		existing.Status.CommittedCapacity = committedCapacity
+	}
+	if placeableErr == nil {
+		existing.Status.TotalPlaceable = totalPlaceable
+		existing.Status.PlaceableHosts = placeableHosts
+	}
 	existing.Status.LastReconcileAt = metav1.Now()
 
 	freshCondition := metav1.Condition{

From f63f317aa951666fcd1e17c3de7565076d6eca8d Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 5 May 2026 11:35:23 +0200
Subject: [PATCH 09/15] extending flavor group crd to every flavor

---
 api/v1alpha1/flavor_group_capacity_types.go   |  46 ++++---
 api/v1alpha1/zz_generated.deepcopy.go         |  20 +++
 .../cortex.cloud_flavorgroupcapacities.yaml   |  72 ++++++-----
 .../reservations/capacity/controller.go       | 119 +++++++++++-------
 .../reservations/capacity/controller_test.go  | 100 +++++++++++----
 .../reservations/capacity/metrics.go          |  87 +++++++------
 6 files changed, 281 insertions(+), 163 deletions(-)

diff --git a/api/v1alpha1/flavor_group_capacity_types.go b/api/v1alpha1/flavor_group_capacity_types.go
index 0ad70f701..a7339dce2 100644
--- a/api/v1alpha1/flavor_group_capacity_types.go
+++ b/api/v1alpha1/flavor_group_capacity_types.go
@@ -23,36 +23,44 @@ type FlavorGroupCapacitySpec struct {
 	AvailabilityZone string `json:"availabilityZone"`
 }
 
-// FlavorGroupCapacityStatus defines the observed state of FlavorGroupCapacity.
-type FlavorGroupCapacityStatus struct {
-	// TotalCapacity is the total schedulable slots in an empty-datacenter scenario.
-	// Computed as sum of floor(EffectiveCapacity.Memory / smallestFlavorMemory) across
-	// all hosts eligible for this flavor group (empty-state scheduler probe).
+// FlavorCapacityStatus holds per-flavor capacity numbers for one (flavor group × AZ) pair.
+type FlavorCapacityStatus struct {
+	// FlavorName is the OpenStack flavor name (e.g. "hana-v2-small").
+	FlavorName string `json:"flavorName"`
+
+	// PlaceableHosts is the number of hosts that can still fit this flavor given current allocations.
 	// +kubebuilder:validation:Optional
-	TotalCapacity int64 `json:"totalCapacity,omitempty"`
+	PlaceableHosts int64 `json:"placeableHosts,omitempty"`
 
-	// TotalHosts is the number of hosts eligible for this flavor group in the empty-state probe.
+	// PlaceableVMs is the number of VM slots remaining for this flavor given current allocations.
 	// +kubebuilder:validation:Optional
-	TotalHosts int64 `json:"totalHosts,omitempty"`
+	PlaceableVMs int64 `json:"placeableVms,omitempty"`
 
-	// TotalPlaceable is the schedulable slots remaining given current VM allocations.
-	// Computed from the current-state scheduler probe.
+	// TotalCapacityHosts is the number of eligible hosts in an empty-datacenter scenario.
 	// +kubebuilder:validation:Optional
-	TotalPlaceable int64 `json:"totalPlaceable,omitempty"`
+	TotalCapacityHosts int64 `json:"totalCapacityHosts,omitempty"`
 
-	// PlaceableHosts is the number of hosts still able to accept a new smallest-flavor VM.
+	// TotalCapacityVMSlots is the maximum number of VM slots in an empty-datacenter scenario.
 	// +kubebuilder:validation:Optional
-	PlaceableHosts int64 `json:"placeableHosts,omitempty"`
+	TotalCapacityVMSlots int64 `json:"totalCapacityVmSlots,omitempty"`
+}
 
-	// TotalInstances is the total number of VM instances running on hypervisors in this AZ,
-	// derived from Hypervisor CRD Status.Instances (not filtered by flavor group).
+// FlavorGroupCapacityStatus defines the observed state of FlavorGroupCapacity.
+type FlavorGroupCapacityStatus struct {
+	// Flavors holds per-flavor capacity data for all flavors in the group.
 	// +kubebuilder:validation:Optional
-	TotalInstances int64 `json:"totalInstances,omitempty"`
+	Flavors []FlavorCapacityStatus `json:"flavors,omitempty"`
 
-	// CommittedCapacity is the sum of AcceptedAmount across Ready=True CommittedResource CRDs.
+	// CommittedCapacity is the sum of AcceptedAmount across active CommittedResource CRDs,
+	// expressed in multiples of the smallest flavor's memory.
 	// +kubebuilder:validation:Optional
 	CommittedCapacity int64 `json:"committedCapacity,omitempty"`
 
+	// TotalInstances is the total number of VM instances running on hypervisors in this AZ,
+	// derived from Hypervisor CRD Status.Instances (not filtered by flavor group).
+	// +kubebuilder:validation:Optional
+	TotalInstances int64 `json:"totalInstances,omitempty"`
+
 	// LastReconcileAt is the timestamp of the last successful reconcile.
 	// +kubebuilder:validation:Optional
 	LastReconcileAt metav1.Time `json:"lastReconcileAt,omitempty"`
@@ -67,9 +75,7 @@ type FlavorGroupCapacityStatus struct {
 // +kubebuilder:resource:scope=Cluster
 // +kubebuilder:printcolumn:name="FlavorGroup",type="string",JSONPath=".spec.flavorGroup"
 // +kubebuilder:printcolumn:name="AZ",type="string",JSONPath=".spec.availabilityZone"
-// +kubebuilder:printcolumn:name="TotalCapacity",type="integer",JSONPath=".status.totalCapacity"
-// +kubebuilder:printcolumn:name="TotalPlaceable",type="integer",JSONPath=".status.totalPlaceable"
-// +kubebuilder:printcolumn:name="TotalHosts",type="integer",JSONPath=".status.totalHosts"
+// +kubebuilder:printcolumn:name="TotalInstances",type="integer",JSONPath=".status.totalInstances"
 // +kubebuilder:printcolumn:name="LastReconcile",type="date",JSONPath=".status.lastReconcileAt"
 // +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status"
 
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index f995ec9be..e75332b77 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -749,6 +749,21 @@ func (in *FilterSpec) DeepCopy() *FilterSpec {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FlavorCapacityStatus) DeepCopyInto(out *FlavorCapacityStatus) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlavorCapacityStatus.
+func (in *FlavorCapacityStatus) DeepCopy() *FlavorCapacityStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(FlavorCapacityStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *FlavorGroupCapacity) DeepCopyInto(out *FlavorGroupCapacity) {
 	*out = *in
@@ -826,6 +841,11 @@ func (in *FlavorGroupCapacitySpec) DeepCopy() *FlavorGroupCapacitySpec {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *FlavorGroupCapacityStatus) DeepCopyInto(out *FlavorGroupCapacityStatus) {
 	*out = *in
+	if in.Flavors != nil {
+		in, out := &in.Flavors, &out.Flavors
+		*out = make([]FlavorCapacityStatus, len(*in))
+		copy(*out, *in)
+	}
 	in.LastReconcileAt.DeepCopyInto(&out.LastReconcileAt)
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
diff --git a/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml b/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
index b41418895..5f475689e 100644
--- a/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
+++ b/helm/library/cortex/files/crds/cortex.cloud_flavorgroupcapacities.yaml
@@ -21,14 +21,8 @@ spec:
     - jsonPath: .spec.availabilityZone
       name: AZ
       type: string
-    - jsonPath: .status.totalCapacity
-      name: TotalCapacity
-      type: integer
-    - jsonPath: .status.totalPlaceable
-      name: TotalPlaceable
-      type: integer
-    - jsonPath: .status.totalHosts
-      name: TotalHosts
+    - jsonPath: .status.totalInstances
+      name: TotalInstances
       type: integer
     - jsonPath: .status.lastReconcileAt
       name: LastReconcile
@@ -79,8 +73,9 @@ spec:
             description: status defines the observed state of FlavorGroupCapacity
             properties:
               committedCapacity:
-                description: CommittedCapacity is the sum of AcceptedAmount across
-                  Ready=True CommittedResource CRDs.
+                description: |-
+                  CommittedCapacity is the sum of AcceptedAmount across active CommittedResource CRDs,
+                  expressed in multiples of the smallest flavor's memory.
                 format: int64
                 type: integer
               conditions:
@@ -140,40 +135,51 @@ spec:
                   - type
                   type: object
                 type: array
+              flavors:
+                description: Flavors holds per-flavor capacity data for all flavors
+                  in the group.
+                items:
+                  description: FlavorCapacityStatus holds per-flavor capacity numbers
+                    for one (flavor group × AZ) pair.
+                  properties:
+                    flavorName:
+                      description: FlavorName is the OpenStack flavor name (e.g. "hana-v2-small").
+                      type: string
+                    placeableHosts:
+                      description: PlaceableHosts is the number of hosts that can
+                        still fit this flavor given current allocations.
+                      format: int64
+                      type: integer
+                    placeableVms:
+                      description: PlaceableVMs is the number of VM slots remaining
+                        for this flavor given current allocations.
+                      format: int64
+                      type: integer
+                    totalCapacityHosts:
+                      description: TotalCapacityHosts is the number of eligible hosts
+                        in an empty-datacenter scenario.
+                      format: int64
+                      type: integer
+                    totalCapacityVmSlots:
+                      description: TotalCapacityVMSlots is the maximum number of VM
+                        slots in an empty-datacenter scenario.
+                      format: int64
+                      type: integer
+                  required:
+                  - flavorName
+                  type: object
+                type: array
               lastReconcileAt:
                 description: LastReconcileAt is the timestamp of the last successful
                   reconcile.
                 format: date-time
                 type: string
-              placeableHosts:
-                description: PlaceableHosts is the number of hosts still able to accept
-                  a new smallest-flavor VM.
-                format: int64
-                type: integer
-              totalCapacity:
-                description: |-
-                  TotalCapacity is the total schedulable slots in an empty-datacenter scenario.
-                  Computed as sum of floor(EffectiveCapacity.Memory / smallestFlavorMemory) across
-                  all hosts eligible for this flavor group (empty-state scheduler probe).
-                format: int64
-                type: integer
-              totalHosts:
-                description: TotalHosts is the number of hosts eligible for this flavor
-                  group in the empty-state probe.
-                format: int64
-                type: integer
               totalInstances:
                 description: |-
                   TotalInstances is the total number of VM instances running on hypervisors in this AZ,
                   derived from Hypervisor CRD Status.Instances (not filtered by flavor group).
                 format: int64
                 type: integer
-              totalPlaceable:
-                description: |-
-                  TotalPlaceable is the schedulable slots remaining given current VM allocations.
-                  Computed from the current-state scheduler probe.
-                format: int64
-                type: integer
             type: object
         required:
         - spec
diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
index 24767065b..8522f0036 100644
--- a/internal/scheduling/reservations/capacity/controller.go
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -6,6 +6,7 @@ package capacity
 import (
 	"context"
 	"fmt"
+	"hash/fnv"
 	"sort"
 	"strings"
 	"time"
@@ -27,7 +28,7 @@ import (
 var log = ctrl.Log.WithName("capacity-controller").WithValues("module", "capacity")
 
 // Controller reconciles FlavorGroupCapacity CRDs on a fixed interval.
-// For each (flavor group × AZ) pair it runs two scheduler probes and updates the CRD status.
+// For each (flavor group × AZ) pair it probes all flavors in the group and updates the CRD status.
 type Controller struct {
 	client          client.Client
 	schedulerClient *reservations.SchedulerClient
@@ -102,29 +103,13 @@ func (c *Controller) reconcileOne(
 	allHVs []hv1.Hypervisor,
 ) error {
 
-	smallestFlavor := groupData.SmallestFlavor
-	smallestFlavorBytes := int64(smallestFlavor.MemoryMB) * 1024 * 1024 //nolint:gosec
+	smallestFlavorBytes := int64(groupData.SmallestFlavor.MemoryMB) * 1024 * 1024 //nolint:gosec
 	if smallestFlavorBytes <= 0 {
-		return fmt.Errorf("smallest flavor %q has invalid memory %d MB", smallestFlavor.Name, smallestFlavor.MemoryMB)
-	}
-
-	// Empty-state probe: scheduler ignores all current VM allocations.
-	totalCapacity, totalHosts, totalErr := c.probeScheduler(ctx, smallestFlavor, az, c.config.TotalPipeline, hvByName, smallestFlavorBytes)
-
-	// Current-state probe: scheduler considers current VM allocations.
-	totalPlaceable, placeableHosts, placeableErr := c.probeScheduler(ctx, smallestFlavor, az, c.config.PlaceablePipeline, hvByName, smallestFlavorBytes)
-
-	// Count total instances on hypervisors in this AZ.
-	totalInstances := countInstancesInAZ(allHVs, az)
-
-	committedCapacity, committedErr := c.sumCommittedCapacity(ctx, groupName, az, smallestFlavorBytes)
-	if committedErr != nil {
-		log.Error(committedErr, "failed to sum committed capacity", "flavorGroup", groupName, "az", az)
-		committedCapacity = 0
+		return fmt.Errorf("smallest flavor %q has invalid memory %d MB",
+			groupData.SmallestFlavor.Name, groupData.SmallestFlavor.MemoryMB)
 	}
 
 	crdName := crdNameFor(groupName, az)
-	fresh := totalErr == nil && placeableErr == nil
 
 	var existing v1alpha1.FlavorGroupCapacity
 	err := c.client.Get(ctx, types.NamespacedName{Name: crdName}, &existing)
@@ -143,35 +128,67 @@ func (c *Controller) reconcileOne(
 		return fmt.Errorf("failed to get FlavorGroupCapacity %s: %w", crdName, err)
 	}
 
-	patch := client.MergeFrom(existing.DeepCopy())
-	if totalErr == nil {
-		existing.Status.TotalCapacity = totalCapacity
-		existing.Status.TotalHosts = totalHosts
-		existing.Status.TotalInstances = totalInstances
-		existing.Status.CommittedCapacity = committedCapacity
+	// Build a lookup of existing per-flavor data so we can preserve stale values on probe failure.
+	existingByName := make(map[string]v1alpha1.FlavorCapacityStatus, len(existing.Status.Flavors))
+	for _, f := range existing.Status.Flavors {
+		existingByName[f.FlavorName] = f
 	}
-	if placeableErr == nil {
-		existing.Status.TotalPlaceable = totalPlaceable
-		existing.Status.PlaceableHosts = placeableHosts
+
+	// Probe all flavors in the group. Sort for stable CRD output.
+	flavors := make([]compute.FlavorInGroup, len(groupData.Flavors))
+	copy(flavors, groupData.Flavors)
+	sort.Slice(flavors, func(i, j int) bool { return flavors[i].Name < flavors[j].Name })
+
+	allFresh := true
+	newFlavors := make([]v1alpha1.FlavorCapacityStatus, 0, len(flavors))
+	for _, flavor := range flavors {
+		cur := existingByName[flavor.Name]
+		cur.FlavorName = flavor.Name
+
+		totalVMSlots, totalHosts, totalErr := c.probeScheduler(ctx, flavor, az, c.config.TotalPipeline, hvByName)
+		placeableVMs, placeableHosts, placeableErr := c.probeScheduler(ctx, flavor, az, c.config.PlaceablePipeline, hvByName)
+
+		if totalErr != nil {
+			allFresh = false
+		} else {
+			cur.TotalCapacityVMSlots = totalVMSlots
+			cur.TotalCapacityHosts = totalHosts
+		}
+		if placeableErr != nil {
+			allFresh = false
+		} else {
+			cur.PlaceableVMs = placeableVMs
+			cur.PlaceableHosts = placeableHosts
+		}
+		newFlavors = append(newFlavors, cur)
+	}
+
+	// Count total instances and committed capacity; neither depends on the probe results (a failed committed-capacity lookup is logged and reported as 0).
+	totalInstances := countInstancesInAZ(allHVs, az)
+	committedCapacity, committedErr := c.sumCommittedCapacity(ctx, groupName, az, smallestFlavorBytes)
+	if committedErr != nil {
+		log.Error(committedErr, "failed to sum committed capacity", "flavorGroup", groupName, "az", az)
+		committedCapacity = 0
 	}
+
+	patch := client.MergeFrom(existing.DeepCopy())
+	existing.Status.Flavors = newFlavors
+	existing.Status.TotalInstances = totalInstances
+	existing.Status.CommittedCapacity = committedCapacity
 	existing.Status.LastReconcileAt = metav1.Now()
 
 	freshCondition := metav1.Condition{
 		Type:               v1alpha1.FlavorGroupCapacityConditionReady,
 		ObservedGeneration: existing.Generation,
 	}
-	if fresh {
+	if allFresh {
 		freshCondition.Status = metav1.ConditionTrue
 		freshCondition.Reason = "ReconcileSucceeded"
 		freshCondition.Message = "capacity data is up-to-date"
 	} else {
 		freshCondition.Status = metav1.ConditionFalse
 		freshCondition.Reason = "ReconcileFailed"
-		if totalErr != nil {
-			freshCondition.Message = fmt.Sprintf("empty-state probe failed: %v", totalErr)
-		} else {
-			freshCondition.Message = fmt.Sprintf("current-state probe failed: %v", placeableErr)
-		}
+		freshCondition.Message = "one or more flavor probes failed"
 	}
 	meta.SetStatusCondition(&existing.Status.Conditions, freshCondition)
 
@@ -181,15 +198,20 @@ func (c *Controller) reconcileOne(
 	return nil
 }
 
-// probeScheduler calls the scheduler with the given pipeline and returns capacity + host count.
+// probeScheduler calls the scheduler with the given pipeline and returns VM slots + host count.
+// Capacity is computed as sum of floor(hostMemory / flavorMemory) across returned hosts.
 func (c *Controller) probeScheduler(
 	ctx context.Context,
 	flavor compute.FlavorInGroup,
 	az, pipeline string,
 	hvByName map[string]hv1.Hypervisor,
-	smallestFlavorBytes int64,
 ) (capacity, hosts int64, err error) {
 
+	flavorBytes := int64(flavor.MemoryMB) * 1024 * 1024 //nolint:gosec
+	if flavorBytes <= 0 {
+		return 0, 0, fmt.Errorf("flavor %q has invalid memory %d MB", flavor.Name, flavor.MemoryMB)
+	}
+
 	resp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{
 		InstanceUUID:     uuid.New().String(),
 		ProjectID:        "cortex-capacity-probe",
@@ -222,7 +244,7 @@ func (c *Controller) probeScheduler(
 			continue
 		}
 		if capBytes := memCap.Value(); capBytes > 0 {
-			capacity += capBytes / smallestFlavorBytes
+			capacity += capBytes / flavorBytes
 		}
 	}
 	return capacity, hosts, nil
@@ -290,12 +312,19 @@ func countInstancesInAZ(hvs []hv1.Hypervisor, az string) int64 {
 	return total
 }
 
-// crdNameFor produces a valid DNS subdomain name for a (flavorGroup, az) pair.
-// Underscores and dots are replaced with dashes; the result is lowercased.
+// crdNameFor produces a collision-resistant name (at most 63 chars, the DNS label limit) for a (flavorGroup, az) pair.
+// A 6-hex-char (24-bit) FNV-1a hash of the raw inputs is appended so that pairs differing only by characters
+// that sanitise identically (e.g. "." vs "-") get distinct names unless their truncated hashes collide.
 func crdNameFor(flavorGroup, az string) string {
-	combined := flavorGroup + "-" + az
-	combined = strings.ToLower(combined)
-	combined = strings.ReplaceAll(combined, "_", "-")
-	combined = strings.ReplaceAll(combined, ".", "-")
-	return combined
+	h := fnv.New32a()
+	_, _ = h.Write([]byte(flavorGroup + "\x00" + az))
+	suffix := fmt.Sprintf("%06x", h.Sum32()&0xFFFFFF)
+
+	prefix := strings.ToLower(flavorGroup + "-" + az)
+	prefix = strings.ReplaceAll(prefix, "_", "-")
+	prefix = strings.ReplaceAll(prefix, ".", "-")
+	if len(prefix) > 56 { // 56 + "-" + 6 = 63 chars (DNS label limit)
+		prefix = prefix[:56]
+	}
+	return prefix + "-" + suffix
 }
diff --git a/internal/scheduling/reservations/capacity/controller_test.go b/internal/scheduling/reservations/capacity/controller_test.go
index 8d453ec33..2cb15f3e7 100644
--- a/internal/scheduling/reservations/capacity/controller_test.go
+++ b/internal/scheduling/reservations/capacity/controller_test.go
@@ -8,6 +8,7 @@ import (
 	"encoding/json"
 	"net/http"
 	"net/http/httptest"
+	"regexp"
 	"sort"
 	"testing"
 
@@ -41,15 +42,17 @@ func newTestScheme(t *testing.T) *runtime.Scheme {
 // newFlavorGroupKnowledge creates a ready Knowledge CRD with a single flavor group.
 func newFlavorGroupKnowledge(t *testing.T, groupName string, smallestMemoryMB uint64) *v1alpha1.Knowledge {
 	t.Helper()
+	smallestFlavor := compute.FlavorInGroup{
+		Name:       groupName + "-small",
+		MemoryMB:   smallestMemoryMB,
+		VCPUs:      2,
+		ExtraSpecs: map[string]string{"hw:cpu_policy": "dedicated"},
+	}
 	features := []compute.FlavorGroupFeature{
 		{
-			Name: groupName,
-			SmallestFlavor: compute.FlavorInGroup{
-				Name:       groupName + "-small",
-				MemoryMB:   smallestMemoryMB,
-				VCPUs:      2,
-				ExtraSpecs: map[string]string{"hw:cpu_policy": "dedicated"},
-			},
+			Name:           groupName,
+			SmallestFlavor: smallestFlavor,
+			Flavors:        []compute.FlavorInGroup{smallestFlavor},
 		},
 	}
 	raw, err := v1alpha1.BoxFeatureList(features)
@@ -108,20 +111,45 @@ func newMockSchedulerServer(t *testing.T, hosts []string) *httptest.Server {
 
 // --- unit tests for pure helper functions ---
 
+var (
+	dnsLabelRE   = regexp.MustCompile(`^[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$`)
+	hashSuffixRE = regexp.MustCompile(`^[0-9a-f]{6}$`)
+)
+
 func TestCrdNameFor(t *testing.T) {
 	tests := []struct {
-		group, az, want string
+		group, az  string
+		wantPrefix string
 	}{
-		{"2101", "qa-de-1a", "2101-qa-de-1a"},
-		{"My_Group", "eu.west.1", "my-group-eu-west-1"},
-		{"G", "AZ_1", "g-az-1"},
+		{"hana-v2", "qa-de-1a", "hana-v2-qa-de-1a-"},
+		{"My_Group", "eu.west.1", "my-group-eu-west-1-"},
+		{"G", "AZ_1", "g-az-1-"},
 	}
 	for _, tt := range tests {
 		got := crdNameFor(tt.group, tt.az)
-		if got != tt.want {
-			t.Errorf("crdNameFor(%q, %q) = %q, want %q", tt.group, tt.az, got, tt.want)
+		// Must be a valid DNS label (lowercase, hyphens, ≤63 chars).
+		if len(got) > 63 {
+			t.Errorf("crdNameFor(%q, %q) = %q (len=%d > 63)", tt.group, tt.az, got, len(got))
+		}
+		if !dnsLabelRE.MatchString(got) {
+			t.Errorf("crdNameFor(%q, %q) = %q is not a valid DNS label", tt.group, tt.az, got)
+		}
+		// Must start with the expected sanitised prefix followed by a 6-hex-char hash suffix.
+		if len(got) < len(tt.wantPrefix)+6 || got[:len(tt.wantPrefix)] != tt.wantPrefix {
+			t.Errorf("crdNameFor(%q, %q) = %q, want prefix %q + 6 hex chars", tt.group, tt.az, got, tt.wantPrefix)
+		}
+		hashPart := got[len(tt.wantPrefix):]
+		if !hashSuffixRE.MatchString(hashPart) {
+			t.Errorf("crdNameFor(%q, %q) hash suffix %q is not 6 hex chars", tt.group, tt.az, hashPart)
 		}
 	}
+
+	// Inputs that differ only by "." vs "-" must produce different CRD names.
+	dotName := crdNameFor("hana.v2", "qa-de-1a")
+	dashName := crdNameFor("hana-v2", "qa-de-1a")
+	if dotName == dashName {
+		t.Errorf("crdNameFor collision: hana.v2 and hana-v2 both produced %q", dotName)
+	}
 }
 
 func TestAvailabilityZones(t *testing.T) {
@@ -191,8 +219,10 @@ func TestReconcileOne_CreatesCRD(t *testing.T) {
 		PlaceablePipeline: "kvm-general-purpose",
 	})
 
+	smallFlavor := compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB, VCPUs: 2}
 	groupData := compute.FlavorGroupFeature{
-		SmallestFlavor: compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB},
+		SmallestFlavor: smallFlavor,
+		Flavors:        []compute.FlavorInGroup{smallFlavor},
 	}
 	hvByName := map[string]hv1.Hypervisor{"host-1": *hv}
 
@@ -200,23 +230,32 @@ func TestReconcileOne_CreatesCRD(t *testing.T) {
 		t.Fatalf("reconcileOne failed: %v", err)
 	}
 
-	// Verify CRD was created with correct status
 	var crd v1alpha1.FlavorGroupCapacity
 	if err := fakeClient.Get(context.Background(), types.NamespacedName{Name: crdNameFor(groupName, az)}, &crd); err != nil {
 		t.Fatalf("failed to get CRD: %v", err)
 	}
-	if crd.Status.TotalCapacity != 1 {
-		t.Errorf("TotalCapacity = %d, want 1", crd.Status.TotalCapacity)
+	if len(crd.Status.Flavors) != 1 {
+		t.Fatalf("len(Status.Flavors) = %d, want 1", len(crd.Status.Flavors))
+	}
+	f := crd.Status.Flavors[0]
+	if f.FlavorName != groupName+"-small" {
+		t.Errorf("FlavorName = %q, want %q", f.FlavorName, groupName+"-small")
+	}
+	if f.TotalCapacityVMSlots != 1 {
+		t.Errorf("TotalCapacityVMSlots = %d, want 1", f.TotalCapacityVMSlots)
+	}
+	if f.TotalCapacityHosts != 1 {
+		t.Errorf("TotalCapacityHosts = %d, want 1", f.TotalCapacityHosts)
 	}
-	if crd.Status.TotalHosts != 1 {
-		t.Errorf("TotalHosts = %d, want 1", crd.Status.TotalHosts)
+	if f.PlaceableVMs != 1 {
+		t.Errorf("PlaceableVMs = %d, want 1", f.PlaceableVMs)
+	}
+	if f.PlaceableHosts != 1 {
+		t.Errorf("PlaceableHosts = %d, want 1", f.PlaceableHosts)
 	}
 	if crd.Status.TotalInstances != 1 {
 		t.Errorf("TotalInstances = %d, want 1", crd.Status.TotalInstances)
 	}
-	if crd.Status.TotalPlaceable != 1 {
-		t.Errorf("TotalPlaceable = %d, want 1", crd.Status.TotalPlaceable)
-	}
 }
 
 func TestReconcileOne_SetsReadyConditionFalseOnSchedulerError(t *testing.T) {
@@ -247,8 +286,10 @@ func TestReconcileOne_SetsReadyConditionFalseOnSchedulerError(t *testing.T) {
 		PlaceablePipeline: "kvm-general-purpose",
 	})
 
+	smallFlavor := compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB, VCPUs: 2}
 	groupData := compute.FlavorGroupFeature{
-		SmallestFlavor: compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB},
+		SmallestFlavor: smallFlavor,
+		Flavors:        []compute.FlavorInGroup{smallFlavor},
 	}
 
 	// reconcileOne returns no error itself (it continues on probe failure), but sets Ready=False
@@ -309,8 +350,10 @@ func TestReconcileOne_IdempotentUpdate(t *testing.T) {
 		PlaceablePipeline: "kvm-general-purpose",
 	})
 
+	smallFlavor := compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB, VCPUs: 2}
 	groupData := compute.FlavorGroupFeature{
-		SmallestFlavor: compute.FlavorInGroup{Name: groupName + "-small", MemoryMB: memMB},
+		SmallestFlavor: smallFlavor,
+		Flavors:        []compute.FlavorInGroup{smallFlavor},
 	}
 	hvByName := map[string]hv1.Hypervisor{"host-1": *hv}
 
@@ -327,8 +370,11 @@ func TestReconcileOne_IdempotentUpdate(t *testing.T) {
 	if err := fakeClient.Get(context.Background(), types.NamespacedName{Name: crdName}, &crd); err != nil {
 		t.Fatalf("failed to get CRD: %v", err)
 	}
-	if crd.Status.TotalCapacity != 1 {
-		t.Errorf("TotalCapacity = %d, want 1", crd.Status.TotalCapacity)
+	if len(crd.Status.Flavors) != 1 {
+		t.Fatalf("len(Status.Flavors) = %d, want 1", len(crd.Status.Flavors))
+	}
+	if crd.Status.Flavors[0].TotalCapacityVMSlots != 1 {
+		t.Errorf("TotalCapacityVMSlots = %d, want 1", crd.Status.Flavors[0].TotalCapacityVMSlots)
 	}
 }
 
@@ -383,7 +429,7 @@ func TestProbeScheduler_CapacityCalculation(t *testing.T) {
 	}
 	flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB}
 
-	capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName, memBytes)
+	capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName)
 	if err != nil {
 		t.Fatalf("probeScheduler failed: %v", err)
 	}
diff --git a/internal/scheduling/reservations/capacity/metrics.go b/internal/scheduling/reservations/capacity/metrics.go
index 698d0ab9e..c33426b6d 100644
--- a/internal/scheduling/reservations/capacity/metrics.go
+++ b/internal/scheduling/reservations/capacity/metrics.go
@@ -11,40 +11,43 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
-var capacityLabels = []string{"flavor_group", "az"}
+var (
+	capacityLabels       = []string{"flavor_group", "az"}
+	capacityFlavorLabels = []string{"flavor_group", "az", "flavor_name"}
+)
 
 // Monitor provides Prometheus metrics for FlavorGroupCapacity CRDs.
 // It implements prometheus.Collector and reads CRD status on each Collect call.
 type Monitor struct {
-	client            client.Client
-	totalCapacity     *prometheus.GaugeVec
-	totalPlaceable    *prometheus.GaugeVec
-	totalHosts        *prometheus.GaugeVec
-	placeableHosts    *prometheus.GaugeVec
-	totalInstances    *prometheus.GaugeVec
-	committedCapacity *prometheus.GaugeVec
+	client               client.Client
+	totalCapacityVMSlots *prometheus.GaugeVec
+	placeableVMs         *prometheus.GaugeVec
+	totalCapacityHosts   *prometheus.GaugeVec
+	placeableHosts       *prometheus.GaugeVec
+	totalInstances       *prometheus.GaugeVec
+	committedCapacity    *prometheus.GaugeVec
 }
 
 // NewMonitor creates a new Monitor that reads FlavorGroupCapacity CRDs.
 func NewMonitor(c client.Client) Monitor {
 	return Monitor{
 		client: c,
-		totalCapacity: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		totalCapacityVMSlots: prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "cortex_committed_resource_capacity_total",
-			Help: "Total schedulable slots in an empty-datacenter scenario per flavor group and AZ.",
-		}, capacityLabels),
-		totalPlaceable: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Help: "Total schedulable slots in an empty-datacenter scenario per flavor.",
+		}, capacityFlavorLabels),
+		placeableVMs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "cortex_committed_resource_capacity_placeable",
-			Help: "Schedulable slots remaining given current VM allocations per flavor group and AZ.",
-		}, capacityLabels),
-		totalHosts: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Help: "Schedulable slots remaining given current VM allocations per flavor.",
+		}, capacityFlavorLabels),
+		totalCapacityHosts: prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "cortex_committed_resource_capacity_hosts_total",
-			Help: "Number of hosts eligible for this flavor group in the empty-state probe.",
-		}, capacityLabels),
+			Help: "Number of hosts eligible for this flavor in the empty-state probe.",
+		}, capacityFlavorLabels),
 		placeableHosts: prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "cortex_committed_resource_capacity_hosts_placeable",
-			Help: "Number of hosts still able to accept a new smallest-flavor VM.",
-		}, capacityLabels),
+			Help: "Number of hosts still able to accept a new VM of this flavor.",
+		}, capacityFlavorLabels),
 		totalInstances: prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "cortex_committed_resource_capacity_instances",
 			Help: "Total VM instances running on hypervisors in this AZ (not filtered by flavor group).",
@@ -58,9 +61,9 @@ func NewMonitor(c client.Client) Monitor {
 
 // Describe implements prometheus.Collector.
 func (m *Monitor) Describe(ch chan<- *prometheus.Desc) {
-	m.totalCapacity.Describe(ch)
-	m.totalPlaceable.Describe(ch)
-	m.totalHosts.Describe(ch)
+	m.totalCapacityVMSlots.Describe(ch)
+	m.placeableVMs.Describe(ch)
+	m.totalCapacityHosts.Describe(ch)
 	m.placeableHosts.Describe(ch)
 	m.totalInstances.Describe(ch)
 	m.committedCapacity.Describe(ch)
@@ -75,29 +78,37 @@ func (m *Monitor) Collect(ch chan<- prometheus.Metric) {
 	}
 
 	// Reset all gauges so deleted CRDs don't linger.
-	m.totalCapacity.Reset()
-	m.totalPlaceable.Reset()
-	m.totalHosts.Reset()
+	m.totalCapacityVMSlots.Reset()
+	m.placeableVMs.Reset()
+	m.totalCapacityHosts.Reset()
 	m.placeableHosts.Reset()
 	m.totalInstances.Reset()
 	m.committedCapacity.Reset()
 
-	for _, c := range list.Items {
-		labels := prometheus.Labels{
-			"flavor_group": c.Spec.FlavorGroup,
-			"az":           c.Spec.AvailabilityZone,
+	for _, crd := range list.Items {
+		groupAZLabels := prometheus.Labels{
+			"flavor_group": crd.Spec.FlavorGroup,
+			"az":           crd.Spec.AvailabilityZone,
+		}
+		m.totalInstances.With(groupAZLabels).Set(float64(crd.Status.TotalInstances))
+		m.committedCapacity.With(groupAZLabels).Set(float64(crd.Status.CommittedCapacity))
+
+		for _, f := range crd.Status.Flavors {
+			flavorLabels := prometheus.Labels{
+				"flavor_group": crd.Spec.FlavorGroup,
+				"az":           crd.Spec.AvailabilityZone,
+				"flavor_name":  f.FlavorName,
+			}
+			m.totalCapacityVMSlots.With(flavorLabels).Set(float64(f.TotalCapacityVMSlots))
+			m.placeableVMs.With(flavorLabels).Set(float64(f.PlaceableVMs))
+			m.totalCapacityHosts.With(flavorLabels).Set(float64(f.TotalCapacityHosts))
+			m.placeableHosts.With(flavorLabels).Set(float64(f.PlaceableHosts))
 		}
-		m.totalCapacity.With(labels).Set(float64(c.Status.TotalCapacity))
-		m.totalPlaceable.With(labels).Set(float64(c.Status.TotalPlaceable))
-		m.totalHosts.With(labels).Set(float64(c.Status.TotalHosts))
-		m.placeableHosts.With(labels).Set(float64(c.Status.PlaceableHosts))
-		m.totalInstances.With(labels).Set(float64(c.Status.TotalInstances))
-		m.committedCapacity.With(labels).Set(float64(c.Status.CommittedCapacity))
 	}
 
-	m.totalCapacity.Collect(ch)
-	m.totalPlaceable.Collect(ch)
-	m.totalHosts.Collect(ch)
+	m.totalCapacityVMSlots.Collect(ch)
+	m.placeableVMs.Collect(ch)
+	m.totalCapacityHosts.Collect(ch)
 	m.placeableHosts.Collect(ch)
 	m.totalInstances.Collect(ch)
 	m.committedCapacity.Collect(ch)

From 9a69822895a5fa730738b16254e72c4491eee8e3 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 5 May 2026 11:47:07 +0200
Subject: [PATCH 10/15] fix: pass all known hypervisors as EligibleHosts in capacity probe

---
 .../scheduling/reservations/capacity/controller.go     | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
index 8522f0036..337ab1aa5 100644
--- a/internal/scheduling/reservations/capacity/controller.go
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -21,6 +21,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+	schedulerapi "github.com/cobaltcore-dev/cortex/api/external/nova"
 	"github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
 )
@@ -212,6 +213,14 @@ func (c *Controller) probeScheduler(
 		return 0, 0, fmt.Errorf("flavor %q has invalid memory %d MB", flavor.Name, flavor.MemoryMB)
 	}
 
+	// Build EligibleHosts from all known hypervisors so that novaLimitHostsToRequest
+	// (which filters the response to hosts present in the request) does not zero out
+	// the result. The AZ filter in the pipeline handles narrowing to the correct AZ.
+	eligibleHosts := make([]schedulerapi.ExternalSchedulerHost, 0, len(hvByName))
+	for name := range hvByName {
+		eligibleHosts = append(eligibleHosts, schedulerapi.ExternalSchedulerHost{ComputeHost: name})
+	}
+
 	resp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{
 		InstanceUUID:     uuid.New().String(),
 		ProjectID:        "cortex-capacity-probe",
@@ -221,6 +230,7 @@ func (c *Controller) probeScheduler(
 		FlavorExtraSpecs: flavor.ExtraSpecs,
 		AvailabilityZone: az,
 		Pipeline:         pipeline,
+		EligibleHosts:    eligibleHosts,
 	})
 	if err != nil {
 		return 0, 0, fmt.Errorf("scheduler call failed (pipeline=%s): %w", pipeline, err)

From 78977addf4d22bb98b865833ab5aa52250b089aa Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 5 May 2026 13:00:25 +0200
Subject: [PATCH 11/15] fix: block full reservation slot when ignoring allocations

---
 .../nova/plugins/filters/filter_has_enough_capacity.go        | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
index 9e5e6b16d..b97d3e0e5 100644
--- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
+++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
@@ -196,6 +196,10 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
 		// Oversize spec-only: if a pending VM is larger than the remaining slot, block its full size.
 		var resourcesToBlock map[hv1.ResourceName]resource.Quantity
 		if reservation.Spec.Type == v1alpha1.ReservationTypeCommittedResource &&
+			// When ignoring allocations (empty-datacenter scenario) VM resources are not
+			// deducted, so the confirmed-VM adjustment would under-block: always use the
+			// full slot instead.
+			!s.Options.IgnoreAllocations &&
 			// if the reservation is not being migrated, block only unused resources
 			reservation.Spec.TargetHost == reservation.Status.Host &&
 			reservation.Spec.CommittedResourceReservation != nil &&

From c51696aee1b74222c3c25d82df90dc148b600282 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 5 May 2026 13:12:33 +0200
Subject: [PATCH 12/15] timeout to avoid blocking of API

---
 internal/scheduling/reservations/capacity/metrics.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/internal/scheduling/reservations/capacity/metrics.go b/internal/scheduling/reservations/capacity/metrics.go
index c33426b6d..bd13ca7ca 100644
--- a/internal/scheduling/reservations/capacity/metrics.go
+++ b/internal/scheduling/reservations/capacity/metrics.go
@@ -5,6 +5,7 @@ package capacity
 
 import (
 	"context"
+	"time"
 
 	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
 	"github.com/prometheus/client_golang/prometheus"
@@ -72,7 +73,9 @@ func (m *Monitor) Describe(ch chan<- *prometheus.Desc) {
 // Collect implements prometheus.Collector — lists all FlavorGroupCapacity CRDs and exports gauges.
 func (m *Monitor) Collect(ch chan<- prometheus.Metric) {
 	var list v1alpha1.FlavorGroupCapacityList
-	if err := m.client.List(context.Background(), &list); err != nil {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	if err := m.client.List(ctx, &list); err != nil {
 		log.Error(err, "failed to list FlavorGroupCapacity CRDs for metrics")
 		return
 	}

From ff70c00a0ba53dfe188bc3429d43e199cb891701 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 5 May 2026 14:44:56 +0200
Subject: [PATCH 13/15] fix: list FlavorGroupCapacity kinds in cortex-nova values.yaml

---
 helm/bundles/cortex-nova/values.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml
index 9360687ea..85e995ba6 100644
--- a/helm/bundles/cortex-nova/values.yaml
+++ b/helm/bundles/cortex-nova/values.yaml
@@ -95,6 +95,8 @@ cortex: &cortex
           - cortex.cloud/v1alpha1/ReservationList
           - cortex.cloud/v1alpha1/CommittedResource
           - cortex.cloud/v1alpha1/CommittedResourceList
+          - cortex.cloud/v1alpha1/FlavorGroupCapacity
+          - cortex.cloud/v1alpha1/FlavorGroupCapacityList
           - kvm.cloud.sap/v1/Hypervisor
           - kvm.cloud.sap/v1/HypervisorList
           - v1/Secret

From 642f9aaa6db87e26e68c9e1f492c87aff894fcf8 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Tue, 5 May 2026 15:03:23 +0200
Subject: [PATCH 14/15] fix: use metav1.Duration for capacity ReconcileInterval

---
 internal/scheduling/reservations/capacity/config.go  | 12 ++++++++----
 .../scheduling/reservations/capacity/controller.go   |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/internal/scheduling/reservations/capacity/config.go b/internal/scheduling/reservations/capacity/config.go
index 2940f32e8..dc134e887 100644
--- a/internal/scheduling/reservations/capacity/config.go
+++ b/internal/scheduling/reservations/capacity/config.go
@@ -3,12 +3,16 @@
 
 package capacity
 
-import "time"
+import (
+	"time"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
 
 // Config holds configuration for the capacity controller.
 type Config struct {
 	// ReconcileInterval is how often the controller probes the scheduler and updates CRDs.
-	ReconcileInterval time.Duration `json:"capacityReconcileInterval"`
+	ReconcileInterval metav1.Duration `json:"capacityReconcileInterval"`
 
 	// TotalPipeline is the scheduler pipeline used for the empty-state probe.
 	// This pipeline should ignore current VM allocations (e.g. kvm-report-capacity).
@@ -25,7 +29,7 @@ type Config struct {
 // ApplyDefaults fills in any unset values with defaults.
 func (c *Config) ApplyDefaults() {
 	defaults := DefaultConfig()
-	if c.ReconcileInterval == 0 {
+	if c.ReconcileInterval.Duration == 0 {
 		c.ReconcileInterval = defaults.ReconcileInterval
 	}
 	if c.TotalPipeline == "" {
@@ -41,7 +45,7 @@ func (c *Config) ApplyDefaults() {
 
 func DefaultConfig() Config {
 	return Config{
-		ReconcileInterval: 5 * time.Minute,
+		ReconcileInterval: metav1.Duration{Duration: 5 * time.Minute},
 		TotalPipeline:     "kvm-report-capacity",
 		PlaceablePipeline: "kvm-general-purpose-load-balancing",
 		SchedulerURL:      "http://localhost:8080/scheduler/nova/external",
diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
index 337ab1aa5..b37597911 100644
--- a/internal/scheduling/reservations/capacity/controller.go
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -57,7 +57,7 @@ func (c *Controller) Start(ctx context.Context) error {
 			if err := c.reconcileAll(ctx); err != nil {
 				log.Error(err, "reconcile cycle failed")
 			}
-			timer.Reset(c.config.ReconcileInterval)
+			timer.Reset(c.config.ReconcileInterval.Duration)
 		}
 	}
 }

From 9d9a0bc54246021d661e104f385e375dfa567877 Mon Sep 17 00:00:00 2001
From: Julius Clausnitzer <julius.clausnitzer@sap.com>
Date: Wed, 6 May 2026 16:21:30 +0200
Subject: [PATCH 15/15] lint

---
 internal/scheduling/reservations/capacity/controller.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go
index b37597911..7a013a0a0 100644
--- a/internal/scheduling/reservations/capacity/controller.go
+++ b/internal/scheduling/reservations/capacity/controller.go
@@ -20,8 +20,8 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
-	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
 	schedulerapi "github.com/cobaltcore-dev/cortex/api/external/nova"
+	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
 	"github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
 )