cobaltcore-dev · juliusclausnitzer · Apr 22, 2026 · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026
@@ -0,0 +1,112 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+const (
+	// FlavorGroupCapacityConditionReady is the condition type indicating that the status data is up-to-date.
+	FlavorGroupCapacityConditionReady = "Ready"
+)
+
+// FlavorGroupCapacitySpec identifies the (flavor group × AZ) pair that a FlavorGroupCapacity covers.
+type FlavorGroupCapacitySpec struct {
+	// FlavorGroup is the name of the flavor group (e.g. "hana-v2").
+	// +kubebuilder:validation:Required
+	FlavorGroup string `json:"flavorGroup"`
+
+	// AvailabilityZone is the OpenStack AZ this capacity data covers (e.g. "qa-de-1a").
+	// +kubebuilder:validation:Required
+	AvailabilityZone string `json:"availabilityZone"`
+}
+
+// FlavorCapacityStatus holds the capacity numbers of a single flavor within one (flavor group × AZ) pair.
+type FlavorCapacityStatus struct {
+	// FlavorName is the OpenStack flavor name (e.g. "hana-v2-small").
+	FlavorName string `json:"flavorName"`
+
+	// PlaceableHosts is the number of hosts that can still fit this flavor given current allocations.
+	// +kubebuilder:validation:Optional
+	PlaceableHosts int64 `json:"placeableHosts,omitempty"`
+
+	// PlaceableVMs is the number of VM slots remaining for this flavor given current allocations.
+	// +kubebuilder:validation:Optional
+	PlaceableVMs int64 `json:"placeableVms,omitempty"`
+
+	// TotalCapacityHosts is the number of eligible hosts in an empty-datacenter scenario.
+	// +kubebuilder:validation:Optional
+	TotalCapacityHosts int64 `json:"totalCapacityHosts,omitempty"`
+
+	// TotalCapacityVMSlots is the maximum number of VM slots in an empty-datacenter scenario.
+	// +kubebuilder:validation:Optional
+	TotalCapacityVMSlots int64 `json:"totalCapacityVmSlots,omitempty"`
+}
+
+// FlavorGroupCapacityStatus defines the observed state of FlavorGroupCapacity.
+type FlavorGroupCapacityStatus struct {
+	// Flavors holds per-flavor capacity data for all flavors in the group.
+	// +kubebuilder:validation:Optional
+	Flavors []FlavorCapacityStatus `json:"flavors,omitempty"`
+
+	// CommittedCapacity is the sum of AcceptedAmount across active CommittedResource objects,
+	// expressed in multiples of the smallest flavor's memory.
+	// +kubebuilder:validation:Optional
+	CommittedCapacity int64 `json:"committedCapacity,omitempty"`
+
+	// TotalInstances is the total number of VM instances running on hypervisors in this AZ,
+	// derived from the Status.Instances of Hypervisor objects (not filtered by flavor group).
+	// +kubebuilder:validation:Optional
+	TotalInstances int64 `json:"totalInstances,omitempty"`
+
+	// LastReconcileAt is the timestamp of the last successful reconcile; it is left out of the JSON until set.
+	// +kubebuilder:validation:Optional
+	LastReconcileAt metav1.Time `json:"lastReconcileAt,omitempty,omitzero"`
+
+	// Conditions holds the current status conditions of the FlavorGroupCapacity.
+	// +kubebuilder:validation:Optional
+	Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:scope=Cluster
+// +kubebuilder:printcolumn:name="FlavorGroup",type="string",JSONPath=".spec.flavorGroup"
+// +kubebuilder:printcolumn:name="AZ",type="string",JSONPath=".spec.availabilityZone"
+// +kubebuilder:printcolumn:name="TotalInstances",type="integer",JSONPath=".status.totalInstances"
+// +kubebuilder:printcolumn:name="LastReconcile",type="date",JSONPath=".status.lastReconcileAt"
+// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status"
+
+// FlavorGroupCapacity caches pre-computed capacity data for one flavor group in one AZ.
+// One object exists per (flavor group × AZ) pair, updated by the capacity controller on a fixed interval.
+// The capacity API reads these objects instead of probing the scheduler on each request.
+type FlavorGroupCapacity struct {
+	metav1.TypeMeta `json:",inline"`
+
+	// metadata is the standard object metadata
+	// +optional
+	metav1.ObjectMeta `json:"metadata,omitempty,omitzero"`
+
+	// spec defines the desired state of FlavorGroupCapacity
+	// +required
+	Spec FlavorGroupCapacitySpec `json:"spec"`
+
+	// status defines the observed state of FlavorGroupCapacity
+	// +optional
+	Status FlavorGroupCapacityStatus `json:"status,omitempty,omitzero"`
+}
+
+// +kubebuilder:object:root=true
+
+// FlavorGroupCapacityList is the list type that carries multiple FlavorGroupCapacity objects in Items.
+type FlavorGroupCapacityList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []FlavorGroupCapacity `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(new(FlavorGroupCapacity), new(FlavorGroupCapacityList)) // item and list kinds are registered together
+}
@@ -56,6 +56,7 @@ import (
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/nova"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/pods"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
+	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/capacity"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments"
 	commitmentsapi "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments/api"
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/failover"
@@ -686,6 +687,29 @@ func main() {
 			"maxVMsToProcess", failoverConfig.MaxVMsToProcess,
 			"vmSelectionRotationInterval", failoverConfig.VMSelectionRotationInterval)
 	}
+	if slices.Contains(mainConfig.EnabledControllers, "capacity-controller") {
+		setupLog.Info("enabling controller", "controller", "capacity-controller")
+		capacityConfig := conf.GetConfigOrDie[capacity.Config]()
+		capacityConfig.ApplyDefaults()
+
+		capacityMonitor := capacity.NewMonitor(multiclusterClient)
+		if err := metrics.Registry.Register(&capacityMonitor); err != nil {
+			setupLog.Error(err, "failed to register capacity monitor metrics, continuing without metrics")
+		}
+
+		capacityController := capacity.NewController(multiclusterClient, capacityConfig)
+		if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
+			return capacityController.Start(ctx)
+		})); err != nil {
+			setupLog.Error(err, "unable to add capacity controller to manager")
+			os.Exit(1)
+		}
+		setupLog.Info("capacity-controller registered",
+			"schedulerURL", capacityConfig.SchedulerURL,
+			"reconcileInterval", capacityConfig.ReconcileInterval,
+			"totalPipeline", capacityConfig.TotalPipeline,
+			"placeablePipeline", capacityConfig.PlaceablePipeline)
+	}
 
 	// +kubebuilder:scaffold:builder
 

@@ -557,4 +557,44 @@ spec:
         VM is allocated get a higher weight, encouraging placement on
         pre-reserved failover capacity. For non-evacuation requests, this
         weigher has no effect.
+---
+apiVersion: cortex.cloud/v1alpha1
+kind: Pipeline
+metadata:
+  name: kvm-report-capacity
+spec:
+  schedulingDomain: nova
+  description: |
+    This pipeline is used by the capacity controller to determine the
+    theoretical maximum capacity of each flavor group per availability zone,
+    as if all hosts were completely empty. It ignores current VM allocations
+    and all reservation blockings so that only raw hardware capacity is
+    considered.
+  type: filter-weigher
+  createDecisions: false
+  # Fetch all placement candidates, ignoring nova's preselection.
+  ignorePreselection: true
+  filters:
+    - name: filter_correct_az
+      description: |
+        Restricts host candidates to the requested availability zone.
+    - name: filter_has_enough_capacity
+      description: |
+        Filters hosts that cannot fit the flavor based on raw hardware capacity.
+        VM allocations and all reservation types are ignored to represent an
+        empty datacenter scenario.
+      params:
+        - {key: ignoreAllocations, boolValue: true}
+        - {key: ignoredReservationTypes, stringListValue: ["CommittedResourceReservation", "FailoverReservation"]}
+    - name: filter_has_requested_traits
+      description: |
+        Ensures hosts have the hardware traits required by the flavor.
+    - name: filter_capabilities
+      description: |
+        Ensures hosts meet the compute capabilities required by the flavor
+        extra specs (e.g., architecture, maxphysaddr bits).
+    - name: filter_status_conditions
+      description: |
+        Excludes hosts that are not ready or are disabled.
+  weighers: []
 {{- end }}
@@ -95,6 +95,8 @@ cortex: &cortex
           - cortex.cloud/v1alpha1/ReservationList
           - cortex.cloud/v1alpha1/CommittedResource
           - cortex.cloud/v1alpha1/CommittedResourceList
+          - cortex.cloud/v1alpha1/FlavorGroupCapacity
+          - cortex.cloud/v1alpha1/FlavorGroupCapacityList
           - kvm.cloud.sap/v1/Hypervisor
           - kvm.cloud.sap/v1/HypervisorList
           - v1/Secret
@@ -130,6 +132,13 @@ cortex-scheduling-controllers:
       - hypervisor-overcommit-controller
       - committed-resource-reservations-controller
       - failover-reservations-controller
+      - capacity-controller
+    # Pipeline used for the empty-state capacity probe (ignores allocations and reservations).
+    capacityTotalPipeline: "kvm-report-capacity"
+    # Pipeline used for the current-state capacity probe (considers current VM allocations).
+    capacityPlaceablePipeline: "kvm-general-purpose-load-balancing"
+    # How often the capacity controller re-runs its scheduler probes.
+    capacityReconcileInterval: 5m
     enabledTasks:
       - nova-history-cleanup-task
     # If true, the external scheduler API will limit the list of hosts in its