Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions api/v1alpha1/flavor_group_capacity_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0

package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// FlavorGroupCapacityConditionReady is the condition type signalling that the
// status data of a FlavorGroupCapacity is up-to-date.
const FlavorGroupCapacityConditionReady = "Ready"

// FlavorGroupCapacitySpec defines the desired state of FlavorGroupCapacity.
// It names the flavor group and the availability zone that the capacity data
// refers to. Both fields are required and are always serialized, since their
// JSON tags carry no omitempty option.
type FlavorGroupCapacitySpec struct {
	// FlavorGroup is the name of the flavor group (e.g. "hana-v2").
	// +kubebuilder:validation:Required
	FlavorGroup string `json:"flavorGroup"`

	// AvailabilityZone is the OpenStack AZ this capacity data covers (e.g. "qa-de-1a").
	// +kubebuilder:validation:Required
	AvailabilityZone string `json:"availabilityZone"`
}

// FlavorCapacityStatus holds per-flavor capacity numbers for one (flavor group × AZ) pair.
// The Placeable* counters take current allocations into account, while the
// TotalCapacity* counters describe an empty-datacenter scenario.
// All counters are tagged omitempty, so a value of 0 is left out of the
// serialized JSON and reads back as 0.
type FlavorCapacityStatus struct {
	// FlavorName is the OpenStack flavor name (e.g. "hana-v2-small").
	FlavorName string `json:"flavorName"`

	// PlaceableHosts is the number of hosts that can still fit this flavor given current allocations.
	// +kubebuilder:validation:Optional
	PlaceableHosts int64 `json:"placeableHosts,omitempty"`

	// PlaceableVMs is the number of VM slots remaining for this flavor given current allocations.
	// +kubebuilder:validation:Optional
	PlaceableVMs int64 `json:"placeableVms,omitempty"`

	// TotalCapacityHosts is the number of eligible hosts in an empty-datacenter scenario.
	// +kubebuilder:validation:Optional
	TotalCapacityHosts int64 `json:"totalCapacityHosts,omitempty"`

	// TotalCapacityVMSlots is the maximum number of VM slots in an empty-datacenter scenario.
	// +kubebuilder:validation:Optional
	TotalCapacityVMSlots int64 `json:"totalCapacityVmSlots,omitempty"`
}

// FlavorGroupCapacityStatus defines the observed state of FlavorGroupCapacity.
// Every field is optional and is omitted from the serialized JSON while it
// still holds its zero value.
type FlavorGroupCapacityStatus struct {
	// Flavors holds per-flavor capacity data for all flavors in the group.
	// +kubebuilder:validation:Optional
	Flavors []FlavorCapacityStatus `json:"flavors,omitempty"`

	// CommittedCapacity is the sum of AcceptedAmount across active CommittedResource objects,
	// expressed in multiples of the smallest flavor's memory.
	// +kubebuilder:validation:Optional
	CommittedCapacity int64 `json:"committedCapacity,omitempty"`

	// TotalInstances is the total number of VM instances running on hypervisors in this AZ,
	// derived from the Status.Instances of Hypervisor objects (not filtered by flavor group).
	// +kubebuilder:validation:Optional
	TotalInstances int64 `json:"totalInstances,omitempty"`

	// Implementation note: omitzero is needed in addition to omitempty because
	// metav1.Time is a struct, which omitempty never treats as empty; without it
	// an object that was never reconciled serializes this field as null.

	// LastReconcileAt is the timestamp of the last successful reconcile.
	// +kubebuilder:validation:Optional
	LastReconcileAt metav1.Time `json:"lastReconcileAt,omitempty,omitzero"`

	// Implementation note: the patchStrategy/patchMergeKey struct tags have no
	// effect on the generated CRD schema; the listType/listMapKey markers are what
	// declare the list as keyed by condition type.

	// The current status conditions of the FlavorGroupCapacity.
	// +listType=map
	// +listMapKey=type
	// +kubebuilder:validation:Optional
	Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
}
Comment thread
juliusclausnitzer marked this conversation as resolved.

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:scope=Cluster
// +kubebuilder:printcolumn:name="FlavorGroup",type="string",JSONPath=".spec.flavorGroup"
// +kubebuilder:printcolumn:name="AZ",type="string",JSONPath=".spec.availabilityZone"
// +kubebuilder:printcolumn:name="TotalInstances",type="integer",JSONPath=".status.totalInstances"
// +kubebuilder:printcolumn:name="LastReconcile",type="date",JSONPath=".status.lastReconcileAt"
// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status"

// FlavorGroupCapacity caches pre-computed capacity data for one flavor group in one AZ.
// One object exists per (flavor group × AZ) pair, updated by the capacity controller on a fixed interval.
// The capacity API reads these objects instead of probing the scheduler on each request.
// The resource is cluster-scoped and exposes its status as a subresource.
type FlavorGroupCapacity struct {
	metav1.TypeMeta `json:",inline"`

	// metadata is a standard object metadata
	// +optional
	metav1.ObjectMeta `json:"metadata,omitempty,omitzero"`

	// spec defines the desired state of FlavorGroupCapacity
	// +required
	Spec FlavorGroupCapacitySpec `json:"spec"`

	// status defines the observed state of FlavorGroupCapacity
	// +optional
	Status FlavorGroupCapacityStatus `json:"status,omitempty,omitzero"`
}

// +kubebuilder:object:root=true

// FlavorGroupCapacityList contains a list of FlavorGroupCapacity.
type FlavorGroupCapacityList struct {
	metav1.TypeMeta `json:",inline"`
	// Standard list metadata, serialized under the "metadata" key.
	metav1.ListMeta `json:"metadata,omitempty"`
	// Items holds the FlavorGroupCapacity objects in the list; the key is always
	// serialized because the tag carries no omitempty option.
	Items []FlavorGroupCapacity `json:"items"`
}

// init registers FlavorGroupCapacity and its list type with the package-level
// SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		new(FlavorGroupCapacity),
		new(FlavorGroupCapacityList),
	)
}
117 changes: 117 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ import (
"github.com/cobaltcore-dev/cortex/internal/scheduling/nova"
"github.com/cobaltcore-dev/cortex/internal/scheduling/pods"
"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/capacity"
"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments"
commitmentsapi "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/commitments/api"
"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations/failover"
Expand Down Expand Up @@ -686,6 +687,29 @@ func main() {
"maxVMsToProcess", failoverConfig.MaxVMsToProcess,
"vmSelectionRotationInterval", failoverConfig.VMSelectionRotationInterval)
}
// Wire up the capacity controller, but only when "capacity-controller" is
// listed in the enabled controllers.
if slices.Contains(mainConfig.EnabledControllers, "capacity-controller") {
	setupLog.Info("enabling controller", "controller", "capacity-controller")
	// Load the controller configuration. NOTE(review): GetConfigOrDie presumably
	// terminates the process on failure and ApplyDefaults presumably fills unset
	// fields — confirm in the conf and capacity packages.
	capacityConfig := conf.GetConfigOrDie[capacity.Config]()
	capacityConfig.ApplyDefaults()

	// Metrics are best-effort: a registration failure is logged and start-up
	// continues without them.
	capacityMonitor := capacity.NewMonitor(multiclusterClient)
	if err := metrics.Registry.Register(&capacityMonitor); err != nil {
		setupLog.Error(err, "failed to register capacity monitor metrics, continuing without metrics")
	}

	// Hand the controller's Start method to the manager as a runnable. Unlike the
	// metrics registration above, failing to add it aborts the process with exit
	// code 1.
	capacityController := capacity.NewController(multiclusterClient, capacityConfig)
	if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
		return capacityController.Start(ctx)
	})); err != nil {
		setupLog.Error(err, "unable to add capacity controller to manager")
		os.Exit(1)
	}
	// Log the configuration the controller was created with.
	setupLog.Info("capacity-controller registered",
		"schedulerURL", capacityConfig.SchedulerURL,
		"reconcileInterval", capacityConfig.ReconcileInterval,
		"totalPipeline", capacityConfig.TotalPipeline,
		"placeablePipeline", capacityConfig.PlaceablePipeline)
}

// +kubebuilder:scaffold:builder

Expand Down
40 changes: 40 additions & 0 deletions helm/bundles/cortex-nova/templates/pipelines_kvm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -557,4 +557,44 @@ spec:
VM is allocated get a higher weight, encouraging placement on
pre-reserved failover capacity. For non-evacuation requests, this
weigher has no effect.
---
apiVersion: cortex.cloud/v1alpha1
kind: Pipeline
metadata:
name: kvm-report-capacity
spec:
schedulingDomain: nova
description: |
This pipeline is used by the capacity controller to determine the
theoretical maximum capacity of each flavor group per availability zone,
as if all hosts were completely empty. It ignores current VM allocations
and all reservation blockings so that only raw hardware capacity is
considered.
type: filter-weigher
createDecisions: false
# Fetch all placement candidates, ignoring nova's preselection.
ignorePreselection: true
filters:
- name: filter_correct_az
description: |
Restricts host candidates to the requested availability zone.
- name: filter_has_enough_capacity
description: |
Filters hosts that cannot fit the flavor based on raw hardware capacity.
VM allocations and all reservation types are ignored to represent an
empty datacenter scenario.
params:
- {key: ignoreAllocations, boolValue: true}
- {key: ignoredReservationTypes, stringListValue: ["CommittedResourceReservation", "FailoverReservation"]}
- name: filter_has_requested_traits
description: |
Ensures hosts have the hardware traits required by the flavor.
- name: filter_capabilities
description: |
Ensures hosts meet the compute capabilities required by the flavor
extra specs (e.g., architecture, maxphysaddr bits).
- name: filter_status_conditions
description: |
Excludes hosts that are not ready or are disabled.
weighers: []
{{- end }}
9 changes: 9 additions & 0 deletions helm/bundles/cortex-nova/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ cortex: &cortex
- cortex.cloud/v1alpha1/ReservationList
- cortex.cloud/v1alpha1/CommittedResource
- cortex.cloud/v1alpha1/CommittedResourceList
- cortex.cloud/v1alpha1/FlavorGroupCapacity
- cortex.cloud/v1alpha1/FlavorGroupCapacityList
- kvm.cloud.sap/v1/Hypervisor
- kvm.cloud.sap/v1/HypervisorList
- v1/Secret
Expand Down Expand Up @@ -130,6 +132,13 @@ cortex-scheduling-controllers:
- hypervisor-overcommit-controller
- committed-resource-reservations-controller
- failover-reservations-controller
- capacity-controller
# Pipeline used for the empty-state capacity probe (ignores allocations and reservations).
capacityTotalPipeline: "kvm-report-capacity"
# Pipeline used for the current-state capacity probe (considers current VM allocations).
capacityPlaceablePipeline: "kvm-general-purpose-load-balancing"
# How often the capacity controller re-runs its scheduler probes.
capacityReconcileInterval: 5m
enabledTasks:
- nova-history-cleanup-task
# If true, the external scheduler API will limit the list of hosts in its
Expand Down
Loading
Loading