From 6f2a44fccdb21af3a680fac89a40810c764624d4 Mon Sep 17 00:00:00 2001 From: Yan Sun Date: Wed, 18 Mar 2026 17:28:51 -0700 Subject: [PATCH] [Feature] Global image pull secrets injection (#1196) Signed-off-by: yansun1996 --- .wordlist.txt | 1 + Dockerfile | 2 +- api/v1alpha1/deviceconfig_types.go | 7 + api/v1alpha1/zz_generated.deepcopy.go | 5 + ...md-gpu-operator.clusterserviceversion.yaml | 8 + bundle/manifests/amd.com_deviceconfigs.yaml | 22 ++ config/crd/bases/amd.com_deviceconfigs.yaml | 22 ++ ...md-gpu-operator.clusterserviceversion.yaml | 8 + docs/installation/kubernetes-helm.md | 22 +- docs/releasenotes.md | 3 + .../specialized_networks/airgapped-install.md | 6 +- .../gpu-validation-cluster/build/Dockerfile | 2 +- .../k8s-kmm-patch/metadata-patch/values.yaml | 6 +- .../template-patch/deployment.yaml | 26 +- hack/k8s-patch/metadata-patch/Chart.yaml | 2 +- hack/k8s-patch/metadata-patch/values.yaml | 19 ++ .../template-patch/default-deviceconfig.yaml | 10 + hack/k8s-patch/template-patch/deployment.yaml | 7 +- .../template-patch/post-delete-hook.yaml | 7 +- .../template-patch/pre-delete-hook.yaml | 7 +- .../template-patch/pre-upgrade-hook.yaml | 14 +- .../remediation-deployment.yaml | 6 + helm-charts-k8s/Chart.lock | 6 +- helm-charts-k8s/Chart.yaml | 2 +- helm-charts-k8s/README.md | 3 +- .../charts/kmm/templates/deployment.yaml | 26 +- helm-charts-k8s/charts/kmm/values.yaml | 6 +- helm-charts-k8s/crds/deviceconfig-crd.yaml | 22 ++ .../templates/default-deviceconfig.yaml | 10 + helm-charts-k8s/templates/deployment.yaml | 7 +- .../templates/post-delete-hook.yaml | 7 +- .../templates/pre-delete-hook.yaml | 7 +- .../templates/pre-upgrade-hook.yaml | 14 +- .../templates/remediation-deployment.yaml | 6 + helm-charts-k8s/values.yaml | 19 ++ internal/configmanager/configmanager.go | 5 + .../controllers/device_config_reconciler.go | 41 +++ internal/metricsexporter/metricsexporter.go | 5 + internal/nodelabeller/nodelabeller.go | 5 + internal/plugin/plugin.go | 10 + 
internal/testrunner/testrunner.go | 5 + internal/validator/specValidators.go | 16 + internal/validator/validator.go | 1 + tests/helm-e2e/helm_e2e_test.go | 315 ++++++++++++++++++ 44 files changed, 708 insertions(+), 42 deletions(-) diff --git a/.wordlist.txt b/.wordlist.txt index 2f587f58a..08c23e529 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -82,6 +82,7 @@ hostnames HSIO HTTPS iet +imageRegistrySecrets IfNotPresent IgnoreDaemonSets IgnoreNamespaces diff --git a/Dockerfile b/Dockerfile index 69c101b21..06efd60f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,7 @@ COPY LICENSE LICENSE COPY helm-charts-k8s helm-charts-k8s # need to decompress nfd subchart for k8s chart, in preparation for copying out CRD RUN cd helm-charts-k8s/charts && \ - tar -xvzf node-feature-discovery-chart-0.16.1.tgz + tar -xvzf node-feature-discovery-chart-0.18.3.tgz ARG TARGET diff --git a/api/v1alpha1/deviceconfig_types.go b/api/v1alpha1/deviceconfig_types.go index d9059beb8..72c4115e7 100644 --- a/api/v1alpha1/deviceconfig_types.go +++ b/api/v1alpha1/deviceconfig_types.go @@ -944,6 +944,13 @@ type CommonConfigSpec struct { // +optional InitContainerImage string `json:"initContainerImage,omitempty"` + // ImageRegistrySecrets are global secrets used for pull/push images from/to private registries. + // These secrets will be applied to all component pods (device plugin, metrics exporter, + // test runner, config manager, DRA driver, node labeller) in addition to component-specific secrets. 
+ //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ImageRegistrySecrets",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:imageRegistrySecrets"} + // +optional + ImageRegistrySecrets []v1.LocalObjectReference `json:"imageRegistrySecrets,omitempty"` + // UtilsContainer contains parameters to configure operator's utils container //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="UtilsContainer",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:utilsContainer"} // +optional diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 7007576b9..f29fea96d 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -46,6 +46,11 @@ import ( // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CommonConfigSpec) DeepCopyInto(out *CommonConfigSpec) { *out = *in + if in.ImageRegistrySecrets != nil { + in, out := &in.ImageRegistrySecrets, &out.ImageRegistrySecrets + *out = make([]v1.LocalObjectReference, len(*in)) + copy(*out, *in) + } in.UtilsContainer.DeepCopyInto(&out.UtilsContainer) } diff --git a/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml b/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml index 94133a4d4..790709516 100644 --- a/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml +++ b/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml @@ -86,6 +86,14 @@ spec: path: commonConfig x-descriptors: - urn:alm:descriptor:com.amd.deviceconfigs:commonConfig + - description: ImageRegistrySecrets are global secrets used for pull/push images + from/to private registries. These secrets will be applied to all component + pods (device plugin, metrics exporter, test runner, config manager, DRA + driver, node labeller) in addition to component-specific secrets. 
+ displayName: ImageRegistrySecrets + path: commonConfig.imageRegistrySecrets + x-descriptors: + - urn:alm:descriptor:com.amd.deviceconfigs:imageRegistrySecrets - description: InitContainerImage is being used for the operands pods, i.e. metrics exporter, test runner, device plugin, device config manager and node labeller diff --git a/bundle/manifests/amd.com_deviceconfigs.yaml b/bundle/manifests/amd.com_deviceconfigs.yaml index 2712c8855..2737a0a3d 100644 --- a/bundle/manifests/amd.com_deviceconfigs.yaml +++ b/bundle/manifests/amd.com_deviceconfigs.yaml @@ -49,6 +49,28 @@ spec: commonConfig: description: common config properties: + imageRegistrySecrets: + description: |- + ImageRegistrySecrets are global secrets used for pull/push images from/to private registries. + These secrets will be applied to all component pods (device plugin, metrics exporter, + test runner, config manager, DRA driver, node labeller) in addition to component-specific secrets. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array initContainerImage: description: InitContainerImage is being used for the operands pods, i.e. 
metrics exporter, test runner, device plugin, device diff --git a/config/crd/bases/amd.com_deviceconfigs.yaml b/config/crd/bases/amd.com_deviceconfigs.yaml index 4047e3dae..9c862da92 100644 --- a/config/crd/bases/amd.com_deviceconfigs.yaml +++ b/config/crd/bases/amd.com_deviceconfigs.yaml @@ -45,6 +45,28 @@ spec: commonConfig: description: common config properties: + imageRegistrySecrets: + description: |- + ImageRegistrySecrets are global secrets used for pull/push images from/to private registries. + These secrets will be applied to all component pods (device plugin, metrics exporter, + test runner, config manager, DRA driver, node labeller) in addition to component-specific secrets. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array initContainerImage: description: InitContainerImage is being used for the operands pods, i.e. metrics exporter, test runner, device plugin, device diff --git a/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml b/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml index 169090900..77dae3bb0 100644 --- a/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml @@ -53,6 +53,14 @@ spec: path: commonConfig x-descriptors: - urn:alm:descriptor:com.amd.deviceconfigs:commonConfig + - description: ImageRegistrySecrets are global secrets used for pull/push images + from/to private registries. 
These secrets will be applied to all component + pods (device plugin, metrics exporter, test runner, config manager, DRA + driver, node labeller) in addition to component-specific secrets. + displayName: ImageRegistrySecrets + path: commonConfig.imageRegistrySecrets + x-descriptors: + - urn:alm:descriptor:com.amd.deviceconfigs:imageRegistrySecrets - description: InitContainerImage is being used for the operands pods, i.e. metrics exporter, test runner, device plugin, device config manager and node labeller diff --git a/docs/installation/kubernetes-helm.md b/docs/installation/kubernetes-helm.md index b829c6318..7af291cb5 100644 --- a/docs/installation/kubernetes-helm.md +++ b/docs/installation/kubernetes-helm.md @@ -139,7 +139,26 @@ Installation Options ```{tip} 1. Before v1.3.0 the gpu operator helm chart won't provide a default ```DeviceConfig```, you need to take extra step to create a ```DeviceConfig```. -2. Starting from v1.3.0 the ```helm install``` command would support one-step installation + configuration, which would create a default ```DeviceConfig``` with default values, which may not work for all the users with different the deployment scenarios, please refer to {ref}`typical-deployment-scenarios` for more information and get corresponding ```helm install``` commands. +2. Starting from v1.3.0 the ```helm install``` command would support one-step installation + configuration, which would create a default ```DeviceConfig``` with default values, which may not work for all the users with different the deployment scenarios, please refer to {ref}`typical-deployment-scenarios` for more information and get corresponding ```helm install``` commands. + +3. 
Global Image Pull Secrets (v1.5.0+): If you need to pull images from private registries or avoid Docker Hub rate limits, you can configure global image pull secrets that will be automatically applied to all components: + + ~~~bash + # Create your image pull secret first + kubectl create secret docker-registry my-registry-secret \ + --docker-server=<registry-server> \ + --docker-username=<username> \ + --docker-password=<password> \ + --namespace=kube-amd-gpu + + # Install with global secret + helm install amd-gpu-operator rocm/gpu-operator-charts \ + --namespace kube-amd-gpu \ + --create-namespace \ + --version=v1.5.0 \ + --set global.imagePullSecrets[0].name=my-registry-secret + ~~~ + ``` ### 3. Helm Chart Customization Parameters @@ -171,6 +190,7 @@ The following parameters are able to be configued when using the Helm Chart. In | controllerManager.manager.resources.requests.memory | string | `"256Mi"` | Memory requests for the controller manager. Adjust based on observed memory usage | | controllerManager.nodeAffinity.nodeSelectorTerms | list | `[{"key":"node-role.kubernetes.io/control-plane","operator":"Exists"},{"key":"node-role.kubernetes.io/master","operator":"Exists"}]` | Node affinity selector terms config for the AMD GPU operator controller manager, set it to [] if you want to make affinity config empty | | controllerManager.nodeSelector | object | `{}` | Node selector for AMD GPU operator controller manager deployment | +| global.imagePullSecrets | list | `[]` | Global image pull secret(s) applied to all component pods. Automatically inherited by controller, hooks, DeviceConfig components, and KMM. 
Format: `[{"name": "mySecret"}]` | | installdefaultNFDRule | bool | `true` | Set to true to install default NFD rule for detecting AMD GPU hardware based on pci vendor ID and device ID | | kmm.enabled | bool | `true` | Set to true/false to enable/disable the installation of kernel module management (KMM) operator | | kmm.watch | bool | `true` | Set to true/false to enable/disable GPU operator watching and using KMM resources | diff --git a/docs/releasenotes.md b/docs/releasenotes.md index a19d5d7d0..3fcd8aa0a 100644 --- a/docs/releasenotes.md +++ b/docs/releasenotes.md @@ -12,6 +12,9 @@ - Supports multiple deployment scenarios: use existing KMM installations (`enabled=false, watch=true`), skip KMM entirely for alternative driver solutions (`enabled=false, watch=false`), or install KMM without asking for GPU Operator to use it (`enabled=true, watch=false`) - Fully backward compatible: existing configurations with `kmm.enabled=false` continue to work without changes +- **Node Feature Discovery (NFD) Upgrade** + - Upgraded NFD helm chart dependency from v0.16.1 to v0.18.3 + ## GPU Operator v1.4.1 Release Notes The AMD GPU Operator v1.4.1 release extends platform support to OpenShift v4.20 and Debian 12, and introduces the ability to build `amdgpu` kernel modules directly within air-gapped OpenShift clusters. 
diff --git a/docs/specialized_networks/airgapped-install.md b/docs/specialized_networks/airgapped-install.md index b745e12c4..a5fdaa3bb 100644 --- a/docs/specialized_networks/airgapped-install.md +++ b/docs/specialized_networks/airgapped-install.md @@ -38,7 +38,7 @@ docker.io/ubuntu: docker.io/busybox:1.36 # Node Feature Discovery -registry.k8s.io/nfd/node-feature-discovery:v0.16.1 +registry.k8s.io/nfd/node-feature-discovery:v0.18.3 # Cert-Manager Images quay.io/jetstack/cert-manager-controller:v1.15.1 @@ -84,7 +84,7 @@ INTERNAL_REGISTRY="internal-registry.example.com" OPERATOR_VERSION="v1.4.1" # GPU operator version, e.g., "v1.5.0" UBUNTU_VERSION="22.04" # e.g., "22.04" KANIKO_VERSION="v1.23.2" -NFD_VERSION="v0.16.1" +NFD_VERSION="v0.18.3" CERT_MANAGER_VERSION="v1.15.1" BUSYBOX_VERSION="1.36" @@ -225,7 +225,7 @@ deviceConfig: node-feature-discovery: image: repository: internal-registry.example.com/nfd/node-feature-discovery - tag: v0.16.1 + tag: v0.18.3 # KMM (Kernel Module Management) image configuration kmm: diff --git a/example/gpu-validation-cluster/build/Dockerfile b/example/gpu-validation-cluster/build/Dockerfile index 6d74ba568..a4c99739b 100644 --- a/example/gpu-validation-cluster/build/Dockerfile +++ b/example/gpu-validation-cluster/build/Dockerfile @@ -4,7 +4,7 @@ FROM ${BASE_IMAGE} ARG K3S_VERSION=v1.35.0+k3s1 ARG MULTUS_CNI_VERSION=v4.2.2 -ARG NFD_VERSION=v0.16.1 +ARG NFD_VERSION=v0.18.3 # Install required system packages RUN apt-get update && apt-get install -y \ diff --git a/hack/k8s-patch/k8s-kmm-patch/metadata-patch/values.yaml b/hack/k8s-patch/k8s-kmm-patch/metadata-patch/values.yaml index 0be1cf5e4..ad5d1d383 100644 --- a/hack/k8s-patch/k8s-kmm-patch/metadata-patch/values.yaml +++ b/hack/k8s-patch/k8s-kmm-patch/metadata-patch/values.yaml @@ -13,9 +13,11 @@ controller: relatedImageWorker: docker.io/rocm/kernel-module-management-worker:latest # -- Image pull secret name for pulling KMM kaniko builder image if registry needs credential to pull image 
relatedImageBuildPullSecret: "" - # -- Image pull secret name for pulling KMM signer image if registry needs credential to pull image + # -- Image pull secret name for pulling KMM signer image if registry needs credential to pull image. + # If not set and global.imagePullSecrets is configured, the first global secret will be used automatically. relatedImageSignPullSecret: "" - # -- Image pull secret name for pulling KMM worker image if registry needs credential to pull image + # -- Image pull secret name for pulling KMM worker image if registry needs credential to pull image. + # If not set and global.imagePullSecrets is configured, the first global secret will be used automatically. relatedImageWorkerPullSecret: "" image: # -- KMM controller manager image repository diff --git a/hack/k8s-patch/k8s-kmm-patch/template-patch/deployment.yaml b/hack/k8s-patch/k8s-kmm-patch/template-patch/deployment.yaml index a2554ffbc..0cb9246f9 100644 --- a/hack/k8s-patch/k8s-kmm-patch/template-patch/deployment.yaml +++ b/hack/k8s-patch/k8s-kmm-patch/template-patch/deployment.yaml @@ -45,17 +45,17 @@ spec: value: {{ quote .Values.controller.manager.env.relatedImageSign }} - name: KUBERNETES_CLUSTER_DOMAIN value: {{ quote .Values.kubernetesClusterDomain }} - {{- if .Values.controller.manager.env.relatedImageBuildPullSecret }} + {{- if or .Values.controller.manager.env.relatedImageBuildPullSecret .Values.global.imagePullSecrets }} - name: RELATED_IMAGE_BUILD_PULL_SECRET - value: {{ .Values.controller.manager.env.relatedImageBuildPullSecret }} + value: {{ .Values.controller.manager.env.relatedImageBuildPullSecret | default (get (first (.Values.global.imagePullSecrets | default (list dict))) "name") | default "" }} {{- end}} - {{- if .Values.controller.manager.env.relatedImageSignPullSecret }} + {{- if or .Values.controller.manager.env.relatedImageSignPullSecret .Values.global.imagePullSecrets }} - name: RELATED_IMAGE_SIGN_PULL_SECRET - value: {{ .Values.controller.manager.env.relatedImageSignPullSecret }} + value: 
{{ .Values.controller.manager.env.relatedImageSignPullSecret | default (get (first (.Values.global.imagePullSecrets | default (list dict))) "name") | default "" }} {{- end}} - {{- if .Values.controller.manager.env.relatedImageWorkerPullSecret }} + {{- if or .Values.controller.manager.env.relatedImageWorkerPullSecret .Values.global.imagePullSecrets }} - name: RELATED_IMAGE_WORKER_PULL_SECRET - value: {{ .Values.controller.manager.env.relatedImageWorkerPullSecret }} + value: {{ .Values.controller.manager.env.relatedImageWorkerPullSecret | default (get (first (.Values.global.imagePullSecrets | default (list dict))) "name") | default "" }} {{- end}} {{- if .Values.global.proxy.env | default dict}} {{- range $key, $value := .Values.global.proxy.env }} @@ -90,9 +90,14 @@ spec: - mountPath: /controller_config.yaml name: manager-config subPath: controller_config.yaml - {{- if .Values.controller.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controller.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.controller.manager.imagePullSecrets }} - name: {{ .Values.controller.manager.imagePullSecrets }} + {{- end }} {{- end}} securityContext: runAsNonRoot: true @@ -184,9 +189,14 @@ spec: - mountPath: /controller_config.yaml name: manager-config subPath: controller_config.yaml - {{- if .Values.webhookServer.webhookServer.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.webhookServer.webhookServer.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.webhookServer.webhookServer.imagePullSecrets }} - name: {{ .Values.webhookServer.webhookServer.imagePullSecrets }} + {{- end }} {{- end}} securityContext: runAsNonRoot: true diff --git a/hack/k8s-patch/metadata-patch/Chart.yaml b/hack/k8s-patch/metadata-patch/Chart.yaml index 41afb478b..607e609cf 100644 --- a/hack/k8s-patch/metadata-patch/Chart.yaml +++ b/hack/k8s-patch/metadata-patch/Chart.yaml @@ -24,7 +24,7 @@ appVersion: "v1.4.0" dependencies: - name: node-feature-discovery - version: v0.16.1 + version: v0.18.3 repository: "https://kubernetes-sigs.github.io/node-feature-discovery/charts" condition: node-feature-discovery.enabled - name: kmm diff --git a/hack/k8s-patch/metadata-patch/values.yaml b/hack/k8s-patch/metadata-patch/values.yaml index e050da3c0..4732ad7fd 100644 --- a/hack/k8s-patch/metadata-patch/values.yaml +++ b/hack/k8s-patch/metadata-patch/values.yaml @@ -1,5 +1,7 @@ # NFD related configs # schema reference: https://github.com/kubernetes-sigs/node-feature-discovery/blob/release-0.16/deployment/helm/node-feature-discovery/values.yaml +# Note: To use global secrets, set imagePullSecrets of NFD subchart itself, global.imagePullSecrets will not be automatically inherited by NFD subchart. +# Example: node-feature-discovery.imagePullSecrets: [{"name": "my-secret"}] node-feature-discovery: # -- Set to true/false to enable/disable the installation of node feature discovery (NFD) operator enabled: true @@ -16,6 +18,9 @@ node-feature-discovery: # -- Set nodeSelector for NFD worker daemonset nodeSelector: {} # KMM related configs +# Note: KMM automatically inherits global.imagePullSecrets. 
You can override or supplement +# with component-specific secrets using controller.manager.imagePullSecrets and +# webhookServer.webhookServer.imagePullSecrets kmm: # -- Set to true/false to enable/disable the installation of kernel module management (KMM) operator subchart enabled: true @@ -372,5 +377,19 @@ utilsContainer: serviceAccount: annotations: {} global: + # -- Global image pull secret(s) applied to all component pods and subcharts. + # If specified, these secrets will be used by: + # - GPU operator controller manager deployment + # - Remediation workflow controller + # - All helm hooks (pre-upgrade, pre-delete, post-delete) + # - DeviceConfig-managed components (via commonConfig.imageRegistrySecrets) + # - KMM controller and webhook pods (automatically inherited) + # - KMM builder/signer/worker pods (automatically uses first secret as fallback) + # + # Format: [{"name": "myGlobalSecret"}] or [{"name": "secret1"}, {"name": "secret2"}] + # + # Note: For NFD subchart, you must manually set the field to match global secrets: + # node-feature-discovery.imagePullSecrets: [{"name": "myGlobalSecret"}] + imagePullSecrets: [] proxy: env: {} diff --git a/hack/k8s-patch/template-patch/default-deviceconfig.yaml b/hack/k8s-patch/template-patch/default-deviceconfig.yaml index 326d2e1b5..52c57b529 100644 --- a/hack/k8s-patch/template-patch/default-deviceconfig.yaml +++ b/hack/k8s-patch/template-patch/default-deviceconfig.yaml @@ -107,6 +107,16 @@ spec: initContainerImage: {{ . 
}} {{- end }} + {{- if or .imageRegistrySecrets $.Values.global.imagePullSecrets }} + imageRegistrySecrets: + {{- if $.Values.global.imagePullSecrets }} + {{- toYaml $.Values.global.imagePullSecrets | nindent 6 }} + {{- end }} + {{- if .imageRegistrySecrets }} + {{- toYaml .imageRegistrySecrets | nindent 6 }} + {{- end }} + {{- end }} + {{- with .utilsContainer }} utilsContainer: {{- with .image }} diff --git a/hack/k8s-patch/template-patch/deployment.yaml b/hack/k8s-patch/template-patch/deployment.yaml index 0bd56c848..d12ccf2d5 100644 --- a/hack/k8s-patch/template-patch/deployment.yaml +++ b/hack/k8s-patch/template-patch/deployment.yaml @@ -67,9 +67,14 @@ spec: - mountPath: /controller_manager_config.yaml name: manager-config subPath: controller_manager_config.yaml - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} + {{- end }} {{- end}} securityContext: runAsNonRoot: true diff --git a/hack/k8s-patch/template-patch/post-delete-hook.yaml b/hack/k8s-patch/template-patch/post-delete-hook.yaml index a85892994..5343d8c0a 100644 --- a/hack/k8s-patch/template-patch/post-delete-hook.yaml +++ b/hack/k8s-patch/template-patch/post-delete-hook.yaml @@ -131,10 +131,15 @@ spec: kubectl delete crds nodemodulesconfigs.kmm.sigs.x-k8s.io fi {{- end }} - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} {{- end }} + {{- end }} {{- with .Values.controllerManager.manager.tolerations }} tolerations: {{- toYaml . | nindent 8 }} diff --git a/hack/k8s-patch/template-patch/pre-delete-hook.yaml b/hack/k8s-patch/template-patch/pre-delete-hook.yaml index 381e78d73..ee4dab341 100644 --- a/hack/k8s-patch/template-patch/pre-delete-hook.yaml +++ b/hack/k8s-patch/template-patch/pre-delete-hook.yaml @@ -86,9 +86,14 @@ spec: fi # Delete all existing DeviceConfig custom resources kubectl delete deviceconfigs.amd.com --all -A - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} + {{- end }} {{- end}} {{- with .Values.controllerManager.manager.tolerations }} tolerations: diff --git a/hack/k8s-patch/template-patch/pre-upgrade-hook.yaml b/hack/k8s-patch/template-patch/pre-upgrade-hook.yaml index 639f06212..40cf7c2df 100644 --- a/hack/k8s-patch/template-patch/pre-upgrade-hook.yaml +++ b/hack/k8s-patch/template-patch/pre-upgrade-hook.yaml @@ -105,10 +105,15 @@ spec: echo "All DeviceConfigs are in an allowed state. Proceeding with upgrade." exit 0 fi - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} {{- end }} + {{- end }} {{- with .Values.controllerManager.manager.tolerations }} tolerations: {{- toYaml . | nindent 8 }} @@ -196,10 +201,15 @@ spec: name: upgrade-crd spec: serviceAccountName: upgrade-crd-hook-sa - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} {{- end }} + {{- end }} {{- with .Values.controllerManager.manager.tolerations }} tolerations: {{- toYaml . | nindent 8 }} diff --git a/hack/k8s-patch/template-patch/remediation-deployment.yaml b/hack/k8s-patch/template-patch/remediation-deployment.yaml index 9aaf468cb..d84b9b63c 100644 --- a/hack/k8s-patch/template-patch/remediation-deployment.yaml +++ b/hack/k8s-patch/template-patch/remediation-deployment.yaml @@ -337,6 +337,12 @@ spec: securityContext: runAsNonRoot: true serviceAccountName: argo + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} tolerations: - key: "amd-gpu-unhealthy" operator: "Exists" diff --git a/helm-charts-k8s/Chart.lock b/helm-charts-k8s/Chart.lock index 309e66153..f0c1462ba 100644 --- a/helm-charts-k8s/Chart.lock +++ b/helm-charts-k8s/Chart.lock @@ -1,12 +1,12 @@ dependencies: - name: node-feature-discovery repository: https://kubernetes-sigs.github.io/node-feature-discovery/charts - version: 0.16.1 + version: 0.18.3 - name: kmm repository: file://./charts/kmm version: v1.0.0 - name: remediation-crds repository: file://./charts/remediation-crds version: v1.0.0 -digest: sha256:4c6b1f3224839e54d1523759be597d20ca2fc6508eb17fda2992a95a00e1fd70 -generated: "2026-03-11T23:36:30.598519294Z" +digest: sha256:0806f6b6d7aa21be77bf1c91e720ae3238338a16f107df450a53b02ef940db1b +generated: "2026-03-16T23:06:17.340757063Z" diff --git a/helm-charts-k8s/Chart.yaml b/helm-charts-k8s/Chart.yaml index 41afb478b..607e609cf 100644 --- a/helm-charts-k8s/Chart.yaml +++ b/helm-charts-k8s/Chart.yaml @@ -24,7 +24,7 @@ appVersion: "v1.4.0" dependencies: - name: node-feature-discovery - version: v0.16.1 + version: v0.18.3 repository: "https://kubernetes-sigs.github.io/node-feature-discovery/charts" condition: node-feature-discovery.enabled - name: kmm diff --git a/helm-charts-k8s/README.md b/helm-charts-k8s/README.md index 5aebfd7b3..ecb044385 100644 --- a/helm-charts-k8s/README.md +++ b/helm-charts-k8s/README.md @@ -149,12 +149,13 @@ Kubernetes: `>= 1.29.0-0` |------------|------|---------| | file://./charts/kmm | kmm | v1.0.0 | | file://./charts/remediation-crds | remediation-crds | v1.0.0 | -| https://kubernetes-sigs.github.io/node-feature-discovery/charts | node-feature-discovery | v0.16.1 | +| https://kubernetes-sigs.github.io/node-feature-discovery/charts | node-feature-discovery | v0.18.3 | ## Values | Key | Type | Default | Description | |-----|------|---------|-------------| +| global.imagePullSecrets | list | `[]` | Global image pull secret(s) applied to all 
component pods and subcharts. If specified, these secrets will be used by: - GPU operator controller manager deployment - Remediation workflow controller - All helm hooks (pre-upgrade, pre-delete, post-delete) - DeviceConfig-managed components (via commonConfig.imageRegistrySecrets) - KMM controller and webhook pods (automatically inherited) - KMM builder/signer/worker pods (automatically uses first secret as fallback) Format: [{"name": "myGlobalSecret"}] or [{"name": "secret1"}, {"name": "secret2"}] Note: For NFD subchart, you must manually set the field to match global secrets: node-feature-discovery.imagePullSecrets: [{"name": "myGlobalSecret"}] | | controllerManager.affinity | object | `{"nodeAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"preference":{"matchExpressions":[{"key":"node-role.kubernetes.io/control-plane","operator":"Exists"}]},"weight":1}]}}` | Deployment affinity configs for controller manager | | controllerManager.manager.image.repository | string | `"docker.io/rocm/gpu-operator"` | AMD GPU operator controller manager image repository | | controllerManager.manager.image.tag | string | `"dev"` | AMD GPU operator controller manager image tag | diff --git a/helm-charts-k8s/charts/kmm/templates/deployment.yaml b/helm-charts-k8s/charts/kmm/templates/deployment.yaml index a2554ffbc..0cb9246f9 100644 --- a/helm-charts-k8s/charts/kmm/templates/deployment.yaml +++ b/helm-charts-k8s/charts/kmm/templates/deployment.yaml @@ -45,17 +45,17 @@ spec: value: {{ quote .Values.controller.manager.env.relatedImageSign }} - name: KUBERNETES_CLUSTER_DOMAIN value: {{ quote .Values.kubernetesClusterDomain }} - {{- if .Values.controller.manager.env.relatedImageBuildPullSecret }} + {{- if or .Values.controller.manager.env.relatedImageBuildPullSecret .Values.global.imagePullSecrets }} - name: RELATED_IMAGE_BUILD_PULL_SECRET - value: {{ .Values.controller.manager.env.relatedImageBuildPullSecret }} + value: {{ 
.Values.controller.manager.env.relatedImageBuildPullSecret | default (get (first (.Values.global.imagePullSecrets | default (list dict))) "name") | default "" }} {{- end}} - {{- if .Values.controller.manager.env.relatedImageSignPullSecret }} + {{- if or .Values.controller.manager.env.relatedImageSignPullSecret .Values.global.imagePullSecrets }} - name: RELATED_IMAGE_SIGN_PULL_SECRET - value: {{ .Values.controller.manager.env.relatedImageSignPullSecret }} + value: {{ .Values.controller.manager.env.relatedImageSignPullSecret | default (get (first (.Values.global.imagePullSecrets | default (list dict))) "name") | default "" }} {{- end}} - {{- if .Values.controller.manager.env.relatedImageWorkerPullSecret }} + {{- if or .Values.controller.manager.env.relatedImageWorkerPullSecret .Values.global.imagePullSecrets }} - name: RELATED_IMAGE_WORKER_PULL_SECRET - value: {{ .Values.controller.manager.env.relatedImageWorkerPullSecret }} + value: {{ .Values.controller.manager.env.relatedImageWorkerPullSecret | default (get (first (.Values.global.imagePullSecrets | default (list dict))) "name") | default "" }} {{- end}} {{- if .Values.global.proxy.env | default dict}} {{- range $key, $value := .Values.global.proxy.env }} @@ -90,9 +90,14 @@ spec: - mountPath: /controller_config.yaml name: manager-config subPath: controller_config.yaml - {{- if .Values.controller.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controller.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.controller.manager.imagePullSecrets }} - name: {{ .Values.controller.manager.imagePullSecrets }} + {{- end }} {{- end}} securityContext: runAsNonRoot: true @@ -184,9 +189,14 @@ spec: - mountPath: /controller_config.yaml name: manager-config subPath: controller_config.yaml - {{- if .Values.webhookServer.webhookServer.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.webhookServer.webhookServer.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.webhookServer.webhookServer.imagePullSecrets }} - name: {{ .Values.webhookServer.webhookServer.imagePullSecrets }} + {{- end }} {{- end}} securityContext: runAsNonRoot: true diff --git a/helm-charts-k8s/charts/kmm/values.yaml b/helm-charts-k8s/charts/kmm/values.yaml index 0be1cf5e4..ad5d1d383 100644 --- a/helm-charts-k8s/charts/kmm/values.yaml +++ b/helm-charts-k8s/charts/kmm/values.yaml @@ -13,9 +13,11 @@ controller: relatedImageWorker: docker.io/rocm/kernel-module-management-worker:latest # -- Image pull secret name for pulling KMM kaniko builder image if registry needs credential to pull image relatedImageBuildPullSecret: "" - # -- Image pull secret name for pulling KMM signer image if registry needs credential to pull image + # -- Image pull secret name for pulling KMM signer image if registry needs credential to pull image. + # If not set and global.imagePullSecrets is configured, the first global secret will be used automatically. relatedImageSignPullSecret: "" - # -- Image pull secret name for pulling KMM worker image if registry needs credential to pull image + # -- Image pull secret name for pulling KMM worker image if registry needs credential to pull image. + # If not set and global.imagePullSecrets is configured, the first global secret will be used automatically. 
relatedImageWorkerPullSecret: "" image: # -- KMM controller manager image repository diff --git a/helm-charts-k8s/crds/deviceconfig-crd.yaml b/helm-charts-k8s/crds/deviceconfig-crd.yaml index c1a242720..05cf63844 100644 --- a/helm-charts-k8s/crds/deviceconfig-crd.yaml +++ b/helm-charts-k8s/crds/deviceconfig-crd.yaml @@ -54,6 +54,28 @@ spec: commonConfig: description: common config properties: + imageRegistrySecrets: + description: |- + ImageRegistrySecrets are global secrets used for pull/push images from/to private registries. + These secrets will be applied to all component pods (device plugin, metrics exporter, + test runner, config manager, DRA driver, node labeller) in addition to component-specific secrets. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array initContainerImage: description: InitContainerImage is being used for the operands pods, i.e. metrics exporter, test runner, device plugin, device config diff --git a/helm-charts-k8s/templates/default-deviceconfig.yaml b/helm-charts-k8s/templates/default-deviceconfig.yaml index 326d2e1b5..52c57b529 100644 --- a/helm-charts-k8s/templates/default-deviceconfig.yaml +++ b/helm-charts-k8s/templates/default-deviceconfig.yaml @@ -107,6 +107,16 @@ spec: initContainerImage: {{ . 
}} {{- end }} + {{- if or .imageRegistrySecrets $.Values.global.imagePullSecrets }} + imageRegistrySecrets: + {{- if $.Values.global.imagePullSecrets }} + {{- toYaml $.Values.global.imagePullSecrets | nindent 6 }} + {{- end }} + {{- if .imageRegistrySecrets }} + {{- toYaml .imageRegistrySecrets | nindent 6 }} + {{- end }} + {{- end }} + {{- with .utilsContainer }} utilsContainer: {{- with .image }} diff --git a/helm-charts-k8s/templates/deployment.yaml b/helm-charts-k8s/templates/deployment.yaml index 0bd56c848..d12ccf2d5 100644 --- a/helm-charts-k8s/templates/deployment.yaml +++ b/helm-charts-k8s/templates/deployment.yaml @@ -67,9 +67,14 @@ spec: - mountPath: /controller_manager_config.yaml name: manager-config subPath: controller_manager_config.yaml - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} + {{- end }} {{- end}} securityContext: runAsNonRoot: true diff --git a/helm-charts-k8s/templates/post-delete-hook.yaml b/helm-charts-k8s/templates/post-delete-hook.yaml index a85892994..5343d8c0a 100644 --- a/helm-charts-k8s/templates/post-delete-hook.yaml +++ b/helm-charts-k8s/templates/post-delete-hook.yaml @@ -131,10 +131,15 @@ spec: kubectl delete crds nodemodulesconfigs.kmm.sigs.x-k8s.io fi {{- end }} - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} {{- end }} + {{- end }} {{- with .Values.controllerManager.manager.tolerations }} tolerations: {{- toYaml . | nindent 8 }} diff --git a/helm-charts-k8s/templates/pre-delete-hook.yaml b/helm-charts-k8s/templates/pre-delete-hook.yaml index 381e78d73..ee4dab341 100644 --- a/helm-charts-k8s/templates/pre-delete-hook.yaml +++ b/helm-charts-k8s/templates/pre-delete-hook.yaml @@ -86,9 +86,14 @@ spec: fi # Delete all existing DeviceConfig custom resources kubectl delete deviceconfigs.amd.com --all -A - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} + {{- end }} {{- end}} {{- with .Values.controllerManager.manager.tolerations }} tolerations: diff --git a/helm-charts-k8s/templates/pre-upgrade-hook.yaml b/helm-charts-k8s/templates/pre-upgrade-hook.yaml index 639f06212..40cf7c2df 100644 --- a/helm-charts-k8s/templates/pre-upgrade-hook.yaml +++ b/helm-charts-k8s/templates/pre-upgrade-hook.yaml @@ -105,10 +105,15 @@ spec: echo "All DeviceConfigs are in an allowed state. Proceeding with upgrade." exit 0 fi - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} {{- end }} + {{- end }} {{- with .Values.controllerManager.manager.tolerations }} tolerations: {{- toYaml . | nindent 8 }} @@ -196,10 +201,15 @@ spec: name: upgrade-crd spec: serviceAccountName: upgrade-crd-hook-sa - {{- if .Values.controllerManager.manager.imagePullSecrets }} + {{- if or .Values.global.imagePullSecrets .Values.controllerManager.manager.imagePullSecrets }} imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.controllerManager.manager.imagePullSecrets }} - name: {{ .Values.controllerManager.manager.imagePullSecrets }} {{- end }} + {{- end }} {{- with .Values.controllerManager.manager.tolerations }} tolerations: {{- toYaml . | nindent 8 }} diff --git a/helm-charts-k8s/templates/remediation-deployment.yaml b/helm-charts-k8s/templates/remediation-deployment.yaml index 9aaf468cb..d84b9b63c 100644 --- a/helm-charts-k8s/templates/remediation-deployment.yaml +++ b/helm-charts-k8s/templates/remediation-deployment.yaml @@ -337,6 +337,12 @@ spec: securityContext: runAsNonRoot: true serviceAccountName: argo + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- range .Values.global.imagePullSecrets }} + - {{ toYaml . | nindent 8 }} + {{- end }} + {{- end }} tolerations: - key: "amd-gpu-unhealthy" operator: "Exists" diff --git a/helm-charts-k8s/values.yaml b/helm-charts-k8s/values.yaml index e050da3c0..4732ad7fd 100644 --- a/helm-charts-k8s/values.yaml +++ b/helm-charts-k8s/values.yaml @@ -1,5 +1,7 @@ # NFD related configs # schema reference: https://github.com/kubernetes-sigs/node-feature-discovery/blob/release-0.16/deployment/helm/node-feature-discovery/values.yaml +# Note: To use global secrets, set imagePullSecrets of NFD subchart itself, global.imagePullSecrets will not be automatically inherited by NFD subchart. 
+# Example: node-feature-discovery.imagePullSecrets: [{"name": "my-secret"}] node-feature-discovery: # -- Set to true/false to enable/disable the installation of node feature discovery (NFD) operator enabled: true @@ -16,6 +18,9 @@ node-feature-discovery: # -- Set nodeSelector for NFD worker daemonset nodeSelector: {} # KMM related configs +# Note: KMM automatically inherits global.imagePullSecrets. You can override or supplement +# with component-specific secrets using controller.manager.imagePullSecrets and +# webhookServer.webhookServer.imagePullSecrets kmm: # -- Set to true/false to enable/disable the installation of kernel module management (KMM) operator subchart enabled: true @@ -372,5 +377,19 @@ utilsContainer: serviceAccount: annotations: {} global: + # -- Global image pull secret(s) applied to all component pods and subcharts. + # If specified, these secrets will be used by: + # - GPU operator controller manager deployment + # - Remediation workflow controller + # - All helm hooks (pre-upgrade, pre-delete, post-delete) + # - DeviceConfig-managed components (via commonConfig.imageRegistrySecrets) + # - KMM controller and webhook pods (automatically inherited) + # - KMM builder/signer/worker pods (automatically uses first secret as fallback) + # + # Format: [{"name": "myGlobalSecret"}] or [{"name": "secret1"}, {"name": "secret2"}] + # + # Note: For NFD subchart, you must manually set the field to match global secrets: + # node-feature-discovery.imagePullSecrets: [{"name": "myGlobalSecret"}] + imagePullSecrets: [] proxy: env: {} diff --git a/internal/configmanager/configmanager.go b/internal/configmanager/configmanager.go index bb34e94fb..6ab5c41a9 100644 --- a/internal/configmanager/configmanager.go +++ b/internal/configmanager/configmanager.go @@ -284,6 +284,11 @@ func (nl *configManager) SetConfigManagerAsDesired(ds *appsv1.DaemonSet, devConf } imagePullSecrets := []v1.LocalObjectReference{} + // Add global secrets first + if 
len(devConfig.Spec.CommonConfig.ImageRegistrySecrets) > 0 { + imagePullSecrets = append(imagePullSecrets, devConfig.Spec.CommonConfig.ImageRegistrySecrets...) + } + // Add component-specific secret if trSpec.ImageRegistrySecret != nil { imagePullSecrets = append(imagePullSecrets, *trSpec.ImageRegistrySecret) } diff --git a/internal/controllers/device_config_reconciler.go b/internal/controllers/device_config_reconciler.go index 8f9856e25..0fa9fa1c1 100644 --- a/internal/controllers/device_config_reconciler.go +++ b/internal/controllers/device_config_reconciler.go @@ -522,6 +522,18 @@ func (drch *deviceConfigReconcilerHelper) findDeviceConfigsForSecret(ctx context } func (dcrh *deviceConfigReconcilerHelper) hasSecretReference(secretName string, dcfg amdv1alpha1.DeviceConfig) bool { + // Check global secrets + for _, secret := range dcfg.Spec.CommonConfig.ImageRegistrySecrets { + if secret.Name == secretName { + return true + } + } + // Check utils container image registry secret under CommonConfig + if dcfg.Spec.CommonConfig.UtilsContainer.ImageRegistrySecret != nil && + dcfg.Spec.CommonConfig.UtilsContainer.ImageRegistrySecret.Name == secretName { + return true + } + // these secrets are KMM driver build/sign/install related secrets // wrong configuration of them is hard to debug unless dumping logs // when their secrets are corrected up and a secret event kicks in @@ -535,6 +547,35 @@ func (dcrh *deviceConfigReconcilerHelper) hasSecretReference(secretName string, if dcfg.Spec.Driver.ImageSign.CertSecret != nil && dcfg.Spec.Driver.ImageSign.CertSecret.Name == secretName { return true } + + // Check component-specific secrets + if dcfg.Spec.DevicePlugin.ImageRegistrySecret != nil && dcfg.Spec.DevicePlugin.ImageRegistrySecret.Name == secretName { + return true + } + if dcfg.Spec.MetricsExporter.ImageRegistrySecret != nil && dcfg.Spec.MetricsExporter.ImageRegistrySecret.Name == secretName { + return true + } + // Check MetricsExporter RBAC secret + if 
dcfg.Spec.MetricsExporter.RbacConfig.Secret != nil && + dcfg.Spec.MetricsExporter.RbacConfig.Secret.Name == secretName { + return true + } + if dcfg.Spec.TestRunner.ImageRegistrySecret != nil && dcfg.Spec.TestRunner.ImageRegistrySecret.Name == secretName { + return true + } + // Check TestRunner logs export secrets + for _, secret := range dcfg.Spec.TestRunner.LogsLocation.LogsExportSecrets { + if secret != nil && secret.Name == secretName { + return true + } + } + if dcfg.Spec.ConfigManager.ImageRegistrySecret != nil && dcfg.Spec.ConfigManager.ImageRegistrySecret.Name == secretName { + return true + } + if dcfg.Spec.DRADriver.ImageRegistrySecret != nil && dcfg.Spec.DRADriver.ImageRegistrySecret.Name == secretName { + return true + } + return false } diff --git a/internal/metricsexporter/metricsexporter.go b/internal/metricsexporter/metricsexporter.go index 1b812f50e..23841d2f2 100644 --- a/internal/metricsexporter/metricsexporter.go +++ b/internal/metricsexporter/metricsexporter.go @@ -276,6 +276,11 @@ func (nl *metricsExporter) SetMetricsExporterAsDesired(ds *appsv1.DaemonSet, dev } imagePullSecrets := []v1.LocalObjectReference{} + // Add global secrets first + if len(devConfig.Spec.CommonConfig.ImageRegistrySecrets) > 0 { + imagePullSecrets = append(imagePullSecrets, devConfig.Spec.CommonConfig.ImageRegistrySecrets...) 
+ } + // Add component-specific secret if mSpec.ImageRegistrySecret != nil { imagePullSecrets = append(imagePullSecrets, *mSpec.ImageRegistrySecret) } diff --git a/internal/nodelabeller/nodelabeller.go b/internal/nodelabeller/nodelabeller.go index d2bfc7e82..00ea31f93 100644 --- a/internal/nodelabeller/nodelabeller.go +++ b/internal/nodelabeller/nodelabeller.go @@ -157,6 +157,11 @@ func (nl *nodeLabeller) SetNodeLabellerAsDesired(ds *appsv1.DaemonSet, devConfig } imagePullSecrets := []v1.LocalObjectReference{} + // Add global secrets first + if len(devConfig.Spec.CommonConfig.ImageRegistrySecrets) > 0 { + imagePullSecrets = append(imagePullSecrets, devConfig.Spec.CommonConfig.ImageRegistrySecrets...) + } + // Add component-specific secret if devConfig.Spec.DevicePlugin.ImageRegistrySecret != nil { imagePullSecrets = append(imagePullSecrets, *devConfig.Spec.DevicePlugin.ImageRegistrySecret) } diff --git a/internal/plugin/plugin.go b/internal/plugin/plugin.go index 5bc1e17a7..966bd09f8 100644 --- a/internal/plugin/plugin.go +++ b/internal/plugin/plugin.go @@ -134,6 +134,11 @@ func (dp *devicePlugin) SetDevicePluginAsDesired(ds *appsv1.DaemonSet, devConfig nodeSelector[kmmLabels.GetKernelModuleReadyNodeLabel(devConfig.Namespace, devConfig.Name)] = "" } imagePullSecrets := []v1.LocalObjectReference{} + // Add global secrets first + if len(devConfig.Spec.CommonConfig.ImageRegistrySecrets) > 0 { + imagePullSecrets = append(imagePullSecrets, devConfig.Spec.CommonConfig.ImageRegistrySecrets...) 
+ } + // Add component-specific secret if devConfig.Spec.DevicePlugin.ImageRegistrySecret != nil { imagePullSecrets = append(imagePullSecrets, *devConfig.Spec.DevicePlugin.ImageRegistrySecret) } @@ -306,6 +311,11 @@ func (dp *devicePlugin) SetDRADriverAsDesired(ds *appsv1.DaemonSet, devConfig *a } imagePullSecrets := []v1.LocalObjectReference{} + // Add global secrets first + if len(devConfig.Spec.CommonConfig.ImageRegistrySecrets) > 0 { + imagePullSecrets = append(imagePullSecrets, devConfig.Spec.CommonConfig.ImageRegistrySecrets...) + } + // Add component-specific secret if devConfig.Spec.DRADriver.ImageRegistrySecret != nil { imagePullSecrets = append(imagePullSecrets, *devConfig.Spec.DRADriver.ImageRegistrySecret) } diff --git a/internal/testrunner/testrunner.go b/internal/testrunner/testrunner.go index ab54a58fa..0d3a76a4c 100644 --- a/internal/testrunner/testrunner.go +++ b/internal/testrunner/testrunner.go @@ -270,6 +270,11 @@ func (nl *testRunner) SetTestRunnerAsDesired(ds *appsv1.DaemonSet, devConfig *am } imagePullSecrets := []v1.LocalObjectReference{} + // Add global secrets first + if len(devConfig.Spec.CommonConfig.ImageRegistrySecrets) > 0 { + imagePullSecrets = append(imagePullSecrets, devConfig.Spec.CommonConfig.ImageRegistrySecrets...) 
+ } + // Add component-specific secret if trSpec.ImageRegistrySecret != nil { imagePullSecrets = append(imagePullSecrets, *trSpec.ImageRegistrySecret) } diff --git a/internal/validator/specValidators.go b/internal/validator/specValidators.go index bf62dbb5e..3cf66fac9 100644 --- a/internal/validator/specValidators.go +++ b/internal/validator/specValidators.go @@ -235,3 +235,19 @@ func ValidateRemediationWorkflowSpec(ctx context.Context, client client.Client, return nil } + +// ValidateCommonConfigSpec runs validateSecret, in the DeviceConfig's namespace, on every entry of spec.commonConfig.imageRegistrySecrets and returns the first failure wrapped with the entry's index. +func ValidateCommonConfigSpec(ctx context.Context, client client.Client, devConfig *amdv1alpha1.DeviceConfig) error { + commonConfig := devConfig.Spec.CommonConfig + + // Validate global ImageRegistrySecrets + if len(commonConfig.ImageRegistrySecrets) > 0 { + for i, secretRef := range commonConfig.ImageRegistrySecrets { + if err := validateSecret(ctx, client, &secretRef, devConfig.Namespace); err != nil { + return fmt.Errorf("ImageRegistrySecrets[%d]: %w", i, err) + } + } + } + + return nil +} diff --git a/internal/validator/validator.go b/internal/validator/validator.go index 5ed0be587..5090f3a9d 100644 --- a/internal/validator/validator.go +++ b/internal/validator/validator.go @@ -42,6 +42,7 @@ func NewValidator() ValidatorAPI { "devicePlugin": ValidateDevicePluginSpec, "draDriver": ValidateDRADriverSpec, "remediationWorkflow": ValidateRemediationWorkflowSpec, + "commonConfig": ValidateCommonConfigSpec, } vInst := &validator{ specValidationFuncs: specValidationFuncs, diff --git a/tests/helm-e2e/helm_e2e_test.go b/tests/helm-e2e/helm_e2e_test.go index 86a0ece5d..8aed527a4 100644 --- a/tests/helm-e2e/helm_e2e_test.go +++ b/tests/helm-e2e/helm_e2e_test.go @@ -23,6 +23,7 @@ import ( "os" "os/exec" "reflect" + "slices" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "github.com/stretchr/testify/assert" @@ -1143,3 +1144,317 @@ func (s *E2ESuite) TestHelmInstallWithKMMDisabled(c *C) { logger.Info("KMM disabled test passed -
cleaning up") s.uninstallHelmChart(c, false, nil) } + +func (s *E2ESuite) TestGlobalImagePullSecrets(c *C) { + testName := "TestGlobalImagePullSecrets" + secretName := "test-global-registry-secret" + + // Create a dummy image pull secret + secret := &corev1.Secret{ + ObjectMeta: v1.ObjectMeta{ + Name: secretName, + Namespace: s.ns, + }, + Type: corev1.SecretTypeDockerConfigJson, + Data: map[string][]byte{ + corev1.DockerConfigJsonKey: []byte(`{"auths":{"https://index.dummyRegistry.io/v1/":{"auth":"dummyToken"}}}`), + }, + } + + _, err := s.clientSet.CoreV1().Secrets(s.ns).Create(context.TODO(), secret, v1.CreateOptions{}) + assert.NoError(c, err, fmt.Sprintf("%s: failed to create test secret", testName)) + + // Defer secret deletion + defer func() { + err := s.clientSet.CoreV1().Secrets(s.ns).Delete(context.TODO(), secretName, v1.DeleteOptions{}) + assert.NoError(c, err, fmt.Sprintf("%s: failed to delete test secret", testName)) + }() + + // Install helm chart with the global secret; the NFD subchart is documented as not inheriting global.imagePullSecrets, so it is set there explicitly + s.installHelmChart(c, false, []string{ + "--set", fmt.Sprintf("global.imagePullSecrets[0].name=%s", secretName), "--set", fmt.Sprintf("node-feature-discovery.imagePullSecrets[0].name=%s", secretName), + }) + + // Defer helm chart uninstall + defer func() { + s.uninstallHelmChart(c, false, nil) + }() + + // Verify DeviceConfig has the global secret in commonConfig + devCfgList, err := s.dClient.DeviceConfigs(s.ns).List(v1.ListOptions{}) + assert.NoError(c, err, fmt.Sprintf("%s: failed to list DeviceConfigs", testName)) + assert.True(c, len(devCfgList.Items) == 1, fmt.Sprintf("%s: expected 1 DeviceConfig, got %d", testName, len(devCfgList.Items))) + + devConfig := devCfgList.Items[0] + assert.True(c, len(devConfig.Spec.CommonConfig.ImageRegistrySecrets) == 1, + fmt.Sprintf("%s: expected 1 global secret in commonConfig, got %d", testName, len(devConfig.Spec.CommonConfig.ImageRegistrySecrets))) + assert.Equal(c, secretName, devConfig.Spec.CommonConfig.ImageRegistrySecrets[0].Name, + fmt.Sprintf("%s: expected secret name %s, got %s", testName,
secretName, devConfig.Spec.CommonConfig.ImageRegistrySecrets[0].Name)) + + // Verify GPU operator controller manager deployment has the secret + controllerDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "control-plane=controller-manager", + }) + assert.NoError(c, err, fmt.Sprintf("%s: failed to list controller manager deployment", testName)) + assert.True(c, len(controllerDeploymentList.Items) == 1, fmt.Sprintf("%s: expected 1 controller manager deployment, got %d", testName, len(controllerDeploymentList.Items))) + controllerDeployment := controllerDeploymentList.Items[0] + assert.True(c, len(controllerDeployment.Spec.Template.Spec.ImagePullSecrets) >= 1, + fmt.Sprintf("%s: controller manager should have at least 1 imagePullSecret", testName)) + assert.Equal(c, secretName, controllerDeployment.Spec.Template.Spec.ImagePullSecrets[0].Name, + fmt.Sprintf("%s: controller manager imagePullSecret mismatch", testName)) + + // Verify remediation workflow controller deployment has the secret + remediationDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "app=amd-gpu-operator-workflow-controller", + }) + if err == nil && len(remediationDeploymentList.Items) > 0 { + // Remediation might be disabled in some test environments + remediationDeployment := remediationDeploymentList.Items[0] + assert.True(c, len(remediationDeployment.Spec.Template.Spec.ImagePullSecrets) >= 1, + fmt.Sprintf("%s: remediation controller should have at least 1 imagePullSecret", testName)) + assert.Equal(c, secretName, remediationDeployment.Spec.Template.Spec.ImagePullSecrets[0].Name, + fmt.Sprintf("%s: remediation controller imagePullSecret mismatch", testName)) + } + + // Verify KMM controller deployment has the secret + kmmControllerDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: 
"app.kubernetes.io/component=kmm,control-plane=controller", + }) + assert.NoError(c, err, fmt.Sprintf("%s: failed to list KMM controller deployment", testName)) + assert.True(c, len(kmmControllerDeploymentList.Items) == 1, fmt.Sprintf("%s: expected 1 KMM controller deployment, got %d", testName, len(kmmControllerDeploymentList.Items))) + kmmControllerDeployment := kmmControllerDeploymentList.Items[0] + assert.True(c, len(kmmControllerDeployment.Spec.Template.Spec.ImagePullSecrets) >= 1, + fmt.Sprintf("%s: KMM controller should have at least 1 imagePullSecret", testName)) + assert.Equal(c, secretName, kmmControllerDeployment.Spec.Template.Spec.ImagePullSecrets[0].Name, + fmt.Sprintf("%s: KMM controller imagePullSecret mismatch", testName)) + + // Verify KMM webhook deployment has the secret + kmmWebhookDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "app.kubernetes.io/component=kmm,control-plane=webhook-server", + }) + assert.NoError(c, err, fmt.Sprintf("%s: failed to list KMM webhook deployment", testName)) + assert.True(c, len(kmmWebhookDeploymentList.Items) == 1, fmt.Sprintf("%s: expected 1 KMM webhook deployment, got %d", testName, len(kmmWebhookDeploymentList.Items))) + kmmWebhookDeployment := kmmWebhookDeploymentList.Items[0] + assert.True(c, len(kmmWebhookDeployment.Spec.Template.Spec.ImagePullSecrets) >= 1, + fmt.Sprintf("%s: KMM webhook should have at least 1 imagePullSecret", testName)) + assert.Equal(c, secretName, kmmWebhookDeployment.Spec.Template.Spec.ImagePullSecrets[0].Name, + fmt.Sprintf("%s: KMM webhook imagePullSecret mismatch", testName)) + + // Verify KMM deployment has RELATED_IMAGE_*_PULL_SECRET env vars set + kmmContainer := kmmControllerDeployment.Spec.Template.Spec.Containers[0] + envVars := map[string]bool{ + "RELATED_IMAGE_BUILD_PULL_SECRET": false, + "RELATED_IMAGE_SIGN_PULL_SECRET": false, + "RELATED_IMAGE_WORKER_PULL_SECRET": false, + } + for _, env := range 
kmmContainer.Env { + if _, exists := envVars[env.Name]; exists { + assert.Equal(c, secretName, env.Value, + fmt.Sprintf("%s: KMM %s env var should be %s, got %s", testName, env.Name, secretName, env.Value)) + envVars[env.Name] = true + } + } + for envName, found := range envVars { + assert.True(c, found, fmt.Sprintf("%s: KMM deployment missing %s env var", testName, envName)) + } + + // Verify NFD pods have the secret + nfdWorkerDaemonSetList, err := s.clientSet.AppsV1().DaemonSets(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=node-feature-discovery,role=worker", + }) + assert.NoError(c, err, fmt.Sprintf("%s: failed to list NFD worker daemonset", testName)) + assert.True(c, len(nfdWorkerDaemonSetList.Items) == 1, fmt.Sprintf("%s: expected 1 NFD worker daemonset, got %d", testName, len(nfdWorkerDaemonSetList.Items))) + nfdWorkerDaemonSet := nfdWorkerDaemonSetList.Items[0] + assert.True(c, len(nfdWorkerDaemonSet.Spec.Template.Spec.ImagePullSecrets) >= 1, + fmt.Sprintf("%s: NFD worker should have at least 1 imagePullSecret", testName)) + assert.Equal(c, secretName, nfdWorkerDaemonSet.Spec.Template.Spec.ImagePullSecrets[0].Name, + fmt.Sprintf("%s: NFD worker imagePullSecret mismatch", testName)) + + nfdMasterDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=node-feature-discovery,app.kubernetes.io/component=nfd-master", + }) + if err == nil && len(nfdMasterDeploymentList.Items) > 0 { + // NFD master might be a deployment or might not exist depending on NFD version + nfdMasterDeployment := nfdMasterDeploymentList.Items[0] + assert.True(c, len(nfdMasterDeployment.Spec.Template.Spec.ImagePullSecrets) >= 1, + fmt.Sprintf("%s: NFD master should have at least 1 imagePullSecret", testName)) + assert.Equal(c, secretName, nfdMasterDeployment.Spec.Template.Spec.ImagePullSecrets[0].Name, + fmt.Sprintf("%s: NFD master imagePullSecret mismatch", testName)) 
+ } + + logger.Infof("%s: All verifications passed successfully", testName) +} + +func (s *E2ESuite) TestGlobalAndComponentSpecificImagePullSecrets(c *C) { + testName := "TestGlobalAndComponentSpecificImagePullSecrets" + globalSecretName := "test-global-registry-secret-v2" + componentSecretName := "test-component-registry-secret-v2" + + // Create a dummy global image pull secret + globalSecret := &corev1.Secret{ + ObjectMeta: v1.ObjectMeta{ + Name: globalSecretName, + Namespace: s.ns, + }, + Type: corev1.SecretTypeDockerConfigJson, + Data: map[string][]byte{ + corev1.DockerConfigJsonKey: []byte(`{"auths":{"https://index.globalRegistry.io/v1/":{"auth":"globalToken"}}}`), + }, + } + + _, err := s.clientSet.CoreV1().Secrets(s.ns).Create(context.TODO(), globalSecret, v1.CreateOptions{}) + assert.NoError(c, err, fmt.Sprintf("%s: failed to create global secret", testName)) + + // Create a dummy component-specific image pull secret + componentSecret := &corev1.Secret{ + ObjectMeta: v1.ObjectMeta{ + Name: componentSecretName, + Namespace: s.ns, + }, + Type: corev1.SecretTypeDockerConfigJson, + Data: map[string][]byte{ + corev1.DockerConfigJsonKey: []byte(`{"auths":{"https://index.componentRegistry.io/v1/":{"auth":"componentToken"}}}`), + }, + } + + _, err = s.clientSet.CoreV1().Secrets(s.ns).Create(context.TODO(), componentSecret, v1.CreateOptions{}) + assert.NoError(c, err, fmt.Sprintf("%s: failed to create component secret", testName)) + + // Defer secrets deletion + defer func() { + err := s.clientSet.CoreV1().Secrets(s.ns).Delete(context.TODO(), globalSecretName, v1.DeleteOptions{}) + assert.NoError(c, err, fmt.Sprintf("%s: failed to delete global secret", testName)) + err = s.clientSet.CoreV1().Secrets(s.ns).Delete(context.TODO(), componentSecretName, v1.DeleteOptions{}) + assert.NoError(c, err, fmt.Sprintf("%s: failed to delete component secret", testName)) + }() + + // Install helm chart with both global and component-specific secrets + s.installHelmChart(c, false, 
[]string{ + "--set", fmt.Sprintf("global.imagePullSecrets[0].name=%s", globalSecretName), + "--set", fmt.Sprintf("controllerManager.manager.imagePullSecrets=%s", componentSecretName), + "--set", fmt.Sprintf("kmm.controller.manager.imagePullSecrets=%s", componentSecretName), + "--set", fmt.Sprintf("kmm.webhookServer.webhookServer.imagePullSecrets=%s", componentSecretName), + "--set", fmt.Sprintf("kmm.controller.manager.env.relatedImageBuildPullSecret=%s", componentSecretName), + "--set", fmt.Sprintf("kmm.controller.manager.env.relatedImageSignPullSecret=%s", componentSecretName), + "--set", fmt.Sprintf("kmm.controller.manager.env.relatedImageWorkerPullSecret=%s", componentSecretName), + "--set", fmt.Sprintf("node-feature-discovery.imagePullSecrets[0].name=%s", componentSecretName), + }) + + // Defer helm chart uninstall + defer func() { + s.uninstallHelmChart(c, false, nil) + }() + + // Verify GPU operator controller manager deployment has both secrets + controllerDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "control-plane=controller-manager", + }) + assert.NoError(c, err, fmt.Sprintf("%s: failed to list controller manager deployment", testName)) + assert.True(c, len(controllerDeploymentList.Items) == 1, fmt.Sprintf("%s: expected 1 controller manager deployment, got %d", testName, len(controllerDeploymentList.Items))) + controllerDeployment := controllerDeploymentList.Items[0] + assert.True(c, len(controllerDeployment.Spec.Template.Spec.ImagePullSecrets) >= 2, + fmt.Sprintf("%s: controller manager should have at least 2 imagePullSecrets, got %d", testName, len(controllerDeployment.Spec.Template.Spec.ImagePullSecrets))) + secretNames := make([]string, 0, len(controllerDeployment.Spec.Template.Spec.ImagePullSecrets)) + for _, secret := range controllerDeployment.Spec.Template.Spec.ImagePullSecrets { + secretNames = append(secretNames, secret.Name) + } + assert.True(c, contains(secretNames, 
globalSecretName), + fmt.Sprintf("%s: controller manager should have global secret %s", testName, globalSecretName)) + assert.True(c, contains(secretNames, componentSecretName), + fmt.Sprintf("%s: controller manager should have component secret %s", testName, componentSecretName)) + + // Verify KMM controller deployment has both secrets + kmmControllerDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "app.kubernetes.io/component=kmm,control-plane=controller", + }) + assert.NoError(c, err, fmt.Sprintf("%s: failed to list KMM controller deployment", testName)) + assert.True(c, len(kmmControllerDeploymentList.Items) == 1, fmt.Sprintf("%s: expected 1 KMM controller deployment, got %d", testName, len(kmmControllerDeploymentList.Items))) + kmmControllerDeployment := kmmControllerDeploymentList.Items[0] + assert.True(c, len(kmmControllerDeployment.Spec.Template.Spec.ImagePullSecrets) >= 2, + fmt.Sprintf("%s: KMM controller should have at least 2 imagePullSecrets, got %d", testName, len(kmmControllerDeployment.Spec.Template.Spec.ImagePullSecrets))) + kmmSecretNames := make([]string, 0, len(kmmControllerDeployment.Spec.Template.Spec.ImagePullSecrets)) + for _, secret := range kmmControllerDeployment.Spec.Template.Spec.ImagePullSecrets { + kmmSecretNames = append(kmmSecretNames, secret.Name) + } + assert.True(c, contains(kmmSecretNames, globalSecretName), + fmt.Sprintf("%s: KMM controller should have global secret %s", testName, globalSecretName)) + assert.True(c, contains(kmmSecretNames, componentSecretName), + fmt.Sprintf("%s: KMM controller should have component secret %s", testName, componentSecretName)) + + // Verify KMM webhook deployment has both secrets + kmmWebhookDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "app.kubernetes.io/component=kmm,control-plane=webhook-server", + }) + assert.NoError(c, err, fmt.Sprintf("%s: failed to list 
KMM webhook deployment", testName)) + assert.True(c, len(kmmWebhookDeploymentList.Items) == 1, fmt.Sprintf("%s: expected 1 KMM webhook deployment, got %d", testName, len(kmmWebhookDeploymentList.Items))) + kmmWebhookDeployment := kmmWebhookDeploymentList.Items[0] + assert.True(c, len(kmmWebhookDeployment.Spec.Template.Spec.ImagePullSecrets) >= 2, + fmt.Sprintf("%s: KMM webhook should have at least 2 imagePullSecrets, got %d", testName, len(kmmWebhookDeployment.Spec.Template.Spec.ImagePullSecrets))) + kmmWebhookSecretNames := make([]string, 0, len(kmmWebhookDeployment.Spec.Template.Spec.ImagePullSecrets)) + for _, secret := range kmmWebhookDeployment.Spec.Template.Spec.ImagePullSecrets { + kmmWebhookSecretNames = append(kmmWebhookSecretNames, secret.Name) + } + assert.True(c, contains(kmmWebhookSecretNames, globalSecretName), + fmt.Sprintf("%s: KMM webhook should have global secret %s", testName, globalSecretName)) + assert.True(c, contains(kmmWebhookSecretNames, componentSecretName), + fmt.Sprintf("%s: KMM webhook should have component secret %s", testName, componentSecretName)) + + // Verify NFD worker has component secret (if it exists) + nfdWorkerDaemonSetList, err := s.clientSet.AppsV1().DaemonSets(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=node-feature-discovery,role=worker", + }) + if err == nil && len(nfdWorkerDaemonSetList.Items) > 0 { + // NFD might be disabled in some test environments + nfdWorkerDaemonSet := nfdWorkerDaemonSetList.Items[0] + assert.True(c, len(nfdWorkerDaemonSet.Spec.Template.Spec.ImagePullSecrets) >= 1, + fmt.Sprintf("%s: NFD worker should have at least 1 imagePullSecret, got %d", testName, len(nfdWorkerDaemonSet.Spec.Template.Spec.ImagePullSecrets))) + nfdWorkerSecretNames := make([]string, 0, len(nfdWorkerDaemonSet.Spec.Template.Spec.ImagePullSecrets)) + for _, secret := range nfdWorkerDaemonSet.Spec.Template.Spec.ImagePullSecrets { + nfdWorkerSecretNames = append(nfdWorkerSecretNames, 
secret.Name) + } + assert.True(c, contains(nfdWorkerSecretNames, componentSecretName), + fmt.Sprintf("%s: NFD worker should have component secret %s", testName, componentSecretName)) + } + + // Verify NFD master has component secret (if it exists) + nfdMasterDeploymentList, err := s.clientSet.AppsV1().Deployments(s.ns).List(context.TODO(), v1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=node-feature-discovery,app.kubernetes.io/component=nfd-master", + }) + if err == nil && len(nfdMasterDeploymentList.Items) > 0 { + nfdMasterDeployment := nfdMasterDeploymentList.Items[0] + assert.True(c, len(nfdMasterDeployment.Spec.Template.Spec.ImagePullSecrets) >= 1, + fmt.Sprintf("%s: NFD master should have at least 1 imagePullSecret, got %d", testName, len(nfdMasterDeployment.Spec.Template.Spec.ImagePullSecrets))) + nfdMasterSecretNames := make([]string, 0, len(nfdMasterDeployment.Spec.Template.Spec.ImagePullSecrets)) + for _, secret := range nfdMasterDeployment.Spec.Template.Spec.ImagePullSecrets { + nfdMasterSecretNames = append(nfdMasterSecretNames, secret.Name) + } + assert.True(c, contains(nfdMasterSecretNames, componentSecretName), + fmt.Sprintf("%s: NFD master should have component secret %s", testName, componentSecretName)) + } + + // Verify KMM deployment has RELATED_IMAGE_*_PULL_SECRET env vars set to component secret (not global) + kmmContainer := kmmControllerDeployment.Spec.Template.Spec.Containers[0] + envVars := map[string]string{ + "RELATED_IMAGE_BUILD_PULL_SECRET": "", + "RELATED_IMAGE_SIGN_PULL_SECRET": "", + "RELATED_IMAGE_WORKER_PULL_SECRET": "", + } + for _, env := range kmmContainer.Env { + if _, exists := envVars[env.Name]; exists { + envVars[env.Name] = env.Value + } + } + for envName, envValue := range envVars { + assert.True(c, envValue != "", fmt.Sprintf("%s: KMM deployment missing %s env var", testName, envName)) + assert.Equal(c, componentSecretName, envValue, + fmt.Sprintf("%s: KMM %s env var should be component secret %s (not global %s), 
got %s", + testName, envName, componentSecretName, globalSecretName, envValue)) + } + + logger.Infof("%s: All verifications passed successfully", testName) +}
+
+// contains reports whether item occurs at least once in slice.
+func contains(slice []string, item string) bool {
+	return slices.Index(slice, item) >= 0
+}