Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions operator/internal/controller/pod_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,19 @@ func (r *SkyhookReconciler) UpdateNodeState(ctx context.Context, pod *corev1.Pod
return false, fmt.Errorf("error creating node wrapper: %w", err)
}

// If the node has no nodeState annotation, the annotation was intentionally deleted
// (e.g., manual node reset). The skyhook controller always creates the annotation
// before creating pods, so a missing annotation means external intervention.
// Don't recreate it - let the skyhook controller handle the reset via
// ValidateRunningPackages which will clean up stale pods.
initialState, err := skyhookNode.State()
if err != nil {
return false, fmt.Errorf("error reading node state: %w", err)
}
if initialState == nil {
return false, nil
}

updated := false
if state == v1alpha1.StateComplete {
updated, err = r.HandleCompletePod(ctx, skyhookNode, packagePtr, containerName)
Expand Down
14 changes: 0 additions & 14 deletions operator/internal/controller/skyhook_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2199,20 +2199,6 @@ func (r *SkyhookReconciler) ApplyPackage(ctx context.Context, logger logr.Logger
}
}

// if stage != v1alpha1.StageApply {
// // If a node gets rest by a user, the about method will return the wrong node state. Above sources it from the skyhook status.
// // check if the node has nothing, reset it then apply the package.
// nodeState, err := skyhookNode.State()
// if err != nil {
// return fmt.Errorf("error getting node state: %w", err)
// }

// _, found := nodeState[_package.GetUniqueName()]
// if !found {
// stage = v1alpha1.StageApply
// }
// }

nextStage := skyhookNode.NextStage(_package)
if nextStage != nil {
stage = *nextStage
Expand Down
64 changes: 64 additions & 0 deletions operator/internal/controller/skyhook_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1578,6 +1578,70 @@ var _ = Describe("Resource Comparison", func() {
})
})

var _ = Describe("pod controller tests", func() {
It("should not recreate annotation when nodeState is missing (node reset)", func() {
nodeName := "test-node-reset"

// Create a node WITHOUT any skyhook nodeState annotation
node := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName,
},
}
Expect(k8sClient.Create(ctx, node)).To(Succeed())
defer func() {
_ = k8sClient.Delete(ctx, node)
}()

// Build the package annotation for the pod
pkg := &PackageSkyhook{
PackageRef: v1alpha1.PackageRef{
Name: "bb",
Version: "1.2",
},
Skyhook: "test-skyhook-reset",
Stage: v1alpha1.StageConfig,
Image: "ghcr.io/nvidia/skyhook/agentless",
}
pkgData, err := json.Marshal(pkg)
Expect(err).ToNot(HaveOccurred())

// Create a pod that looks like a completed skyhook pod on the node
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test-reset-pod",
Namespace: opts.Namespace,
Labels: map[string]string{
fmt.Sprintf("%s/name", v1alpha1.METADATA_PREFIX): "test-skyhook-reset",
},
Annotations: map[string]string{
fmt.Sprintf("%s/package", v1alpha1.METADATA_PREFIX): string(pkgData),
},
},
Spec: corev1.PodSpec{
NodeName: nodeName,
Containers: []corev1.Container{{Name: "test", Image: "busybox"}},
},
}
Expect(k8sClient.Create(ctx, pod)).To(Succeed())
defer func() {
_ = k8sClient.Delete(ctx, pod)
}()

// Call UpdateNodeState - should skip because nodeState annotation is missing
requeue, err := operator.UpdateNodeState(ctx, pod, v1alpha1.StateErroring, "bb-config", 0)
Expect(err).ToNot(HaveOccurred())
Expect(requeue).To(BeFalse())

// Verify the node still has no nodeState annotation
var updatedNode corev1.Node
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nodeName}, &updatedNode)).To(Succeed())
annotationKey := fmt.Sprintf("%s/nodeState_test-skyhook-reset", v1alpha1.METADATA_PREFIX)
_, hasAnnotation := updatedNode.Annotations[annotationKey]
Expect(hasAnnotation).To(BeFalse(), "nodeState annotation should not be recreated after reset")
})
})

func TestGenerateValidPodNames(t *testing.T) {
g := NewWithT(t)

Expand Down
Loading