From 0000df3a1788723f6a933a8edd3da902032b51b4 Mon Sep 17 00:00:00 2001 From: Paul Coignet Date: Fri, 17 Apr 2026 14:59:14 +0200 Subject: [PATCH 1/3] Read name from UPDATER_TASK --- pkg/fleet/daemon.go | 31 ++++++++++---------- pkg/fleet/daemon_test.go | 52 ++++++++++++++++----------------- pkg/fleet/experiment.go | 22 +++++++++----- pkg/fleet/remote_config.go | 4 +-- pkg/fleet/remote_config_test.go | 8 ++--- 5 files changed, 59 insertions(+), 58 deletions(-) diff --git a/pkg/fleet/daemon.go b/pkg/fleet/daemon.go index d3a1e20c51..9d1a7ce1ce 100644 --- a/pkg/fleet/daemon.go +++ b/pkg/fleet/daemon.go @@ -47,7 +47,6 @@ type Daemon struct { revisionsEnabled bool mu sync.RWMutex configs map[string]installerConfig // keyed by config ID; replaced on each RC update - experimentTarget types.NamespacedName // DDA targeted by the current experiment; set on startExperiment } // NewDaemon creates a new Fleet Daemon. When revisionsEnabled is false, experiment @@ -172,13 +171,12 @@ func (d *Daemon) handleRemoteAPIRequest(ctx context.Context, req remoteAPIReques // resolveOperation looks up the installer config for the request, validates its single // DatadogAgent operation, and fills in the canonical GVK. Returns the operation ready for use. +// The target namespaced name is carried by the UPDATER_TASK itself (not the INSTALLER_CONFIG). func (d *Daemon) resolveOperation(req remoteAPIRequest, signal string) (fleetManagementOperation, error) { - // get params version from req id := req.Params.Version if id == "" { return fleetManagementOperation{}, fmt.Errorf("%s: version is required", signal) } - // match to d.configs[params.version] to get config cfg, err := d.getConfig(id) if err != nil { return fleetManagementOperation{}, fmt.Errorf("%s: %w", signal, err) @@ -204,16 +202,17 @@ func (d *Daemon) resolveOperation(req remoteAPIRequest, signal string) (fleetMan func (d *Daemon) startDatadogAgentExperiment(ctx context.Context, req remoteAPIRequest) error { logger := ctrl.LoggerFrom(ctx).WithValues("id", req.ID) logger.V(1).Info("Starting DatadogAgent experiment", "config", req.Params.Version) + nsn, err := parseTaskNSN(req, "start DatadogAgent experiment") + if err != nil { + return err + } op, err := d.resolveOperation(req, "start DatadogAgent experiment") if err != nil { logger.Error(err, "Failed to resolve operation") return err } - // Store the target DDA for promote/stop signals (which don't carry a config). - d.experimentTarget = op.NamespacedName - - logger = logger.WithValues("namespace", op.NamespacedName.Namespace, "name", op.NamespacedName.Name) + logger = logger.WithValues("namespace", nsn.Namespace, "name", nsn.Name) ctx = ctrl.LoggerInto(ctx, logger) // Check the operation @@ -223,8 +222,8 @@ func (d *Daemon) startDatadogAgentExperiment(ctx context.Context, req remoteAPIR // Fetch current DDA to check signal preconditions. dda := &v2alpha1.DatadogAgent{} - if err := d.client.Get(ctx, op.NamespacedName, dda); err != nil { - return fmt.Errorf("start DatadogAgent experiment: failed to get DatadogAgent %s: %w", op.NamespacedName, err) + if err := d.client.Get(ctx, nsn, dda); err != nil { + return fmt.Errorf("start DatadogAgent experiment: failed to get DatadogAgent %s: %w", nsn, err) } if err := canStart(getExperimentPhase(dda)); err != nil { @@ -253,7 +252,7 @@ func (d *Daemon) startDatadogAgentExperiment(ctx context.Context, req remoteAPIR // Update status: phase=running, record experiment ID. // Re-fetch inside the retry to get the latest ResourceVersion on conflict. if err := retryWithBackoff(ctx, func() error { - if err := d.client.Get(ctx, op.NamespacedName, dda); err != nil { + if err := d.client.Get(ctx, nsn, dda); err != nil { return err } dda.Status.Experiment = &v2alpha1.ExperimentStatus{ @@ -274,9 +273,9 @@ func (d *Daemon) startDatadogAgentExperiment(ctx context.Context, req remoteAPIR } func (d *Daemon) stopDatadogAgentExperiment(ctx context.Context, req remoteAPIRequest) error { - nsn := d.experimentTarget - if nsn.Name == "" { - return fmt.Errorf("stop DatadogAgent experiment: no experiment target set") + nsn, err := parseTaskNSN(req, "stop DatadogAgent experiment") + if err != nil { + return err } ctx = ctrl.LoggerInto(ctx, ctrl.LoggerFrom(ctx).WithValues("id", req.ID, "namespace", nsn.Namespace, "name", nsn.Name)) @@ -317,9 +316,9 @@ func (d *Daemon) stopDatadogAgentExperiment(ctx context.Context, req remoteAPIRe } func (d *Daemon) promoteDatadogAgentExperiment(ctx context.Context, req remoteAPIRequest) error { - nsn := d.experimentTarget - if nsn.Name == "" { - return fmt.Errorf("promote DatadogAgent experiment: no experiment target set") + nsn, err := parseTaskNSN(req, "promote DatadogAgent experiment") + if err != nil { + return err } ctx = ctrl.LoggerInto(ctx, ctrl.LoggerFrom(ctx).WithValues("id", req.ID, "namespace", nsn.Namespace, "name", nsn.Name)) diff --git a/pkg/fleet/daemon_test.go b/pkg/fleet/daemon_test.go index 6708253f5e..9298ef6201 100644 --- a/pkg/fleet/daemon_test.go +++ b/pkg/fleet/daemon_test.go @@ -44,7 +44,6 @@ func testDaemon(dda *v2alpha1.DatadogAgent, configs map[string]installerConfig) client: c, revisionsEnabled: true, configs: configs, - experimentTarget: testDDANSN, }, c } @@ -77,7 +76,6 @@ func testInstallerConfigWithDDA() map[string]installerConfig { { Operation: OperationUpdate, GroupVersionKind: testDDAGVK, - NamespacedName: testDDANSN, Config: json.RawMessage(`{}`), }, }, @@ -90,7 +88,7 @@ func testStartRequest() remoteAPIRequest { ID: "exp-abc", Package: "datadog-operator", Method: methodStartDatadogAgentExperiment, - Params: experimentParams{Version: "test-config"}, + Params: experimentParams{Version: "test-config", NamespacedName: testDDANSN}, } } @@ -111,7 +109,6 @@ func TestStartDatadogAgentExperiment_NoDDAOperation(t *testing.T) { { Operation: OperationUpdate, GroupVersionKind: schema.GroupVersionKind{Group: "other.io", Version: "v1", Kind: "Other"}, - NamespacedName: testDDANSN, Config: json.RawMessage(`{}`), }, }, @@ -221,7 +218,6 @@ func TestStartDatadogAgentExperiment_VersionMatchesInstallerConfig(t *testing.T) { Operation: OperationUpdate, GroupVersionKind: testDDAGVK, - NamespacedName: testDDANSN, Config: json.RawMessage(`{"spec":{"features":{"apm":{"enabled":true}}}}`), }, }, @@ -231,7 +227,7 @@ func TestStartDatadogAgentExperiment_VersionMatchesInstallerConfig(t *testing.T) req := remoteAPIRequest{ ID: "a1b2c3d4-e5f6-7890-abcd-ef1234567890", Method: methodStartDatadogAgentExperiment, - Params: experimentParams{Version: "aaaa-bbbb-cccc"}, + Params: experimentParams{Version: "aaaa-bbbb-cccc", NamespacedName: testDDANSN}, ExpectedState: expectedState{ Stable: "0.0.1", StableConfig: "0.0.1", @@ -271,7 +267,7 @@ func testStopRequest() remoteAPIRequest { ID: "exp-abc", Package: "datadog-operator", Method: methodStopDatadogAgentExperiment, - Params: experimentParams{Version: "test-config"}, + Params: experimentParams{Version: "test-config", NamespacedName: testDDANSN}, } } @@ -337,7 +333,7 @@ func testPromoteRequest() remoteAPIRequest { ID: "exp-abc", Package: "datadog-operator", Method: methodPromoteDatadogAgentExperiment, - Params: experimentParams{Version: "test-config"}, + Params: experimentParams{Version: "test-config", NamespacedName: testDDANSN}, } } @@ -497,33 +493,15 @@ func TestValidateOperation_Valid(t *testing.T) { op := fleetManagementOperation{ Operation: OperationUpdate, GroupVersionKind: testDDAGVK, - NamespacedName: testDDANSN, Config: json.RawMessage(`{}`), } assert.NoError(t, validateOperation(op)) } -func TestValidateOperation_EmptyName(t *testing.T) { - op := fleetManagementOperation{ - GroupVersionKind: testDDAGVK, - NamespacedName: types.NamespacedName{Namespace: "datadog", Name: ""}, - } - assert.Error(t, validateOperation(op)) -} - -func TestValidateOperation_EmptyNamespace(t *testing.T) { - op := fleetManagementOperation{ - GroupVersionKind: testDDAGVK, - NamespacedName: types.NamespacedName{Namespace: "", Name: "datadog-agent"}, - } - assert.Error(t, validateOperation(op)) -} - func TestValidateOperation_EmptyGroup_Allowed(t *testing.T) { // Group is auto-detected from the cluster; empty group is valid input. op := fleetManagementOperation{ GroupVersionKind: schema.GroupVersionKind{Group: "", Version: "", Kind: "DatadogAgent"}, - NamespacedName: testDDANSN, } assert.NoError(t, validateOperation(op)) } @@ -531,11 +509,31 @@ func TestValidateOperation_EmptyGroup_Allowed(t *testing.T) { func TestValidateOperation_WrongKind(t *testing.T) { op := fleetManagementOperation{ GroupVersionKind: schema.GroupVersionKind{Group: "datadoghq.com", Version: "v2alpha1", Kind: "DatadogMonitor"}, - NamespacedName: testDDANSN, } assert.Error(t, validateOperation(op)) } +// --- parseTaskNSN tests --- + +func TestParseTaskNSN_Valid(t *testing.T) { + req := remoteAPIRequest{Params: experimentParams{NamespacedName: testDDANSN}} + nsn, err := parseTaskNSN(req, "test") + require.NoError(t, err) + assert.Equal(t, testDDANSN, nsn) +} + +func TestParseTaskNSN_MissingName(t *testing.T) { + req := remoteAPIRequest{Params: experimentParams{NamespacedName: types.NamespacedName{Namespace: "datadog"}}} + _, err := parseTaskNSN(req, "test") + assert.Error(t, err) +} + +func TestParseTaskNSN_MissingNamespace(t *testing.T) { + req := remoteAPIRequest{Params: experimentParams{NamespacedName: types.NamespacedName{Name: "datadog-agent"}}} + _, err := parseTaskNSN(req, "test") + assert.Error(t, err) +} + // --- canStart tests --- func TestCanStart_NilPhase(t *testing.T) { diff --git a/pkg/fleet/experiment.go b/pkg/fleet/experiment.go index ec4bf24369..4befcca964 100644 --- a/pkg/fleet/experiment.go +++ b/pkg/fleet/experiment.go @@ -12,6 +12,7 @@ import ( "time" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" @@ -19,21 +20,26 @@ import ( v2alpha1 "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1" ) -// validateOperation checks that a fleetManagementOperation has the fields -// required to locate and act on a DatadogAgent resource. +// validateOperation checks that a fleetManagementOperation targets a DatadogAgent resource. func validateOperation(op fleetManagementOperation) error { - if op.NamespacedName.Name == "" { - return fmt.Errorf("operation namespaced name must have a non-empty name") - } - if op.NamespacedName.Namespace == "" { - return fmt.Errorf("operation namespaced name must have a non-empty namespace") - } if op.GroupVersionKind.Kind != "DatadogAgent" { return fmt.Errorf("operation kind must be DatadogAgent, got %q", op.GroupVersionKind.Kind) } return nil } +// parseTaskNSN extracts and validates the target namespaced name from an UPDATER_TASK request. +func parseTaskNSN(req remoteAPIRequest, signal string) (types.NamespacedName, error) { + nsn := req.Params.NamespacedName + if nsn.Name == "" { + return types.NamespacedName{}, fmt.Errorf("%s: params.namespaced_name.name is required", signal) + } + if nsn.Namespace == "" { + return types.NamespacedName{}, fmt.Errorf("%s: params.namespaced_name.namespace is required", signal) + } + return nsn, nil +} + // canStart returns whether a start signal is allowed given the current experiment phase. // // Allowed from: nil/empty, rollback, timeout, promoted, aborted (start new experiment). diff --git a/pkg/fleet/remote_config.go b/pkg/fleet/remote_config.go index 47555ae076..3c7e5bee25 100644 --- a/pkg/fleet/remote_config.go +++ b/pkg/fleet/remote_config.go @@ -36,7 +36,6 @@ const ( type fleetManagementOperation struct { Operation Operation `json:"operation"` GroupVersionKind schema.GroupVersionKind `json:"group_version_kind"` - NamespacedName types.NamespacedName `json:"namespaced_name"` Config json.RawMessage `json:"config"` } @@ -63,7 +62,8 @@ type expectedState struct { // experimentParams holds the parsed params for experiment methods. type experimentParams struct { - Version string `json:"version"` + Version string `json:"version"` + NamespacedName types.NamespacedName `json:"namespaced_name"` } // handleInstallerConfigUpdate returns an RC subscription callback that parses diff --git a/pkg/fleet/remote_config_test.go b/pkg/fleet/remote_config_test.go index 883e39edb6..6b840e17e5 100644 --- a/pkg/fleet/remote_config_test.go +++ b/pkg/fleet/remote_config_test.go @@ -29,10 +29,6 @@ var testInstallerConfig = installerConfig{ Version: "v2alpha1", Kind: "DatadogAgent", }, - NamespacedName: types.NamespacedName{ - Namespace: "datadog", - Name: "datadog-agent", - }, Config: json.RawMessage(`{"spec":{"features":{"apm":{"enabled":true}}}}`), }, }, @@ -41,7 +37,9 @@ var testInstallerConfig = installerConfig{ var testRemoteAPIRequest = remoteAPIRequest{ ID: "test", Method: "some_method", - Params: experimentParams{}, + Params: experimentParams{ + NamespacedName: types.NamespacedName{Namespace: "datadog", Name: "datadog-agent"}, + }, } // callbackMock records calls made by the RC handler callbacks. From f30fbac802c81394197358b9f132d9f9988d23d3 Mon Sep 17 00:00:00 2001 From: Paul Coignet Date: Tue, 28 Apr 2026 14:49:18 +0200 Subject: [PATCH 2/3] Handle API request --- pkg/fleet/daemon.go | 43 ++++++++++++++++++++++++++++------------ pkg/fleet/daemon_test.go | 43 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 13 deletions(-) diff --git a/pkg/fleet/daemon.go b/pkg/fleet/daemon.go index 9d1a7ce1ce..ed1bd15f57 100644 --- a/pkg/fleet/daemon.go +++ b/pkg/fleet/daemon.go @@ -47,6 +47,7 @@ type Daemon struct { revisionsEnabled bool mu sync.RWMutex configs map[string]installerConfig // keyed by config ID; replaced on each RC update + taskMu sync.Mutex // serializes UPDATER_TASK execution } // NewDaemon creates a new Fleet Daemon. When revisionsEnabled is false, experiment @@ -72,19 +73,7 @@ func (d *Daemon) Start(ctx context.Context) error { return d.handleConfigs(ctx, configs) })) d.rcClient.Subscribe(state.ProductUpdaterTask, handleUpdaterTaskUpdate(ctx, func(req remoteAPIRequest) error { - d.setTaskState(req.Package, req.ID, pbgo.TaskState_RUNNING, nil) - err := d.handleRemoteAPIRequest(ctx, req) - if err != nil { - var stateErr *stateDoesntMatchError - if errors.As(err, &stateErr) { - d.setTaskState(req.Package, req.ID, pbgo.TaskState_INVALID_STATE, err) - } else { - d.setTaskState(req.Package, req.ID, pbgo.TaskState_ERROR, err) - } - } else { - d.setTaskState(req.Package, req.ID, pbgo.TaskState_DONE, nil) - } - return err + return d.handleTask(ctx, req) })) <-ctx.Done() @@ -98,6 +87,26 @@ func (d *Daemon) NeedLeaderElection() bool { return true } +// handleTask serializes UPDATER_TASK execution so at most one task runs at a time, +// matching the datadog-agent's single-writer model and preventing races in setTaskState. +func (d *Daemon) handleTask(ctx context.Context, req remoteAPIRequest) error { + d.taskMu.Lock() + defer d.taskMu.Unlock() + d.setTaskState(req.Package, req.ID, pbgo.TaskState_RUNNING, nil) + err := d.handleRemoteAPIRequest(ctx, req) + if err != nil { + var stateErr *stateDoesntMatchError + if errors.As(err, &stateErr) { + d.setTaskState(req.Package, req.ID, pbgo.TaskState_INVALID_STATE, err) + } else { + d.setTaskState(req.Package, req.ID, pbgo.TaskState_ERROR, err) + } + } else { + d.setTaskState(req.Package, req.ID, pbgo.TaskState_DONE, nil) + } + return err +} + // handleConfigs replaces the stored installer configs with the latest RC update. // Configs are indexed by their ID so they can be retrieved by task handlers. func (d *Daemon) handleConfigs(ctx context.Context, configs map[string]installerConfig) error { @@ -290,6 +299,10 @@ func (d *Daemon) stopDatadogAgentExperiment(ctx context.Context, req remoteAPIRe if isNoOp, err := canStop(ctx, getExperimentPhase(dda)); err != nil { return fmt.Errorf("stop DatadogAgent experiment: %w", err) } else if isNoOp { + // The spec rollback already happened (e.g. timeout), but the experiment + // config version still needs to be cleared so the backend can issue new tasks. + stable, _ := d.getPackageConfigVersions(req.Package) + d.setPackageConfigVersions(req.Package, stable, "") return nil } @@ -339,6 +352,10 @@ func (d *Daemon) promoteDatadogAgentExperiment(ctx context.Context, req remoteAP if isNoOp, err := canPromote(ctx, getExperimentPhase(dda)); err != nil { return fmt.Errorf("promote DatadogAgent experiment: %w", err) } else if isNoOp { + // The experiment is already in a terminal state. Clear the experiment + // config version so the backend can issue new tasks. + stable, _ := d.getPackageConfigVersions(req.Package) + d.setPackageConfigVersions(req.Package, stable, "") return nil } diff --git a/pkg/fleet/daemon_test.go b/pkg/fleet/daemon_test.go index 9298ef6201..beda0c6bfc 100644 --- a/pkg/fleet/daemon_test.go +++ b/pkg/fleet/daemon_test.go @@ -756,3 +756,46 @@ func TestSetTaskState_NilClient(t *testing.T) { d.setTaskState("datadog-operator", "task-1", pbgo.TaskState_RUNNING, nil) }) } + +// --- handleTask state-transition tests --- + +func testDaemonFull(dda *v2alpha1.DatadogAgent, configs map[string]installerConfig, rcState []*pbgo.PackageState) (*Daemon, client.Client, *mockRCClient) { + d, c := testDaemon(dda, configs) + rc := &mockRCClient{state: rcState} + d.rcClient = rc + return d, c, rc +} + +func TestHandleTask_Done(t *testing.T) { + rc := []*pbgo.PackageState{{Package: "datadog-operator", StableConfigVersion: "0.0.1"}} + d, _, m := testDaemonFull(testDDAObject(""), testInstallerConfigWithDDA(), rc) + req := testStartRequest() + req.ExpectedState = expectedState{StableConfig: "0.0.1"} + require.NoError(t, d.handleTask(context.Background(), req)) + require.NotNil(t, m.state[0].Task) + assert.Equal(t, pbgo.TaskState_DONE, m.state[0].Task.State) +} + +func TestHandleTask_Error(t *testing.T) { + rc := []*pbgo.PackageState{{Package: "datadog-operator", StableConfigVersion: "0.0.1"}} + d, _, m := testDaemonFull(testDDAObject(""), testInstallerConfigWithDDA(), rc) + req := testStartRequest() + req.Method = "unknown/method" + req.ExpectedState = expectedState{StableConfig: "0.0.1"} + err := d.handleTask(context.Background(), req) + require.Error(t, err) + require.NotNil(t, m.state[0].Task) + assert.Equal(t, pbgo.TaskState_ERROR, m.state[0].Task.State) + require.NotNil(t, m.state[0].Task.Error) +} + +func TestHandleTask_InvalidState(t *testing.T) { + rc := []*pbgo.PackageState{{Package: "datadog-operator", StableConfigVersion: "0.0.1"}} + d, _, m := testDaemonFull(testDDAObject(""), testInstallerConfigWithDDA(), rc) + req := testStartRequest() + req.ExpectedState = expectedState{StableConfig: "wrong-version"} + err := d.handleTask(context.Background(), req) + require.Error(t, err) + require.NotNil(t, m.state[0].Task) + assert.Equal(t, pbgo.TaskState_INVALID_STATE, m.state[0].Task.State) +} From 7b9195986192bb069ee166bad63a47d114e91c20 Mon Sep 17 00:00:00 2001 From: Paul Coignet Date: Wed, 29 Apr 2026 15:37:26 +0200 Subject: [PATCH 3/3] [FA] fix: promote no-op path now correctly handles Promoted vs rolled-back phase When `promoteDatadogAgentExperiment` hits the no-op branch, it previously always cleared the experiment config version. If the phase was already `Promoted` (e.g. after a daemon crash between status update and `setPackageConfigVersions`), the experiment version should move to stable instead so the backend's expected_state baseline stays correct. --- pkg/fleet/daemon.go | 16 ++++++++++++---- pkg/fleet/daemon_test.go | 27 +++++++++++++++++++++------ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/pkg/fleet/daemon.go b/pkg/fleet/daemon.go index ed1bd15f57..1f09fc8b52 100644 --- a/pkg/fleet/daemon.go +++ b/pkg/fleet/daemon.go @@ -352,10 +352,18 @@ func (d *Daemon) promoteDatadogAgentExperiment(ctx context.Context, req remoteAP if isNoOp, err := canPromote(ctx, getExperimentPhase(dda)); err != nil { return fmt.Errorf("promote DatadogAgent experiment: %w", err) } else if isNoOp { - // The experiment is already in a terminal state. Clear the experiment - // config version so the backend can issue new tasks. - stable, _ := d.getPackageConfigVersions(req.Package) - d.setPackageConfigVersions(req.Package, stable, "") + stable, exp := d.getPackageConfigVersions(req.Package) + if getExperimentPhase(dda) == v2alpha1.ExperimentPhasePromoted { + // The experiment was already promoted (e.g. daemon crashed between the + // status update and setPackageConfigVersions). Finish the promotion by + // moving the experiment version to stable so the backend gets the correct + // baseline for future expected_state checks. + d.setPackageConfigVersions(req.Package, exp, "") + } else { + // The experiment was rolled back (stopped/rollback/timeout). Just clear + // the experiment version so the backend can issue new tasks. + d.setPackageConfigVersions(req.Package, stable, "") + } return nil } diff --git a/pkg/fleet/daemon_test.go b/pkg/fleet/daemon_test.go index beda0c6bfc..68f90c33ae 100644 --- a/pkg/fleet/daemon_test.go +++ b/pkg/fleet/daemon_test.go @@ -361,39 +361,54 @@ func TestPromoteDatadogAgentExperiment_NoExperimentVersion(t *testing.T) { } func TestPromoteDatadogAgentExperiment_NoOp_Rollback(t *testing.T) { - d, c := testDaemon(testDDAObject(v2alpha1.ExperimentPhaseRollback), testInstallerConfigWithDDA()) - d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + rc := &mockRCClient{state: []*pbgo.PackageState{ {Package: "datadog-operator", StableConfigVersion: "stable-1", ExperimentConfigVersion: "exp-1"}, }} + d, c := testDaemon(testDDAObject(v2alpha1.ExperimentPhaseRollback), testInstallerConfigWithDDA()) + d.rcClient = rc require.NoError(t, d.promoteDatadogAgentExperiment(context.Background(), testPromoteRequest())) dda := &v2alpha1.DatadogAgent{} require.NoError(t, c.Get(context.Background(), testDDANSN, dda)) assert.Equal(t, v2alpha1.ExperimentPhaseRollback, dda.Status.Experiment.Phase) + // Rolled back: stable stays, experiment cleared. + assert.Equal(t, "stable-1", rc.state[0].StableConfigVersion) + assert.Equal(t, "", rc.state[0].ExperimentConfigVersion) } func TestPromoteDatadogAgentExperiment_NoOp_Timeout(t *testing.T) { - d, c := testDaemon(testDDAObject(v2alpha1.ExperimentPhaseTimeout), testInstallerConfigWithDDA()) - d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + rc := &mockRCClient{state: []*pbgo.PackageState{ {Package: "datadog-operator", StableConfigVersion: "stable-1", ExperimentConfigVersion: "exp-1"}, }} + d, c := testDaemon(testDDAObject(v2alpha1.ExperimentPhaseTimeout), testInstallerConfigWithDDA()) + d.rcClient = rc require.NoError(t, d.promoteDatadogAgentExperiment(context.Background(), testPromoteRequest())) dda := &v2alpha1.DatadogAgent{} require.NoError(t, c.Get(context.Background(), testDDANSN, dda)) assert.Equal(t, v2alpha1.ExperimentPhaseTimeout, dda.Status.Experiment.Phase) + // Timed out: stable stays, experiment cleared. + assert.Equal(t, "stable-1", rc.state[0].StableConfigVersion) + assert.Equal(t, "", rc.state[0].ExperimentConfigVersion) } func TestPromoteDatadogAgentExperiment_NoOp_Promoted(t *testing.T) { - d, c := testDaemon(testDDAObject(v2alpha1.ExperimentPhasePromoted), testInstallerConfigWithDDA()) - d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + // Simulates daemon crash between status update and setPackageConfigVersions: + // phase=Promoted but installer state still has the experiment version. + // The no-op path must complete the promotion by moving exp → stable. + rc := &mockRCClient{state: []*pbgo.PackageState{ {Package: "datadog-operator", StableConfigVersion: "stable-1", ExperimentConfigVersion: "exp-1"}, }} + d, c := testDaemon(testDDAObject(v2alpha1.ExperimentPhasePromoted), testInstallerConfigWithDDA()) + d.rcClient = rc require.NoError(t, d.promoteDatadogAgentExperiment(context.Background(), testPromoteRequest())) dda := &v2alpha1.DatadogAgent{} require.NoError(t, c.Get(context.Background(), testDDANSN, dda)) assert.Equal(t, v2alpha1.ExperimentPhasePromoted, dda.Status.Experiment.Phase) + // Promoted: experiment version moves to stable, experiment cleared. + assert.Equal(t, "exp-1", rc.state[0].StableConfigVersion) + assert.Equal(t, "", rc.state[0].ExperimentConfigVersion) } func TestPromoteDatadogAgentExperiment_Success_Running(t *testing.T) {