Skip to content

Commit d93524d

Browse files
Retry VM migration to the next available host, and don't stop the VM on failure
1 parent a289bb0 commit d93524d

1 file changed

Lines changed: 13 additions & 12 deletions

File tree

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -469,6 +469,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
469469

470470
static final ConfigKey<Integer> StartRetry = new ConfigKey<Integer>("Advanced", Integer.class, "start.retry", "10",
471471
"Number of times to retry create and start commands", true);
472+
static final ConfigKey<Integer> MigrateRetry = new ConfigKey<Integer>("Advanced", Integer.class, "migrate.retry", "3",
473+
"Number of times to retry migrating the vm", true);
472474
static final ConfigKey<Integer> VmOpWaitInterval = new ConfigKey<Integer>("Advanced", Integer.class, "vm.op.wait.interval", "120",
473475
"Time (in seconds) to wait before checking if a previous operation has succeeded", true);
474476

@@ -3908,7 +3910,7 @@ private void orchestrateMigrateAway(final String vmUuid, final long srcHostId, f
39083910

39093911
final Long hostId = vm.getHostId();
39103912
if (hostId == null) {
3911-
String message = String.format("Unable to migrate %s due to it does not have a host id.", vm.toString());
3913+
String message = String.format("Unable to migrate %s due to it does not have a host id.", vm);
39123914
logger.warn(message);
39133915
throw new CloudRuntimeException(message);
39143916
}
@@ -3928,13 +3930,16 @@ private void orchestrateMigrateAway(final String vmUuid, final long srcHostId, f
39283930
DataCenterDeployment plan = getMigrationDeployment(vm, host, poolId, excludes);
39293931

39303932
DeployDestination dest = null;
3931-
while (true) {
39323933

3934+
int retry = MigrateRetry.value();
3935+
int retryAttempt = 0;
3936+
while (++retryAttempt <= retry) {
3937+
logger.debug("Migrate VM {}, attempt #{}", vm, retryAttempt);
39333938
try {
39343939
plan.setMigrationPlan(true);
39353940
dest = _dpMgr.planDeployment(profile, plan, excludes, planner);
39363941
} catch (final AffinityConflictException e2) {
3937-
String message = String.format("Unable to create deployment, affinity rules associated to the %s conflict.", vm.toString());
3942+
String message = String.format("Unable to create deployment, affinity rules associated to the %s conflict.", vm);
39383943
logger.warn(message, e2);
39393944
throw new CloudRuntimeException(message, e2);
39403945
}
@@ -3951,15 +3956,11 @@ private void orchestrateMigrateAway(final String vmUuid, final long srcHostId, f
39513956
} catch (ResourceUnavailableException | ConcurrentOperationException e) {
39523957
logger.warn("Unable to migrate {} to {} due to [{}]", vm.toString(), dest.getHost().toString(), e.getMessage(), e);
39533958
}
3954-
3955-
try {
3956-
advanceStop(vmUuid, true);
3957-
throw new CloudRuntimeException("Unable to migrate " + vm);
3958-
} catch (final ResourceUnavailableException | ConcurrentOperationException | OperationTimedoutException e) {
3959-
logger.error("Unable to stop {} due to [{}].", vm.toString(), e.getMessage(), e);
3960-
throw new CloudRuntimeException("Unable to migrate " + vm);
3961-
}
39623959
}
3960+
3961+
String message = String.format("Unable to migrate %s after %d attempts.", vm, retry);
3962+
logger.warn(message);
3963+
throw new CloudRuntimeException(message);
39633964
}
39643965

39653966
/**
@@ -5298,7 +5299,7 @@ public String getConfigComponentName() {
52985299

52995300
@Override
53005301
public ConfigKey<?>[] getConfigKeys() {
5301-
return new ConfigKey<?>[] { ClusterDeltaSyncInterval, StartRetry, VmDestroyForcestop, VmOpCancelInterval, VmOpCleanupInterval, VmOpCleanupWait,
5302+
return new ConfigKey<?>[] { ClusterDeltaSyncInterval, StartRetry, MigrateRetry, VmDestroyForcestop, VmOpCancelInterval, VmOpCleanupInterval, VmOpCleanupWait,
53025303
VmOpLockStateRetry, VmOpWaitInterval, ExecuteInSequence, VmJobCheckInterval, VmJobTimeout, VmJobStateReportInterval,
53035304
VmConfigDriveLabel, VmConfigDriveOnPrimaryPool, VmConfigDriveForceHostCacheUse, VmConfigDriveUseHostCacheOnUnsupportedPool,
53045305
HaVmRestartHostUp, ResourceCountRunningVMsonly, AllowExposeHypervisorHostname, AllowExposeHypervisorHostnameAccountLevel, SystemVmRootDiskSize,

0 commit comments

Comments
 (0)