Skip to content

Commit de1eff9

Browse files
committed
[server] Allow restarting workspaces with forceDefaultImage=true even when another instance is already running
Fixes #3777
1 parent f1c815a commit de1eff9

File tree

1 file changed

+62
-5
lines changed

1 file changed

+62
-5
lines changed

components/server/src/workspace/gitpod-server-impl.ts

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -396,17 +396,21 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod
396396
const user = this.checkAndBlockUser();
397397
await this.checkTermsAcceptance();
398398

399-
log.info({ userId: user.id, workspaceId }, 'startWorkspace');
399+
const logCtx = { userId: user.id, workspaceId };
400+
log.info(logCtx, 'startWorkspace');
400401

401402
const mayStartPromise = this.mayStartWorkspace({ span }, user, this.workspaceDb.trace({ span }).findRegularRunningInstances(user.id));
402403
const workspace = await this.internalGetWorkspace(workspaceId, this.workspaceDb.trace({ span }));
403404
await this.guardAccess({ kind: "workspace", subject: workspace }, "get");
404405

405406
const runningInstance = await this.workspaceDb.trace({ span }).findRunningInstance(workspace.id);
406-
if (runningInstance) {
407-
// We already have a running workspace.
408-
// Note: ownership doesn't matter here as this is basically a noop. It's not StartWorkspace's concern
409-
// to guard workspace access - just to prevent non-owners from starting workspaces.
407+
if (runningInstance && !options.forceDefaultImage) {
408+
// We already have a running workspace, and we're not forcing the default image.
409+
// Notes:
410+
// * ownership doesn't matter here as this is basically a noop. It's not StartWorkspace's concern
411+
// to guard workspace access - just to prevent non-owners from starting workspaces.
412+
// * forceDefaultImage now always interrupts a previously-running instance, even if the previous
413+
// instance already had options.forceDefaultImage === true.
410414

411415
await this.guardAccess({ kind: "workspaceInstance", subject: runningInstance, workspaceOwnerID: workspace.ownerId, workspaceIsShared: workspace.shareable || false }, "get");
412416
return {
@@ -433,6 +437,14 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod
433437

434438
await mayStartPromise;
435439

440+
if (runningInstance) {
441+
// We already had a running workspace. This may happen if we're forcing the default image.
442+
// In that case, we first stop the previous workspace, and wait for it to be completely gone.
443+
await this.internalStopWorkspaceAndWaitForInstance({ span }, workspaceId, workspace.ownerId).catch(err => {
444+
log.error(logCtx, "internalStopWorkspaceAndWaitForInstance error: ", err);
445+
});
446+
}
447+
436448
// at this point we're about to actually start a new workspace
437449
return await this.workspaceStarter.startWorkspace({ span }, workspace, user, await envVars, {
438450
forceDefaultImage: !!options.forceDefaultImage
@@ -469,6 +481,51 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod
469481
}
470482
}
471483

484+
protected async internalStopWorkspaceAndWaitForInstance(ctx: TraceContext, workspaceId: string, ownerId?: string, policy?: StopWorkspacePolicy): Promise<void> {
485+
const runningInstance = await this.workspaceDb.trace(ctx).findRunningInstance(workspaceId);
486+
let instanceStoppedPromise = Promise.resolve();
487+
if (runningInstance) {
488+
// If a previous workspace instance is still running, we'll wait for it to be completely gone.
489+
let toDispose: Disposable | undefined;
490+
let timeout: NodeJS.Timeout | undefined;
491+
instanceStoppedPromise = new Promise<void>((resolve, reject) => {
492+
toDispose = this.messageBusIntegration.listenForWorkspaceInstanceUpdates(
493+
ownerId,
494+
(ctx: TraceContext, instance: WorkspaceInstance) => {
495+
if (instance.workspaceId !== runningInstance.workspaceId || instance.id !== runningInstance.id) {
496+
return;
497+
}
498+
if (instance.status.phase === 'stopped') {
499+
resolve();
500+
}
501+
}
502+
);
503+
// Time out if the instance still isn't stopped after 5 minutes
504+
timeout = setTimeout(() => {
505+
reject(new Error('Timed out while waiting for previous workspace instance to stop'));
506+
}, 1000 * 60 * 5);
507+
}).finally(() => {
508+
if (toDispose) {
509+
toDispose.dispose();
510+
}
511+
if (timeout) {
512+
clearTimeout(timeout);
513+
}
514+
});
515+
if (!runningInstance.region) {
516+
// If there is an instance, but it doesn't have a region, it means that the instance wasn't actually
517+
// started yet, i.e. it didn't go through `actuallyStartWorkspace` yet.
518+
// Instead of calling ws-manager, we just wait for some time -- we would not know which workspace
519+
// manager to talk to after all. There's a chance for a race condition here (some other `server`
520+
// instance could be starting the instance currently), so in lieu of proper cross-server-instance-
521+
// locking we wait for 10 seconds to prevent that race.
522+
await new Promise(resolve => setTimeout(resolve, 10 * 1000));
523+
}
524+
}
525+
await this.internalStopWorkspace(ctx, workspaceId, ownerId, policy);
526+
await instanceStoppedPromise;
527+
}
528+
472529
protected async internalStopWorkspace(ctx: TraceContext, workspaceId: string, ownerId?: string, policy?: StopWorkspacePolicy): Promise<void> {
473530
const instance = await this.workspaceDb.trace(ctx).findRunningInstance(workspaceId);
474531
if (!instance) {

0 commit comments

Comments
 (0)