@@ -396,17 +396,21 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod
396
396
const user = this . checkAndBlockUser ( ) ;
397
397
await this . checkTermsAcceptance ( ) ;
398
398
399
- log . info ( { userId : user . id , workspaceId } , 'startWorkspace' ) ;
399
+ const logCtx = { userId : user . id , workspaceId } ;
400
+ log . info ( logCtx , 'startWorkspace' ) ;
400
401
401
402
const mayStartPromise = this . mayStartWorkspace ( { span } , user , this . workspaceDb . trace ( { span } ) . findRegularRunningInstances ( user . id ) ) ;
402
403
const workspace = await this . internalGetWorkspace ( workspaceId , this . workspaceDb . trace ( { span } ) ) ;
403
404
await this . guardAccess ( { kind : "workspace" , subject : workspace } , "get" ) ;
404
405
405
406
const runningInstance = await this . workspaceDb . trace ( { span } ) . findRunningInstance ( workspace . id ) ;
406
- if ( runningInstance ) {
407
- // We already have a running workspace.
408
- // Note: ownership doesn't matter here as this is basically a noop. It's not StartWorkspace's concern
409
- // to guard workspace access - just to prevent non-owners from starting workspaces.
407
+ if ( runningInstance && ! options . forceDefaultImage ) {
408
+ // We already have a running workspace, and we're not forcing the default image.
409
+ // Notes:
410
+ // * ownership doesn't matter here as this is basically a noop. It's not StartWorkspace's concern
411
+ // to guard workspace access - just to prevent non-owners from starting workspaces.
412
+ // * forceDefaultImage now always interrupts a previously-running instance, even if the previous
413
+ // instance already had options.forceDefaultImage === true.
410
414
411
415
await this . guardAccess ( { kind : "workspaceInstance" , subject : runningInstance , workspaceOwnerID : workspace . ownerId , workspaceIsShared : workspace . shareable || false } , "get" ) ;
412
416
return {
@@ -433,6 +437,14 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod
433
437
434
438
await mayStartPromise ;
435
439
440
+ if ( runningInstance ) {
441
+ // We already had a running workspace. This may happen if we're forcing the default image.
442
+ // In that case, we first stop the previous workspace, and wait for it to be completely gone.
443
+ await this . internalStopWorkspaceAndWaitForInstance ( { span } , workspaceId , workspace . ownerId ) . catch ( err => {
444
+ log . error ( logCtx , "internalStopWorkspaceAndWaitForInstance error: " , err ) ;
445
+ } ) ;
446
+ }
447
+
436
448
// at this point we're about to actually start a new workspace
437
449
return await this . workspaceStarter . startWorkspace ( { span } , workspace , user , await envVars , {
438
450
forceDefaultImage : ! ! options . forceDefaultImage
@@ -469,6 +481,51 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod
469
481
}
470
482
}
471
483
484
+ protected async internalStopWorkspaceAndWaitForInstance ( ctx : TraceContext , workspaceId : string , ownerId ?: string , policy ?: StopWorkspacePolicy ) : Promise < void > {
485
+ const runningInstance = await this . workspaceDb . trace ( ctx ) . findRunningInstance ( workspaceId ) ;
486
+ let instanceStoppedPromise = Promise . resolve ( ) ;
487
+ if ( runningInstance ) {
488
+ // If a previous workspace instance is still running, we'll wait for it to be completely gone.
489
+ let toDispose : Disposable | undefined ;
490
+ let timeout : NodeJS . Timeout | undefined ;
491
+ instanceStoppedPromise = new Promise < void > ( ( resolve , reject ) => {
492
+ toDispose = this . messageBusIntegration . listenForWorkspaceInstanceUpdates (
493
+ ownerId ,
494
+ ( ctx : TraceContext , instance : WorkspaceInstance ) => {
495
+ if ( instance . workspaceId !== runningInstance . workspaceId || instance . id !== runningInstance . id ) {
496
+ return ;
497
+ }
498
+ if ( instance . status . phase === 'stopped' ) {
499
+ resolve ( ) ;
500
+ }
501
+ }
502
+ ) ;
503
+ // Time out if the instance still isn't stopped after 5 minutes
504
+ timeout = setTimeout ( ( ) => {
505
+ reject ( new Error ( 'Timed out while waiting for previous workspace instance to stop' ) ) ;
506
+ } , 1000 * 60 * 5 ) ;
507
+ } ) . finally ( ( ) => {
508
+ if ( toDispose ) {
509
+ toDispose . dispose ( ) ;
510
+ }
511
+ if ( timeout ) {
512
+ clearTimeout ( timeout ) ;
513
+ }
514
+ } ) ;
515
+ if ( ! runningInstance . region ) {
516
+ // If there is an instance, but it doesn't have a region, it means that the instance wasn't actually
517
+ // started yet, i.e. it didn't go through `actuallyStartWorkspace` yet.
518
+ // Instead of calling ws-manager, we just wait for some time -- we would not know which workspace
519
+ // manager to talk to after all. There's a chance for a race condition here (some other `server`
520
+ // instance could be starting the instance currently), so in lieu of proper cross-server-instance-
521
+ // locking we wait for 10 seconds to prevent that race.
522
+ await new Promise ( resolve => setTimeout ( resolve , 10 * 1000 ) ) ;
523
+ }
524
+ }
525
+ await this . internalStopWorkspace ( ctx , workspaceId , ownerId , policy ) ;
526
+ await instanceStoppedPromise ;
527
+ }
528
+
472
529
protected async internalStopWorkspace ( ctx : TraceContext , workspaceId : string , ownerId ?: string , policy ?: StopWorkspacePolicy ) : Promise < void > {
473
530
const instance = await this . workspaceDb . trace ( ctx ) . findRunningInstance ( workspaceId ) ;
474
531
if ( ! instance ) {
0 commit comments