Skip to content

[bridge] Stop stuck workspace instances #12894

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions components/ws-manager-bridge/src/bridge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,12 @@ export class WorkspaceManagerBridge implements Disposable {

// Control running workspace instances against ws-manager
try {
await this.controlRunningInstances(ctx, runningInstances, clientProvider);
await this.controlRunningInstances(
ctx,
runningInstances,
clientProvider,
this.config.maxTimeToRunningPhaseSeconds,
);

disconnectStarted = Number.MAX_SAFE_INTEGER; // Reset disconnect period
} catch (err) {
Expand Down Expand Up @@ -489,6 +494,7 @@ export class WorkspaceManagerBridge implements Disposable {
parentCtx: TraceContext,
runningInstances: RunningWorkspaceInfo[],
clientProvider: ClientProvider,
maxTimeToRunningPhaseSeconds: number,
) {
const installation = this.config.installation;

Expand All @@ -506,7 +512,12 @@ export class WorkspaceManagerBridge implements Disposable {

for (const [instanceId, ri] of runningInstancesIdx.entries()) {
const instance = ri.latestInstance;
if (instance.status.phase !== "running") {
if (
!(
instance.status.phase === "running" ||
durationLongerThanSeconds(Date.parse(instance.creationTime), maxTimeToRunningPhaseSeconds)
)
) {
log.debug({ instanceId }, "Skipping instance", {
phase: instance.status.phase,
creationTime: instance.creationTime,
Expand Down Expand Up @@ -601,11 +612,10 @@ export class WorkspaceManagerBridge implements Disposable {

protected async markWorkspaceInstanceAsStopped(ctx: TraceContext, info: RunningWorkspaceInfo, now: Date) {
const nowISO = now.toISOString();
info.latestInstance.status.phase = "stopped";
info.latestInstance.stoppingTime = nowISO;
info.latestInstance.stoppedTime = nowISO;
info.latestInstance.status.phase = "stopped";
await this.workspaceDB.trace(ctx).storeInstance(info.latestInstance);

await this.messagebus.notifyOnInstanceUpdate(ctx, info.workspace.ownerId, info.latestInstance);
await this.prebuildUpdater.stopPrebuildInstance(ctx, info.latestInstance);
}
Expand Down
3 changes: 3 additions & 0 deletions components/ws-manager-bridge/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,7 @@ export interface Configuration {

// clusterSyncIntervalSeconds configures how often we sync workspace cluster information
clusterSyncIntervalSeconds: number;

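// maxTimeToRunningPhaseSeconds is the maximum time, in seconds since its creation, that an instance
// may spend without reaching the "running" phase; once exceeded, the bridge no longer skips the
// instance when controlling running instances against ws-manager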
maxTimeToRunningPhaseSeconds: number;
}
5 changes: 3 additions & 2 deletions install/installer/cmd/testdata/render/aws-setup/output.golden
Original file line number Diff line number Diff line change
Expand Up @@ -4576,7 +4576,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -8439,7 +8440,7 @@ spec:
template:
metadata:
annotations:
gitpod.io/checksum_config: f5d95af2308e35194d44403c42e269fbe9e4c596d138796fa95f705ec3b4d352
gitpod.io/checksum_config: 846469058fd082ae195479935c67054553bb448d05cf029a19af738503147aa7
creationTimestamp: null
labels:
app: gitpod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4436,7 +4436,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -8285,7 +8286,7 @@ spec:
template:
metadata:
annotations:
gitpod.io/checksum_config: f5d95af2308e35194d44403c42e269fbe9e4c596d138796fa95f705ec3b4d352
gitpod.io/checksum_config: 846469058fd082ae195479935c67054553bb448d05cf029a19af738503147aa7
creationTimestamp: null
labels:
app: gitpod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5363,7 +5363,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -9817,7 +9818,7 @@ spec:
metadata:
annotations:
gitpod.io: hello
gitpod.io/checksum_config: fdb0c0adb6ea187bd587fb7c286084b4853439fdfafa7f32bbdcacee744e5c3e
gitpod.io/checksum_config: 1af8949463afb559e837a140ecdb4ca73db58e3da8fb0680570a1cdada3c3abe
hello: world
creationTimestamp: null
labels:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4623,7 +4623,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -8711,7 +8712,7 @@ spec:
template:
metadata:
annotations:
gitpod.io/checksum_config: bacace964780fd56bdd04b997df0017c24563c2aac6bffd10962c297b54c91ba
gitpod.io/checksum_config: 19a15d16ea83f7cd1c61948f0e433f3f72ce175524ea639c1c050a485f17f74d
creationTimestamp: null
labels:
app: gitpod
Expand Down
5 changes: 3 additions & 2 deletions install/installer/cmd/testdata/render/gcp-setup/output.golden
Original file line number Diff line number Diff line change
Expand Up @@ -4395,7 +4395,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -8208,7 +8209,7 @@ spec:
template:
metadata:
annotations:
gitpod.io/checksum_config: 6d44bef12145215794685e52c3139f04195bcf8e7ca1f1d7b3803da845e837eb
gitpod.io/checksum_config: f0b5a025d81fce48adb9fd1ad8445422fa80b136acba6b2eb69263df3388c388
creationTimestamp: null
labels:
app: gitpod
Expand Down
5 changes: 3 additions & 2 deletions install/installer/cmd/testdata/render/minimal/output.golden
Original file line number Diff line number Diff line change
Expand Up @@ -4843,7 +4843,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -9086,7 +9087,7 @@ spec:
template:
metadata:
annotations:
gitpod.io/checksum_config: bacace964780fd56bdd04b997df0017c24563c2aac6bffd10962c297b54c91ba
gitpod.io/checksum_config: 19a15d16ea83f7cd1c61948f0e433f3f72ce175524ea639c1c050a485f17f74d
creationTimestamp: null
labels:
app: gitpod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4855,7 +4855,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -9098,7 +9099,7 @@ spec:
template:
metadata:
annotations:
gitpod.io/checksum_config: bacace964780fd56bdd04b997df0017c24563c2aac6bffd10962c297b54c91ba
gitpod.io/checksum_config: 19a15d16ea83f7cd1c61948f0e433f3f72ce175524ea639c1c050a485f17f74d
creationTimestamp: null
labels:
app: gitpod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5176,7 +5176,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -9530,7 +9531,7 @@ spec:
template:
metadata:
annotations:
gitpod.io/checksum_config: bacace964780fd56bdd04b997df0017c24563c2aac6bffd10962c297b54c91ba
gitpod.io/checksum_config: 19a15d16ea83f7cd1c61948f0e433f3f72ce175524ea639c1c050a485f17f74d
creationTimestamp: null
labels:
app: gitpod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4846,7 +4846,8 @@ data:
"buildingPhaseSeconds": 3600,
"unknownPhaseSeconds": 600
},
"clusterSyncIntervalSeconds": 60
"clusterSyncIntervalSeconds": 60,
"maxTimeToRunningPhaseSeconds": 3600
}
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -9089,7 +9090,7 @@ spec:
template:
metadata:
annotations:
gitpod.io/checksum_config: bacace964780fd56bdd04b997df0017c24563c2aac6bffd10962c297b54c91ba
gitpod.io/checksum_config: 19a15d16ea83f7cd1c61948f0e433f3f72ce175524ea639c1c050a485f17f74d
creationTimestamp: null
labels:
app: gitpod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func configmap(ctx *common.RenderContext) ([]runtime.Object, error) {
EmulatePreparingIntervalSeconds: 10,
StaticBridges: WSManagerList(ctx),
ClusterSyncIntervalSeconds: 60,
MaxTimeToRunningPhaseSeconds: 60 * 60,
}

fc, err := common.ToJSONString(wsmbcfg)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type Configuration struct {
EmulatePreparingIntervalSeconds int32 `json:"emulatePreparingIntervalSeconds"`
Timeouts Timeouts `json:"timeouts"`
ClusterSyncIntervalSeconds int32 `json:"clusterSyncIntervalSeconds"`
MaxTimeToRunningPhaseSeconds int32 `json:"maxTimeToRunningPhaseSeconds"`
}

type ClusterService struct {
Expand Down