Skip to content

Commit fd7720d

Browse files
geropl authored and roboquat committed
[server] Only alert on explicit system failures
1 parent 912410c commit fd7720d

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

components/server/src/prometheus-metrics.ts

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,11 @@ const instanceStartsFailedTotal = new prometheusClient.Counter({
148148
registers: [prometheusClient.register],
149149
});
150150

151-
export type FailedInstanceStartReason = "clusterSelectionFailed" | "startOnClusterFailed" | "other";
151+
export type FailedInstanceStartReason =
152+
| "clusterSelectionFailed"
153+
| "startOnClusterFailed"
154+
| "imageBuildFailed"
155+
| "other";
152156
export function increaseFailedInstanceStartCounter(reason: FailedInstanceStartReason) {
153157
instanceStartsFailedTotal.inc({ reason });
154158
}

components/server/src/workspace/workspace-starter.ts

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -591,12 +591,11 @@ export class WorkspaceStarter {
591591
if (rethrow) {
592592
throw err;
593593
} else {
594-
log.error("error starting instance", err, { instanceId: instance.id });
595-
let failedReason: FailedInstanceStartReason = "other";
594+
TraceContext.setError({ span }, err);
595+
log.error({ userId: user.id, instanceId: instance.id }, "error starting instance", err);
596596
if (err instanceof StartInstanceError) {
597-
failedReason = err.reason;
597+
increaseFailedInstanceStartCounter(err.reason);
598598
}
599-
increaseFailedInstanceStartCounter(failedReason);
600599
}
601600

602601
return { instanceID: instance.id };
@@ -765,7 +764,7 @@ export class WorkspaceStarter {
765764
ideConfig: IDEConfig,
766765
pvcEnabledForPrebuilds: boolean,
767766
): Promise<WorkspaceInstance> {
768-
const span = TraceContext.startSpan("buildWorkspaceImage", ctx);
767+
const span = TraceContext.startSpan("newInstance", ctx);
769768
//#endregion IDE resolution TODO(ak) move to IDE service
770769
// TODO: Compatible with ide-config not deployed, need revert after ide-config deployed
771770
delete ideConfig.ideOptions.options["code-latest"];
@@ -1264,18 +1263,19 @@ export class WorkspaceStarter {
12641263

12651264
TraceContext.setError({ span }, err);
12661265
const looksLikeUserError = (msg: string): boolean => {
1267-
return msg.startsWith("build failed:");
1266+
return msg.startsWith("build failed:") || msg.startsWith("headless task failed:");
12681267
};
12691268
if (looksLikeUserError(message)) {
12701269
log.debug(
12711270
{ instanceId: instance.id, userId: user.id, workspaceId: workspace.id },
12721271
`workspace image build failed: ${message}`,
12731272
);
12741273
} else {
1275-
log.warn(
1274+
log.error(
12761275
{ instanceId: instance.id, userId: user.id, workspaceId: workspace.id },
12771276
`workspace image build failed: ${message}`,
12781277
);
1278+
err = new StartInstanceError("imageBuildFailed", err);
12791279
}
12801280
this.analytics.track({
12811281
userId: user.id,

0 commit comments

Comments
 (0)