Skip to content

Reliably fetch logs for imagebuild and prebuild itself #11026

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 61 additions & 22 deletions components/dashboard/src/components/PrebuildLogs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import EventEmitter from "events";
import React, { Suspense, useEffect, useState } from "react";
import {
Workspace,
WorkspaceInstance,
DisposableCollection,
WorkspaceImageBuild,
Expand All @@ -21,13 +20,13 @@ import { PrebuildStatus } from "../projects/Prebuilds";
const WorkspaceLogs = React.lazy(() => import("./WorkspaceLogs"));

export interface PrebuildLogsProps {
// The workspace ID of the "prebuild" workspace
workspaceId: string | undefined;
onIgnorePrebuild?: () => void;
children?: React.ReactNode;
}

export default function PrebuildLogs(props: PrebuildLogsProps) {
const [workspace, setWorkspace] = useState<Workspace | undefined>();
const [workspaceInstance, setWorkspaceInstance] = useState<WorkspaceInstance | undefined>();
const [error, setError] = useState<Error | undefined>();
const [logsEmitter] = useState(new EventEmitter());
Expand All @@ -44,7 +43,6 @@ export default function PrebuildLogs(props: PrebuildLogsProps) {
const info = await getGitpodService().server.getWorkspace(props.workspaceId);
const pbws = await getGitpodService().server.findPrebuildByWorkspaceID(props.workspaceId);
if (info.latestInstance) {
setWorkspace(info.workspace);
setWorkspaceInstance(info.latestInstance);
}
if (pbws) {
Expand Down Expand Up @@ -74,36 +72,45 @@ export default function PrebuildLogs(props: PrebuildLogsProps) {
},
}),
);
if (info.latestInstance) {
disposables.push(
watchHeadlessLogs(
info.latestInstance.id,
(chunk) => {
logsEmitter.emit("logs", chunk);
},
async () => workspaceInstance?.status.phase === "stopped",
),
);
}
} catch (err) {
console.error(err);
setError(err);
}
})();
return function cleanUp() {
return function cleanup() {
disposables.dispose();
};
}, [props.workspaceId]);
}, [logsEmitter, props.workspaceId]);

useEffect(() => {
switch (workspaceInstance?.status.phase) {
// Building means we're building the Docker image for the workspace so the workspace hasn't started yet.
const workspaceId = props.workspaceId;
if (!workspaceId || !workspaceInstance?.status.phase) {
return;
}

const disposables = new DisposableCollection();
switch (workspaceInstance.status.phase) {
// "building" means we're building the Docker image for the prebuild's workspace so the workspace hasn't started yet.
case "building":
case "stopped":
getGitpodService().server.watchWorkspaceImageBuildLogs(workspace!.id);
// Try to grab image build logs
disposables.push(retryWatchWorkspaceImageBuildLogs(workspaceId));
break;
// When we're "running" we want to switch to the logs from the actual prebuild workspace, instead
case "running":
disposables.push(
watchHeadlessLogs(
workspaceInstance.id,
(chunk) => {
logsEmitter.emit("logs", chunk);
},
async () => workspaceInstance?.status.phase === "stopped",
),
);
}
}, [props.workspaceId, workspaceInstance?.status.phase]);
return function cleanup() {
disposables.dispose();
};
}, [logsEmitter, props.workspaceId, workspaceInstance?.id, workspaceInstance?.status.phase]);

return (
<div className="rounded-xl overflow-hidden bg-gray-100 dark:bg-gray-800 flex flex-col">
Expand All @@ -121,7 +128,39 @@ export default function PrebuildLogs(props: PrebuildLogsProps) {
);
}

export function watchHeadlessLogs(
function retryWatchWorkspaceImageBuildLogs(workspaceId: string): Disposable {
let abortImageLogs = false;
(async () => {
// Linear backoff + abort for re-trying fetching of imagebuild logs
const initialDelaySeconds = 1;
const backoffFactor = 1.2;
const maxBackoffSeconds = 5;
let delayInSeconds = initialDelaySeconds;

while (true) {
delayInSeconds = Math.min(delayInSeconds * backoffFactor, maxBackoffSeconds);

console.debug("re-trying image build logs");
// eslint-disable-next-line
await new Promise((resolve) => {
setTimeout(resolve, delayInSeconds * 1000);
});
if (abortImageLogs) {
return;
}
try {
await getGitpodService().server.watchWorkspaceImageBuildLogs(workspaceId);
} catch (err) {
console.error("watchWorkspaceImageBuildLogs", err);
}
}
})();
return Disposable.create(() => {
abortImageLogs = true;
});
}

function watchHeadlessLogs(
instanceId: string,
onLog: (chunk: string) => void,
checkIsDone: () => Promise<boolean>,
Expand Down
19 changes: 8 additions & 11 deletions components/dashboard/src/start/CreateWorkspace.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
* See License-AGPL.txt in the project root for license information.
*/

import EventEmitter from "events";
import React, { useEffect, useContext, useState } from "react";
import {
CreateWorkspaceMode,
WorkspaceCreationResult,
RunningWorkspacePrebuildStarting,
ContextURL,
DisposableCollection,
} from "@gitpod/gitpod-protocol";
import { ErrorCodes } from "@gitpod/gitpod-protocol/lib/messaging/error";
import Modal from "../components/Modal";
Expand All @@ -21,7 +21,7 @@ import StartWorkspace, { parseProps } from "./StartWorkspace";
import { openAuthorizeWindow } from "../provider-utils";
import { SelectAccountPayload } from "@gitpod/gitpod-protocol/lib/auth";
import { SelectAccountModal } from "../settings/SelectAccountModal";
import PrebuildLogs, { watchHeadlessLogs } from "../components/PrebuildLogs";
import PrebuildLogs from "../components/PrebuildLogs";
import CodeText from "../components/CodeText";
import FeedbackComponent from "../feedback-form/FeedbackComponent";
import { isGitpodIo } from "../utils";
Expand Down Expand Up @@ -459,19 +459,15 @@ interface RunningPrebuildViewProps {
}

function RunningPrebuildView(props: RunningPrebuildViewProps) {
const [logsEmitter] = useState(new EventEmitter());
const workspaceId = props.runningPrebuild.workspaceID;

useEffect(() => {
const disposables = watchHeadlessLogs(
props.runningPrebuild.instanceID,
(chunk) => logsEmitter.emit("logs", chunk),
async () => false,
);
const disposables = new DisposableCollection();

disposables.push(
getGitpodService().registerClient({
onInstanceUpdate: (update) => {
if (update.workspaceId !== props.runningPrebuild.workspaceID) {
if (update.workspaceId !== workspaceId) {
return;
}
if (update.status.phase === "stopped") {
Expand All @@ -484,13 +480,14 @@ function RunningPrebuildView(props: RunningPrebuildViewProps) {
return function cleanup() {
disposables.dispose();
};
}, []);
// eslint-disable-next-line
}, [workspaceId]);

return (
<StartPage title="Prebuild in Progress">
{/* TODO(gpl) Copied around in Start-/CreateWorkspace. This should properly go somewhere central. */}
<div className="mt-6 w-11/12 lg:w-3/5 overflow-hidden">
<PrebuildLogs workspaceId={props.runningPrebuild.workspaceID} onIgnorePrebuild={props.onIgnorePrebuild}>
<PrebuildLogs workspaceId={workspaceId} onIgnorePrebuild={props.onIgnorePrebuild}>
<button className="secondary" onClick={() => props.onIgnorePrebuild && props.onIgnorePrebuild()}>
Skip Prebuild
</button>
Expand Down
21 changes: 19 additions & 2 deletions components/server/src/workspace/gitpod-server-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ import { InstallationAdminTelemetryDataProvider } from "../installation-admin/te
import { LicenseEvaluator } from "@gitpod/licensor/lib";
import { Feature } from "@gitpod/licensor/lib/api";
import { Currency } from "@gitpod/gitpod-protocol/lib/plans";
import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/experiments/configcat-server";

// shortcut
export const traceWI = (ctx: TraceContext, wi: Omit<LogContext, "userId">) => TraceContext.setOWI(ctx, wi); // userId is already taken care of in WebsocketConnectionManager
Expand Down Expand Up @@ -1547,10 +1548,25 @@ export class GitpodServerImpl implements GitpodServerWithTracing, Disposable {

const logInfo = instance.imageBuildInfo?.log;
if (!logInfo) {
const isOldImageBuildLogsMechanismDeprecated = await getExperimentsClientForBackend().getValueAsync(
"deprecateOldImageLogsMechanism",
false,
{
userId: user.id,
projectId: workspace.projectId,
},
);
if (isOldImageBuildLogsMechanismDeprecated) {
log.error(logCtx, "cannot watch imagebuild logs for workspaceId: no image build info available");
throw new ResponseError(
ErrorCodes.HEADLESS_LOG_NOT_YET_AVAILABLE,
"cannot watch imagebuild logs for workspaceId",
);
}

// during roll-out this is our fall-back case.
// Afterwards we might want to do some spinning-lock and re-check for a certain period (30s?) to give db-sync
// a change to move the imageBuildLogInfo across the globe.

log.warn(logCtx, "imageBuild logs: fallback!");
ctx.span?.setTag("workspace.imageBuild.logs.fallback", true);
await this.deprecatedDoWatchWorkspaceImageBuildLogs(ctx, logCtx, workspace);
Expand Down Expand Up @@ -1589,7 +1605,8 @@ export class GitpodServerImpl implements GitpodServerWithTracing, Disposable {
aborted,
);
} catch (err) {
log.error(logCtx, "cannot watch imagebuild logs for workspaceId", err);
// This error is most likely a temporary one (too early). We defer to the client whether they want to keep on trying or not.
log.debug(logCtx, "cannot watch imagebuild logs for workspaceId", err);
throw new ResponseError(
ErrorCodes.HEADLESS_LOG_NOT_YET_AVAILABLE,
"cannot watch imagebuild logs for workspaceId",
Expand Down
7 changes: 6 additions & 1 deletion components/server/src/workspace/workspace-starter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,12 @@ export class WorkspaceStarter {
.catch((err) => log.error("error writing image build log info to the DB", err));
})
.catch((err) =>
log.warn("image build: never received log info", err, {
// TODO (gpl) This error happens quite often, but looks like it's mostly triggered by user errors:
// The image build fails (e.g. bc the base image cannot be pulled) so fast that we never received the log meta info.
// We switch this to "debug" for now. Going forward, we should:
// 1. turn this into a metric to feat the "image build reliability" SLI
// 2. fix the image-builder implementation
log.debug("image build: never received log info", err, {
instanceId: instance.id,
workspaceId: instance.workspaceId,
}),
Expand Down