diff --git a/.env.development b/.env.development index d49c7951..9ba1073a 100644 --- a/.env.development +++ b/.env.development @@ -34,7 +34,7 @@ AUTH_URL="http://localhost:3000" # Sentry # SENTRY_BACKEND_DSN="" # NEXT_PUBLIC_SENTRY_WEBAPP_DSN="" -# SENTRY_ENVIRONMENT="dev" +SENTRY_ENVIRONMENT="dev" # NEXT_PUBLIC_SENTRY_ENVIRONMENT="dev" # SENTRY_AUTH_TOKEN= diff --git a/packages/backend/src/connectionManager.ts b/packages/backend/src/connectionManager.ts index 527ac6c0..e6188159 100644 --- a/packages/backend/src/connectionManager.ts +++ b/packages/backend/src/connectionManager.ts @@ -1,4 +1,4 @@ -import { Connection, ConnectionSyncStatus, PrismaClient, Prisma } from "@sourcebot/db"; +import { Connection, ConnectionSyncStatus, PrismaClient, Prisma, RepoIndexingStatus } from "@sourcebot/db"; import { Job, Queue, Worker } from 'bullmq'; import { Settings } from "./types.js"; import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type"; @@ -160,7 +160,7 @@ export class ConnectionManager implements IConnectionManager { } } - const { repoData, notFound } = result; + let { repoData, notFound } = result; // Push the information regarding not found users, orgs, and repos to the connection's syncStatusMetadata. Note that // this won't be overwritten even if the connection job fails @@ -174,7 +174,7 @@ export class ConnectionManager implements IConnectionManager { }); // Filter out any duplicates by external_id and external_codeHostUrl. 
- repoData.filter((repo, index, self) => { + repoData = repoData.filter((repo, index, self) => { return index === self.findIndex(r => r.external_id === repo.external_id && r.external_codeHostUrl === repo.external_codeHostUrl @@ -263,6 +263,14 @@ export class ConnectionManager implements IConnectionManager { private async onSyncJobFailed(job: Job | undefined, err: unknown) { this.logger.info(`Connection sync job failed with error: ${err}`); + Sentry.captureException(err, { + tags: { + connectionId: job?.data.connectionId, + jobId: job?.id, + queue: QUEUE_NAME, + } + }); + if (job) { const { connectionId } = job.data; diff --git a/packages/backend/src/repoManager.ts b/packages/backend/src/repoManager.ts index 231a32c6..ff8cd585 100644 --- a/packages/backend/src/repoManager.ts +++ b/packages/backend/src/repoManager.ts @@ -130,15 +130,25 @@ export class RepoManager implements IRepoManager { const thresholdDate = new Date(Date.now() - this.settings.reindexIntervalMs); const repos = await this.db.repo.findMany({ where: { - repoIndexingStatus: { - in: [ - RepoIndexingStatus.NEW, - RepoIndexingStatus.INDEXED - ] - }, OR: [ - { indexedAt: null }, - { indexedAt: { lt: thresholdDate } }, + // "NEW" is really a misnomer here - it just means that the repo needs to be indexed + // immediately. In most cases, this will be because the repo was just created and + // is indeed "new". However, it could also be that a "retry" was requested on a failed + // index. So, we don't want to block on the indexedAt timestamp here. + { + repoIndexingStatus: RepoIndexingStatus.NEW, + }, + // When the repo has already been indexed, we only want to reindex if the reindexing + // interval has elapsed (or if the date isn't set for some reason). 
+ { + AND: [ + { repoIndexingStatus: RepoIndexingStatus.INDEXED }, + { OR: [ + { indexedAt: null }, + { indexedAt: { lt: thresholdDate } }, + ]} + ] + } ] }, include: { @@ -335,7 +345,15 @@ export class RepoManager implements IRepoManager { } private async onIndexJobFailed(job: Job | undefined, err: unknown) { - this.logger.info(`Repo index job failed (id: ${job?.id ?? 'unknown'})`); + this.logger.info(`Repo index job failed (id: ${job?.id ?? 'unknown'}) with error: ${err}`); + Sentry.captureException(err, { + tags: { + repoId: job?.data.repo.id, + jobId: job?.id, + queue: REPO_INDEXING_QUEUE, + } + }); + if (job) { this.promClient.activeRepoIndexingJobs.dec(); this.promClient.repoIndexingFailTotal.inc(); @@ -474,6 +492,13 @@ export class RepoManager implements IRepoManager { private async onGarbageCollectionJobFailed(job: Job | undefined, err: unknown) { this.logger.info(`Garbage collection job failed (id: ${job?.id ?? 'unknown'}) with error: ${err}`); + Sentry.captureException(err, { + tags: { + repoId: job?.data.repo.id, + jobId: job?.id, + queue: REPO_GC_QUEUE, + } + }); if (job) { this.promClient.activeRepoGarbageCollectionJobs.dec(); diff --git a/packages/web/src/app/posthogProvider.tsx b/packages/web/src/app/posthogProvider.tsx index 404c0ec6..c8198b7b 100644 --- a/packages/web/src/app/posthogProvider.tsx +++ b/packages/web/src/app/posthogProvider.tsx @@ -38,8 +38,8 @@ export function PostHogProvider({ children, disabled }: PostHogProviderProps) { // @see next.config.mjs for path rewrites to the "/ingest" route. 
api_host: "/ingest", person_profiles: 'identified_only', - capture_pageview: false, // @nocheckin Disable automatic pageview capture if we're not in public demo mode - autocapture: false, // Disable automatic event capture + capture_pageview: false, + autocapture: false, // eslint-disable-next-line @typescript-eslint/no-explicit-any sanitize_properties: (properties: Record, _event: string) => { // https://posthog.com/docs/libraries/js#config diff --git a/packages/web/src/initialize.ts b/packages/web/src/initialize.ts index fa5b51a4..42fac9d6 100644 --- a/packages/web/src/initialize.ts +++ b/packages/web/src/initialize.ts @@ -1,4 +1,4 @@ -import { ConnectionSyncStatus, OrgRole, Prisma } from '@sourcebot/db'; +import { ConnectionSyncStatus, OrgRole, Prisma, RepoIndexingStatus } from '@sourcebot/db'; import { env } from './env.mjs'; import { prisma } from "@/prisma"; import { SINGLE_TENANT_USER_ID, SINGLE_TENANT_ORG_ID, SINGLE_TENANT_ORG_DOMAIN, SINGLE_TENANT_ORG_NAME, SINGLE_TENANT_USER_EMAIL } from './lib/constants'; @@ -105,6 +105,13 @@ const initSingleTenancy = async () => { name: key, orgId: SINGLE_TENANT_ORG_ID, } + }, + include: { + repos: { + include: { + repo: true, + } + } } }); @@ -137,6 +144,21 @@ const initSingleTenancy = async () => { }); console.log(`Upserted connection with name '${key}'. Connection ID: ${connectionDb.id}`); + + // Re-try any repos that failed to index. + const failedRepos = currentConnection?.repos.filter(repo => repo.repo.repoIndexingStatus === RepoIndexingStatus.FAILED).map(repo => repo.repo.id) ?? []; + if (failedRepos.length > 0) { + await prisma.repo.updateMany({ + where: { + id: { + in: failedRepos, + } + }, + data: { + repoIndexingStatus: RepoIndexingStatus.NEW, + } + }) + } } } }