diff --git a/packages/backend/src/config.ts b/packages/backend/src/config.ts deleted file mode 100644 index a885a61a..00000000 --- a/packages/backend/src/config.ts +++ /dev/null @@ -1,139 +0,0 @@ -import { PrismaClient } from '@sourcebot/db'; -import { readFile } from 'fs/promises'; -import stripJsonComments from 'strip-json-comments'; -import { getGitHubReposFromConfig } from "./github.js"; -import { getGitLabReposFromConfig, GITLAB_CLOUD_HOSTNAME } from "./gitlab.js"; -import { SourcebotConfigurationSchema } from "@sourcebot/schemas/v2/index.type"; -import { AppContext } from "./types.js"; -import { getTokenFromConfig, isRemotePath, marshalBool } from "./utils.js"; - -export const fetchConfigFromPath = async (configPath: string, signal: AbortSignal) => { - const configContent = await (async () => { - if (isRemotePath(configPath)) { - const response = await fetch(configPath, { - signal, - }); - if (!response.ok) { - throw new Error(`Failed to fetch config file ${configPath}: ${response.statusText}`); - } - return response.text(); - } else { - return readFile(configPath, { - encoding: 'utf-8', - signal, - }); - } - })(); - - const config = JSON.parse(stripJsonComments(configContent)) as SourcebotConfigurationSchema; - return config; -} - -export const syncConfig = async (config: SourcebotConfigurationSchema, db: PrismaClient, signal: AbortSignal, ctx: AppContext) => { - for (const repoConfig of config.repos ?? []) { - switch (repoConfig.type) { - case 'github': { - const token = repoConfig.token ? getTokenFromConfig(repoConfig.token, ctx) : undefined; - const gitHubRepos = await getGitHubReposFromConfig(repoConfig, signal, ctx); - const hostUrl = repoConfig.url ?? 'https://github.com'; - const hostname = repoConfig.url ? new URL(repoConfig.url).hostname : 'github.com'; - const tenantId = repoConfig.tenantId ?? 
0; - - await Promise.all(gitHubRepos.map((repo) => { - const repoName = `${hostname}/${repo.full_name}`; - const cloneUrl = new URL(repo.clone_url!); - if (token) { - cloneUrl.username = token; - } - - const data = { - external_id: repo.id.toString(), - external_codeHostType: 'github', - external_codeHostUrl: hostUrl, - cloneUrl: cloneUrl.toString(), - name: repoName, - isFork: repo.fork, - isArchived: !!repo.archived, - tenantId: tenantId, - metadata: { - 'zoekt.web-url-type': 'github', - 'zoekt.web-url': repo.html_url, - 'zoekt.name': repoName, - 'zoekt.github-stars': (repo.stargazers_count ?? 0).toString(), - 'zoekt.github-watchers': (repo.watchers_count ?? 0).toString(), - 'zoekt.github-subscribers': (repo.subscribers_count ?? 0).toString(), - 'zoekt.github-forks': (repo.forks_count ?? 0).toString(), - 'zoekt.archived': marshalBool(repo.archived), - 'zoekt.fork': marshalBool(repo.fork), - 'zoekt.public': marshalBool(repo.private === false) - }, - }; - - return db.repo.upsert({ - where: { - external_id_external_codeHostUrl: { - external_id: repo.id.toString(), - external_codeHostUrl: hostUrl, - }, - }, - create: data, - update: data, - }) - })); - - break; - } - case 'gitlab': { - const hostUrl = repoConfig.url ?? 'https://gitlab.com'; - const hostname = repoConfig.url ? new URL(repoConfig.url).hostname : GITLAB_CLOUD_HOSTNAME; - const token = repoConfig.token ? 
getTokenFromConfig(repoConfig.token, ctx) : undefined;
-                const gitLabRepos = await getGitLabReposFromConfig(repoConfig, ctx);
-
-                await Promise.all(gitLabRepos.map((project) => {
-                    const repoName = `${hostname}/${project.path_with_namespace}`;
-                    const isFork = project.forked_from_project !== undefined;
-
-                    const cloneUrl = new URL(project.http_url_to_repo);
-                    if (token) {
-                        cloneUrl.username = 'oauth2';
-                        cloneUrl.password = token;
-                    }
-
-                    const data = {
-                        external_id: project.id.toString(),
-                        external_codeHostType: 'gitlab',
-                        external_codeHostUrl: hostUrl,
-                        cloneUrl: cloneUrl.toString(),
-                        name: repoName,
-                        tenantId: 0, // TODO: add support for tenantId in GitLab config
-                        isFork,
-                        isArchived: !!project.archived,
-                        metadata: {
-                            'zoekt.web-url-type': 'gitlab',
-                            'zoekt.web-url': project.web_url,
-                            'zoekt.name': repoName,
-                            'zoekt.gitlab-stars': project.star_count?.toString() ?? '0',
-                            'zoekt.gitlab-forks': project.forks_count?.toString() ?? '0',
-                            'zoekt.archived': marshalBool(project.archived),
-                            'zoekt.fork': marshalBool(isFork),
-                            'zoekt.public': marshalBool(project.visibility === 'public'),
-                        }
-                    }
-
-                    return db.repo.upsert({
-                        where: {
-                            external_id_external_codeHostUrl: {
-                                external_id: project.id.toString(),
-                                external_codeHostUrl: hostUrl,
-                            },
-                        },
-                        create: data,
-                        update: data,
-                    })
-                }));
-
-                break;
-            }
-        }
-    }
-}
\ No newline at end of file
diff --git a/packages/backend/src/connectionManager.ts b/packages/backend/src/connectionManager.ts
new file mode 100644
index 00000000..c4b25c72
--- /dev/null
+++ b/packages/backend/src/connectionManager.ts
@@ -0,0 +1,229 @@
+import { Connection, ConnectionSyncStatus, PrismaClient, Prisma } from "@sourcebot/db";
+import { Job, Queue, Worker } from 'bullmq';
+import { AppContext, Settings, WithRequired } from "./types.js";
+import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
+import { createLogger } from "./logger.js";
+import os from 'os';
+import { Redis } from 'ioredis';
+import { getTokenFromConfig, marshalBool } from "./utils.js";
+import { getGitHubReposFromConfig } from "./github.js";
+
+interface IConnectionManager {
+    scheduleConnectionSync: (connection: Connection) => Promise<void>;
+    dispose: () => void;
+}
+
+const QUEUE_NAME = 'connectionSyncQueue';
+
+// Payload attached to every job placed on the connection sync queue.
+type JobPayload = {
+    connectionId: number,
+    orgId: number,
+    config: ConnectionConfig,
+};
+
+/**
+ * Schedules connection sync jobs on a BullMQ queue and processes them with a
+ * worker. A sync job resolves the repos described by a connection's config and
+ * upserts them, linking each repo to the connection.
+ */
+export class ConnectionManager implements IConnectionManager {
+    private queue: Queue<JobPayload>;
+    private worker: Worker;
+    private logger = createLogger('ConnectionManager');
+
+    constructor(
+        private db: PrismaClient,
+        settings: Settings,
+        redis: Redis,
+        private context: AppContext,
+    ) {
+        // The producer must use the same Redis instance as the worker below. A Queue
+        // created without `connection` ignores the `redis` client passed to us, so
+        // jobs could be enqueued where the worker never looks.
+        this.queue = new Queue<JobPayload>(QUEUE_NAME, { connection: redis });
+
+        const numCores = os.cpus().length;
+        this.worker = new Worker(QUEUE_NAME, this.runSyncJob.bind(this), {
+            connection: redis,
+            concurrency: numCores * settings.configSyncConcurrencyMultiple,
+        });
+        this.worker.on('completed', this.onSyncJobCompleted.bind(this));
+        this.worker.on('failed', this.onSyncJobFailed.bind(this));
+    }
+
+    /**
+     * Marks the connection as IN_SYNC_QUEUE and enqueues a sync job for it.
+     * The enqueue happens inside the transaction callback, so if it throws the
+     * status update is not committed. Failures are logged rather than rethrown.
+     */
+    public async scheduleConnectionSync(connection: Connection): Promise<void> {
+        await this.db.$transaction(async (tx) => {
+            await tx.connection.update({
+                where: { id: connection.id },
+                data: { syncStatus: ConnectionSyncStatus.IN_SYNC_QUEUE },
+            });
+
+            const connectionConfig = connection.config as unknown as ConnectionConfig;
+
+            await this.queue.add('connectionSyncJob', {
+                connectionId: connection.id,
+                orgId: connection.orgId,
+                config: connectionConfig,
+            });
+            this.logger.info(`Added job to queue for connection ${connection.id}`);
+        }).catch((err: unknown) => {
+            this.logger.error(`Failed to add job to queue for connection ${connection.id}: ${err}`);
+        });
+    }
+
+    /**
+     * Worker processor. Fetches the repos matched by the job's config, then (in a
+     * single transaction) clears the connection's repo links and upserts the repos.
+     * Nothing is caught here, so any error propagates to the worker.
+     */
+    private async runSyncJob(job: Job<JobPayload>) {
+        const { config, orgId, connectionId } = job.data;
+        // @note: We aren't actually doing anything with this atm.
+        const abortController = new AbortController();
+
+        // Record that the job has left the queue and is running.
+        await this.db.connection.update({
+            where: { id: connectionId },
+            data: { syncStatus: ConnectionSyncStatus.SYNCING },
+        });
+
+        // NOTE(review): type arguments inferred from the shape of `record` below - confirm.
+        type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
+        const repoData: RepoData[] = await (async () => {
+            switch (config.type) {
+                case 'github': {
+                    const token = config.token ? getTokenFromConfig(config.token, this.context) : undefined;
+                    const gitHubRepos = await getGitHubReposFromConfig(config, abortController.signal, this.context);
+                    const hostUrl = config.url ?? 'https://github.com';
+                    const hostname = config.url ? new URL(config.url).hostname : 'github.com';
+
+                    return gitHubRepos.map((repo) => {
+                        const repoName = `${hostname}/${repo.full_name}`;
+                        const cloneUrl = new URL(repo.clone_url!);
+                        if (token) {
+                            cloneUrl.username = token;
+                        }
+
+                        const record: RepoData = {
+                            external_id: repo.id.toString(),
+                            external_codeHostType: 'github',
+                            external_codeHostUrl: hostUrl,
+                            cloneUrl: cloneUrl.toString(),
+                            name: repoName,
+                            isFork: repo.fork,
+                            isArchived: !!repo.archived,
+                            org: {
+                                connect: {
+                                    id: orgId,
+                                },
+                            },
+                            connections: {
+                                create: {
+                                    connectionId,
+                                }
+                            },
+                            metadata: {
+                                'zoekt.web-url-type': 'github',
+                                'zoekt.web-url': repo.html_url,
+                                'zoekt.name': repoName,
+                                'zoekt.github-stars': (repo.stargazers_count ?? 0).toString(),
+                                'zoekt.github-watchers': (repo.watchers_count ?? 0).toString(),
+                                'zoekt.github-subscribers': (repo.subscribers_count ?? 0).toString(),
+                                'zoekt.github-forks': (repo.forks_count ?? 0).toString(),
+                                'zoekt.archived': marshalBool(repo.archived),
+                                'zoekt.fork': marshalBool(repo.fork),
+                                'zoekt.public': marshalBool(repo.private === false)
+                            },
+                        };
+
+                        return record;
+                    })
+                }
+            }
+        })();
+
+        // @note: to handle orphaned Repos we delete all RepoToConnection records for this connection,
+        // and then recreate them when we upsert the repos. For example, if a repo is no-longer
+        // captured by the connection's config (e.g., it was deleted, marked archived, etc.), it won't
+        // appear in the repoData array above, and so the RepoToConnection record won't be re-created.
+        // Repos that have no RepoToConnection records are considered orphaned and can be deleted.
+        await this.db.$transaction(async (tx) => {
+            await tx.connection.update({
+                where: {
+                    id: connectionId,
+                },
+                data: {
+                    repos: {
+                        deleteMany: {}
+                    }
+                }
+            });
+
+            await Promise.all(repoData.map((repo) => {
+                return tx.repo.upsert({
+                    where: {
+                        external_id_external_codeHostUrl: {
+                            external_id: repo.external_id,
+                            external_codeHostUrl: repo.external_codeHostUrl,
+                        },
+                    },
+                    create: repo,
+                    update: repo as Prisma.RepoUpdateInput,
+                });
+            }));
+        });
+    }
+
+    /** Worker 'completed' handler: marks the connection SYNCED and stamps syncedAt. */
+    private async onSyncJobCompleted(job: Job<JobPayload>) {
+        this.logger.info(`Connection sync job ${job.id} completed`);
+        const { connectionId } = job.data;
+
+        await this.db.connection.update({
+            where: {
+                id: connectionId,
+            },
+            data: {
+                syncStatus: ConnectionSyncStatus.SYNCED,
+                syncedAt: new Date()
+            }
+        })
+    }
+
+    /**
+     * Worker 'failed' handler: logs the error and, when the job is known, marks its
+     * connection FAILED.
+     */
+    private async onSyncJobFailed(job: Job<JobPayload> | undefined, err: unknown) {
+        this.logger.error(`Connection sync job failed with error: ${err}`);
+        if (job) {
+            const { connectionId } = job.data;
+            await this.db.connection.update({
+                where: {
+                    id: connectionId,
+                },
+                data: {
+                    syncStatus: ConnectionSyncStatus.FAILED,
+                    syncedAt: new Date()
+                }
+            })
+        }
+    }
+
+    /**
+     * Closes the worker and the queue. Returns immediately; close errors are logged
+     * instead of surfacing as unhandled promise rejections.
+     */
+    public dispose() {
+        Promise.all([this.worker.close(), this.queue.close()]).catch((err: unknown) => {
+            this.logger.error(`Failed to dispose connection manager: ${err}`);
+        });
+    }
+}
+
diff --git a/packages/backend/src/constants.ts b/packages/backend/src/constants.ts
index 0c983120..407ec462 100644
--- a/packages/backend/src/constants.ts
+++ b/packages/backend/src/constants.ts
@@ -7,7 +7,7 @@ export const DEFAULT_SETTINGS: Settings = {
     maxFileSize: 2 * 1024 * 1024, // 2MB in bytes
     autoDeleteStaleRepos: true,
     reindexIntervalMs: 1000 * 60,
-    resyncIntervalMs: 1000 * 60 * 60 * 24, // 1 day in milliseconds
+    resyncConnectionPollingIntervalMs: 1000,
     indexConcurrencyMultiple: 3,
     configSyncConcurrencyMultiple: 3,
 }
\ No newline at end of file
diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts
index 104aff5c..6680ce15 100644
--- 
a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -1,5 +1,5 @@ import { Octokit } from "@octokit/rest"; -import { GitHubConfig } from "@sourcebot/schemas/v2/index.type" +import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; import { createLogger } from "./logger.js"; import { AppContext } from "./types.js"; import { getTokenFromConfig, measure } from "./utils.js"; @@ -25,7 +25,7 @@ export type OctokitRepository = { size?: number, } -export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => { +export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, signal: AbortSignal, ctx: AppContext) => { const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined; const octokit = new Octokit({ @@ -93,9 +93,9 @@ export const shouldExcludeRepo = ({ } : { repo: OctokitRepository, include?: { - topics?: GitHubConfig['topics'] + topics?: GithubConnectionConfig['topics'] }, - exclude?: GitHubConfig['exclude'] + exclude?: GithubConnectionConfig['exclude'] }) => { let reason = ''; const repoName = repo.full_name; @@ -202,8 +202,9 @@ const getReposOwnedByUsers = async (users: string[], isAuthenticated: boolean, o logger.debug(`Found ${data.length} owned by user ${user} in ${durationMs}ms.`); return data; } catch (e) { + // @todo: handle rate limiting errors logger.error(`Failed to fetch repository info for user ${user}.`, e); - return []; + throw e; } }))).flat(); @@ -226,8 +227,9 @@ const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSi logger.debug(`Found ${data.length} in org ${org} in ${durationMs}ms.`); return data; } catch (e) { + // @todo: handle rate limiting errors logger.error(`Failed to fetch repository info for org ${org}.`, e); - return []; + throw e; } }))).flat(); @@ -252,8 +254,9 @@ const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSigna return [result.data]; } catch (e) { + // @todo: 
handle rate limiting errors logger.error(`Failed to fetch repository info for ${repo}.`, e); - return []; + throw e; } }))).flat(); diff --git a/packages/backend/src/main.ts b/packages/backend/src/main.ts index 34f0e28c..7e9d2f80 100644 --- a/packages/backend/src/main.ts +++ b/packages/backend/src/main.ts @@ -1,18 +1,16 @@ -import { ConfigSyncStatus, PrismaClient, Repo, Config, RepoIndexingStatus, Prisma } from '@sourcebot/db'; -import { existsSync, watch } from 'fs'; -import { fetchConfigFromPath, syncConfig } from "./config.js"; +import { ConnectionSyncStatus, PrismaClient, Repo, RepoIndexingStatus } from '@sourcebot/db'; +import { existsSync } from 'fs'; import { cloneRepository, fetchRepository } from "./git.js"; import { createLogger } from "./logger.js"; import { captureEvent } from "./posthog.js"; import { AppContext } from "./types.js"; -import { getRepoPath, isRemotePath, measure } from "./utils.js"; +import { getRepoPath, measure } from "./utils.js"; import { indexGitRepository } from "./zoekt.js"; import { DEFAULT_SETTINGS } from './constants.js'; import { Queue, Worker, Job } from 'bullmq'; import { Redis } from 'ioredis'; import * as os from 'os'; -import { SOURCEBOT_TENANT_MODE } from './environment.js'; -import { SourcebotConfigurationSchema } from "@sourcebot/schemas/v2/index.type" +import { ConnectionManager } from './connectionManager.js'; const logger = createLogger('main'); @@ -58,23 +56,6 @@ const syncGitRepository = async (repo: Repo, ctx: AppContext) => { } } -async function addConfigsToQueue(db: PrismaClient, queue: Queue, configs: Config[]) { - for (const config of configs) { - await db.$transaction(async (tx) => { - await tx.config.update({ - where: { id: config.id }, - data: { syncStatus: ConfigSyncStatus.IN_SYNC_QUEUE }, - }); - - // Add the job to the queue - await queue.add('configSyncJob', config); - logger.info(`Added job to queue for config ${config.id}`); - }).catch((err: unknown) => { - logger.error(`Failed to add job to queue for 
config ${config.id}: ${err}`); - }); - } -} - async function addReposToQueue(db: PrismaClient, queue: Queue, repos: Repo[]) { for (const repo of repos) { await db.$transaction(async (tx) => { @@ -93,79 +74,6 @@ async function addReposToQueue(db: PrismaClient, queue: Queue, repos: Repo[]) { } export const main = async (db: PrismaClient, context: AppContext) => { - let abortController = new AbortController(); - let isSyncing = false; - const _syncConfig = async (dbConfig?: Config | undefined) => { - - // Fetch config object and update syncing status - let config: SourcebotConfigurationSchema; - switch (SOURCEBOT_TENANT_MODE) { - case 'single': - logger.info(`Syncing configuration file ${context.configPath} ...`); - - if (isSyncing) { - abortController.abort(); - abortController = new AbortController(); - } - config = await fetchConfigFromPath(context.configPath, abortController.signal); - isSyncing = true; - break; - case 'multi': - if(!dbConfig) { - throw new Error('config object is required in multi tenant mode'); - } - config = dbConfig.data as SourcebotConfigurationSchema - db.config.update({ - where: { - id: dbConfig.id, - }, - data: { - syncStatus: ConfigSyncStatus.SYNCING, - } - }) - break; - default: - throw new Error(`Invalid SOURCEBOT_TENANT_MODE: ${SOURCEBOT_TENANT_MODE}`); - } - - // Attempt to sync the config, handle failure cases - try { - const { durationMs } = await measure(() => syncConfig(config, db, abortController.signal, context)) - logger.info(`Synced configuration in ${durationMs / 1000}s`); - isSyncing = false; - } catch (err: any) { - switch(SOURCEBOT_TENANT_MODE) { - case 'single': - if (err.name === "AbortError") { - // @note: If we're aborting, we don't want to set isSyncing to false - // since it implies another sync is in progress. 
- } else { - isSyncing = false; - logger.error(`Failed to sync configuration file with error:`); - console.log(err); - } - break; - case 'multi': - if (dbConfig) { - await db.config.update({ - where: { - id: dbConfig.id, - }, - data: { - syncStatus: ConfigSyncStatus.FAILED, - } - }) - logger.error(`Failed to sync configuration ${dbConfig.id} with error: ${err}`); - } else { - logger.error(`DB config undefined. Failed to sync configuration with error: ${err}`); - } - break; - default: - throw new Error(`Invalid SOURCEBOT_TENANT_MODE: ${SOURCEBOT_TENANT_MODE}`); - } - } - } - ///////////////////////////// // Init Redis ///////////////////////////// @@ -182,71 +90,18 @@ export const main = async (db: PrismaClient, context: AppContext) => { process.exit(1); }); - ///////////////////////////// - // Setup config sync watchers - ///////////////////////////// - switch (SOURCEBOT_TENANT_MODE) { - case 'single': - // Re-sync on file changes if the config file is local - if (!isRemotePath(context.configPath)) { - watch(context.configPath, () => { - logger.info(`Config file ${context.configPath} changed. Re-syncing...`); - _syncConfig(); - }); + const connectionManager = new ConnectionManager(db, DEFAULT_SETTINGS, redis, context); + setInterval(async () => { + const configs = await db.connection.findMany({ + where: { + syncStatus: ConnectionSyncStatus.SYNC_NEEDED, } - - // Re-sync at a fixed interval - setInterval(() => { - _syncConfig(); - }, DEFAULT_SETTINGS.resyncIntervalMs); - - // Sync immediately on startup - await _syncConfig(); - break; - case 'multi': - // Setup config sync queue and workers - const configSyncQueue = new Queue('configSyncQueue'); - const numCores = os.cpus().length; - const numWorkers = numCores * DEFAULT_SETTINGS.configSyncConcurrencyMultiple; - logger.info(`Detected ${numCores} cores. 
Setting config sync max concurrency to ${numWorkers}`); - const configSyncWorker = new Worker('configSyncQueue', async (job: Job) => { - const config = job.data as Config; - await _syncConfig(config); - }, { connection: redis, concurrency: numWorkers }); - configSyncWorker.on('completed', async (job: Job) => { - logger.info(`Config sync job ${job.id} completed`); - - const config = job.data as Config; - await db.config.update({ - where: { - id: config.id, - }, - data: { - syncStatus: ConfigSyncStatus.SYNCED, - syncedAt: new Date() - } - }) - }); - configSyncWorker.on('failed', (job: Job | undefined, err: unknown) => { - logger.info(`Config sync job failed with error: ${err}`); - }); - - setInterval(async () => { - const configs = await db.config.findMany({ - where: { - syncStatus: ConfigSyncStatus.SYNC_NEEDED, - } - }); - - logger.info(`Found ${configs.length} configs to sync...`); - addConfigsToQueue(db, configSyncQueue, configs); - }, 1000); - break; - default: - throw new Error(`Invalid SOURCEBOT_TENANT_MODE: ${SOURCEBOT_TENANT_MODE}`); - } - - + }); + for (const config of configs) { + await connectionManager.scheduleConnectionSync(config); + } + }, DEFAULT_SETTINGS.resyncConnectionPollingIntervalMs); + ///////////////////////// // Setup repo indexing ///////////////////////// @@ -318,7 +173,6 @@ export const main = async (db: PrismaClient, context: AppContext) => { ] } }); - logger.info(`Found ${repos.length} repos to index...`); addReposToQueue(db, indexQueue, repos); diff --git a/packages/backend/src/types.ts b/packages/backend/src/types.ts index a6e80b81..71d8fff8 100644 --- a/packages/backend/src/types.ts +++ b/packages/backend/src/types.ts @@ -71,9 +71,9 @@ export type Settings = { */ reindexIntervalMs: number; /** - * The interval (in milliseconds) at which the configuration file should be re-synced. + * The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced. 
*/ - resyncIntervalMs: number; + resyncConnectionPollingIntervalMs: number; /** * The multiple of the number of CPUs to use for indexing. */ @@ -87,4 +87,7 @@ export type Settings = { // @see : https://stackoverflow.com/a/61132308 export type DeepPartial = T extends object ? { [P in keyof T]?: DeepPartial; -} : T; \ No newline at end of file +} : T; + +// @see: https://stackoverflow.com/a/69328045 +export type WithRequired = T & { [P in K]-?: T[P] }; \ No newline at end of file diff --git a/packages/backend/src/zoekt.ts b/packages/backend/src/zoekt.ts index e5c7ffc5..088ae03c 100644 --- a/packages/backend/src/zoekt.ts +++ b/packages/backend/src/zoekt.ts @@ -11,11 +11,9 @@ export const indexGitRepository = async (repo: Repo, ctx: AppContext) => { 'HEAD' ]; - const tenantId = repo.tenantId ?? 0; - const shardPrefix = `${tenantId}_${repo.id}`; - + const shardPrefix = `${repo.orgId}_${repo.id}`; const repoPath = getRepoPath(repo, ctx); - const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${tenantId} -shard_prefix ${shardPrefix} ${repoPath}`; + const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${repo.orgId} -shard_prefix ${shardPrefix} ${repoPath}`; return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => { exec(command, (error, stdout, stderr) => { diff --git a/packages/db/prisma/migrations/20250124045320_rename_config_to_connection/migration.sql b/packages/db/prisma/migrations/20250124045320_rename_config_to_connection/migration.sql new file mode 100644 index 00000000..059b5ccd --- /dev/null +++ b/packages/db/prisma/migrations/20250124045320_rename_config_to_connection/migration.sql @@ -0,0 +1,33 @@ +/* + Warnings: + + - You are about to drop the `Config` table. 
If the table is not empty, all the data it contains will be lost. + +*/ +-- CreateEnum +CREATE TYPE "ConnectionSyncStatus" AS ENUM ('SYNC_NEEDED', 'IN_SYNC_QUEUE', 'SYNCING', 'SYNCED', 'FAILED'); + +-- DropForeignKey +ALTER TABLE "Config" DROP CONSTRAINT "Config_orgId_fkey"; + +-- DropTable +DROP TABLE "Config"; + +-- DropEnum +DROP TYPE "ConfigSyncStatus"; + +-- CreateTable +CREATE TABLE "Connection" ( + "id" SERIAL NOT NULL, + "config" JSONB NOT NULL, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + "syncedAt" TIMESTAMP(3), + "syncStatus" "ConnectionSyncStatus" NOT NULL DEFAULT 'SYNC_NEEDED', + "orgId" INTEGER NOT NULL, + + CONSTRAINT "Connection_pkey" PRIMARY KEY ("id") +); + +-- AddForeignKey +ALTER TABLE "Connection" ADD CONSTRAINT "Connection_orgId_fkey" FOREIGN KEY ("orgId") REFERENCES "Org"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/packages/db/prisma/migrations/20250124063518_remove_repo_tenant_id/migration.sql b/packages/db/prisma/migrations/20250124063518_remove_repo_tenant_id/migration.sql new file mode 100644 index 00000000..4a61724d --- /dev/null +++ b/packages/db/prisma/migrations/20250124063518_remove_repo_tenant_id/migration.sql @@ -0,0 +1,10 @@ +/* + Warnings: + + - You are about to drop the column `tenantId` on the `Repo` table. All the data in the column will be lost. + - Made the column `orgId` on table `Repo` required. This step will fail if there are existing NULL values in that column. 
+ +*/ +-- AlterTable +ALTER TABLE "Repo" DROP COLUMN "tenantId", +ALTER COLUMN "orgId" SET NOT NULL; diff --git a/packages/db/prisma/migrations/20250124173816_relate_connection_and_repo/migration.sql b/packages/db/prisma/migrations/20250124173816_relate_connection_and_repo/migration.sql new file mode 100644 index 00000000..b61d111f --- /dev/null +++ b/packages/db/prisma/migrations/20250124173816_relate_connection_and_repo/migration.sql @@ -0,0 +1,14 @@ +-- CreateTable +CREATE TABLE "RepoToConnection" ( + "addedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "connectionId" INTEGER NOT NULL, + "repoId" INTEGER NOT NULL, + + CONSTRAINT "RepoToConnection_pkey" PRIMARY KEY ("connectionId","repoId") +); + +-- AddForeignKey +ALTER TABLE "RepoToConnection" ADD CONSTRAINT "RepoToConnection_connectionId_fkey" FOREIGN KEY ("connectionId") REFERENCES "Connection"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "RepoToConnection" ADD CONSTRAINT "RepoToConnection_repoId_fkey" FOREIGN KEY ("repoId") REFERENCES "Repo"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index 7490155d..ec536d0b 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -18,7 +18,7 @@ enum RepoIndexingStatus { FAILED } -enum ConfigSyncStatus { +enum ConnectionSyncStatus { SYNC_NEEDED IN_SYNC_QUEUE SYNCING @@ -36,7 +36,7 @@ model Repo { isArchived Boolean metadata Json cloneUrl String - tenantId Int + connections RepoToConnection[] repoIndexingStatus RepoIndexingStatus @default(NEW) @@ -47,34 +47,47 @@ model Repo { // The base url of the external service (e.g., https://github.com) external_codeHostUrl String - org Org? @relation(fields: [orgId], references: [id], onDelete: Cascade) - orgId Int? 
+ org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) + orgId Int @@unique([external_id, external_codeHostUrl]) } -model Config { - id Int @id @default(autoincrement()) - data Json - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt +model Connection { + id Int @id @default(autoincrement()) + config Json + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt syncedAt DateTime? + repos RepoToConnection[] + + syncStatus ConnectionSyncStatus @default(SYNC_NEEDED) + + // The organization that owns this connection + org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) + orgId Int +} + +model RepoToConnection { + addedAt DateTime @default(now()) + + connection Connection @relation(fields: [connectionId], references: [id], onDelete: Cascade) + connectionId Int - syncStatus ConfigSyncStatus @default(SYNC_NEEDED) + repo Repo @relation(fields: [repoId], references: [id], onDelete: Cascade) + repoId Int - // The organization that owns this config - org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) - orgId Int + @@id([connectionId, repoId]) } model Org { - id Int @id @default(autoincrement()) - name String - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt - members UserToOrg[] - configs Config[] - repos Repo[] + id Int @id @default(autoincrement()) + name String + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + members UserToOrg[] + connections Connection[] + repos Repo[] } enum OrgRole { diff --git a/packages/schemas/src/v3/connection.schema.ts b/packages/schemas/src/v3/connection.schema.ts new file mode 100644 index 00000000..0a86ecf2 --- /dev/null +++ b/packages/schemas/src/v3/connection.schema.ts @@ -0,0 +1,207 @@ +// THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! 
+const schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ConnectionConfig", + "oneOf": [ + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "GithubConnectionConfig", + "properties": { + "type": { + "const": "github", + "description": "GitHub Configuration" + }, + "token": { + "description": "A Personal Access Token (PAT).", + "examples": [ + "secret-token", + { + "env": "ENV_VAR_CONTAINING_TOKEN" + } + ], + "anyOf": [ + { + "type": "string" + }, + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + } + ] + }, + "url": { + "type": "string", + "format": "url", + "default": "https://github.com", + "description": "The URL of the GitHub host. Defaults to https://github.com", + "examples": [ + "https://github.com", + "https://github.example.com" + ], + "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$" + }, + "users": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[\\w.-]+$" + }, + "examples": [ + [ + "torvalds", + "DHH" + ] + ], + "description": "List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property." + }, + "orgs": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[\\w.-]+$" + }, + "examples": [ + [ + "my-org-name" + ], + [ + "sourcebot-dev", + "commaai" + ] + ], + "description": "List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property." + }, + "repos": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[\\w.-]+\\/[\\w.-]+$" + }, + "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'." 
+ }, + "topics": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "description": "List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.", + "examples": [ + [ + "docs", + "core" + ] + ] + }, + "exclude": { + "type": "object", + "properties": { + "forks": { + "type": "boolean", + "default": false, + "description": "Exclude forked repositories from syncing." + }, + "archived": { + "type": "boolean", + "default": false, + "description": "Exclude archived repositories from syncing." + }, + "repos": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "List of individual repositories to exclude from syncing. Glob patterns are supported." + }, + "topics": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.", + "examples": [ + [ + "tests", + "ci" + ] + ] + }, + "size": { + "type": "object", + "description": "Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned.", + "properties": { + "min": { + "type": "integer", + "description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing." + }, + "max": { + "type": "integer", + "description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing." 
+ } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "revisions": { + "type": "object", + "description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed.", + "properties": { + "branches": { + "type": "array", + "description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported.", + "items": { + "type": "string" + }, + "examples": [ + [ + "main", + "release/*" + ], + [ + "**" + ] + ], + "default": [] + }, + "tags": { + "type": "array", + "description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported.", + "items": { + "type": "string" + }, + "examples": [ + [ + "latest", + "v2.*.*" + ], + [ + "**" + ] + ], + "default": [] + } + }, + "additionalProperties": false + } + }, + "required": [ + "type" + ], + "additionalProperties": false + } + ] +} as const; +export { schema as connectionSchema }; \ No newline at end of file diff --git a/packages/schemas/src/v3/connection.type.ts b/packages/schemas/src/v3/connection.type.ts new file mode 100644 index 00000000..30c0ff27 --- /dev/null +++ b/packages/schemas/src/v3/connection.type.ts @@ -0,0 +1,88 @@ +// THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! + +export type ConnectionConfig = GithubConnectionConfig; + +export interface GithubConnectionConfig { + /** + * GitHub Configuration + */ + type: "github"; + /** + * A Personal Access Token (PAT). + */ + token?: + | string + | { + /** + * The name of the environment variable that contains the token. + */ + env: string; + }; + /** + * The URL of the GitHub host. 
Defaults to https://github.com + */ + url?: string; + /** + * List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property. + */ + users?: string[]; + /** + * List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. + */ + orgs?: string[]; + /** + * List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'. + */ + repos?: string[]; + /** + * List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported. + * + * @minItems 1 + */ + topics?: string[]; + exclude?: { + /** + * Exclude forked repositories from syncing. + */ + forks?: boolean; + /** + * Exclude archived repositories from syncing. + */ + archived?: boolean; + /** + * List of individual repositories to exclude from syncing. Glob patterns are supported. + */ + repos?: string[]; + /** + * List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported. + */ + topics?: string[]; + /** + * Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned. + */ + size?: { + /** + * Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing. + */ + min?: number; + /** + * Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing. 
+ */ + max?: number; + }; + }; + revisions?: GitRevisions; +} +/** + * The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. + */ +export interface GitRevisions { + /** + * List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. + */ + branches?: string[]; + /** + * List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. + */ + tags?: string[]; +} diff --git a/packages/schemas/src/v3/github.schema.ts b/packages/schemas/src/v3/github.schema.ts index 0c88f81b..546e10a8 100644 --- a/packages/schemas/src/v3/github.schema.ts +++ b/packages/schemas/src/v3/github.schema.ts @@ -2,7 +2,7 @@ const schema = { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", - "title": "GitHubConfig", + "title": "GithubConnectionConfig", "properties": { "type": { "const": "github", @@ -99,10 +99,6 @@ const schema = { ] ] }, - "tenantId": { - "type": "number", - "description": "@nocheckin" - }, "exclude": { "type": "object", "properties": { diff --git a/packages/schemas/src/v3/github.type.ts b/packages/schemas/src/v3/github.type.ts index d9c7e1aa..d7355c5b 100644 --- a/packages/schemas/src/v3/github.type.ts +++ b/packages/schemas/src/v3/github.type.ts @@ -1,6 +1,6 @@ // THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! -export interface GitHubConfig { +export interface GithubConnectionConfig { /** * GitHub Configuration */ @@ -38,10 +38,6 @@ export interface GitHubConfig { * @minItems 1 */ topics?: string[]; - /** - * @nocheckin - */ - tenantId?: number; exclude?: { /** * Exclude forked repositories from syncing. 
diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 6027ac96..717a74fe 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -121,10 +121,10 @@ export const createConnection = async (config: string): Promise<{ id: number } | } satisfies ServiceError; } - const connection = await prisma.config.create({ + const connection = await prisma.connection.create({ data: { orgId: orgId, - data: parsedConfig, + config: parsedConfig, } }); diff --git a/schemas/v3/connection.json b/schemas/v3/connection.json new file mode 100644 index 00000000..a40c76e1 --- /dev/null +++ b/schemas/v3/connection.json @@ -0,0 +1,9 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ConnectionConfig", + "oneOf": [ + { + "$ref": "./github.json" + } + ] +} \ No newline at end of file diff --git a/schemas/v3/github.json b/schemas/v3/github.json index bc0c1d44..dad32b1c 100644 --- a/schemas/v3/github.json +++ b/schemas/v3/github.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", - "title": "GitHubConfig", + "title": "GithubConnectionConfig", "properties": { "type": { "const": "github", @@ -81,10 +81,6 @@ ] ] }, - "tenantId": { - "type": "number", - "description": "@nocheckin" - }, "exclude": { "type": "object", "properties": {