Skip to content

Improved Gitlab All in case of many repositories #372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 98 additions & 66 deletions packages/backend/src/gitlab.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { PrismaClient } from "@sourcebot/db";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import * as Sentry from "@sentry/node";
import { env } from "./env.js";
import { log } from "console";

const logger = createLogger('gitlab');
export const GITLAB_CLOUD_HOSTNAME = "gitlab.com";
Expand Down Expand Up @@ -45,15 +46,27 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
if (config.all === true) {
if (hostname !== GITLAB_CLOUD_HOSTNAME) {
try {
logger.debug(`Fetching all projects visible in ${config.url}...`);
const { durationMs, data: _projects } = await measure(async () => {
const fetchFn = () => api.Projects.all({
perPage: 100,
});
return fetchWithRetry(fetchFn, `all projects in ${config.url}`, logger);
// Fetch all groups
logger.debug(`Fetching all groups visible in ${config.url}...`);
const { durationMs: groupsDuration, data: _groups } = await measure(async () => {
const fetchFn = () => api.Groups.all({ perPage: 100, allAvailable: true });
return fetchWithRetry(fetchFn, `all groups in ${config.url}`, logger);
});
logger.debug(`Found ${_groups.length} groups in ${groupsDuration}ms.`);

config.groups = _groups.map(g => g.full_path);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Avoid mutating input parameters.

Modifying the config object directly creates unexpected side effects for callers. Consider storing the discovered groups and users in local variables instead.

-                config.groups = _groups.map(g => g.full_path);
+                const discoveredGroups = _groups.map(g => g.full_path);
+                config = { ...config, groups: discoveredGroups };
-                config.users = _users.map(u => u.username);
+                const discoveredUsers = _users.map(u => u.username);
+                config = { ...config, users: discoveredUsers };

Also applies to: 69-69

🤖 Prompt for AI Agents
In packages/backend/src/gitlab.ts at lines 57 and 69, avoid directly mutating
the input parameter `config` by assigning to its properties. Instead, create
local variables to hold the groups and users data derived from `_groups` and
other sources, and use these local variables within the function. This prevents
side effects on the caller's `config` object and maintains functional purity.


logger.debug(`Found these groups: ${config.groups.join('\n')}`);

// Fetch all users - too much for sourcebot/gitlab
logger.debug(`Fetching all users visible in ${config.url}...`);
const { durationMs: usersDuration, data: _users } = await measure(async () => {
const fetchFn = () => api.Users.all({ perPage: 100, withoutProjects: false });
return fetchWithRetry(fetchFn, `all users in ${config.url}`, logger);
});
logger.debug(`Found ${_projects.length} projects in ${durationMs}ms.`);
allRepos = allRepos.concat(_projects);
logger.debug(`Found ${_users.length} users in ${usersDuration}ms.`);

config.users = _users.map(u => u.username);
} catch (e) {
Sentry.captureException(e);
logger.error(`Failed to fetch all projects visible in ${config.url}.`, e);
Expand All @@ -65,76 +78,96 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
}

if (config.groups) {
const results = await Promise.allSettled(config.groups.map(async (group) => {
try {
logger.debug(`Fetching project info for group ${group}...`);
const { durationMs, data } = await measure(async () => {
const fetchFn = () => api.Groups.allProjects(group, {
perPage: 100,
includeSubgroups: true
const batchSize = 10;
const allResults = [];

// Process groups in batches of 10
for (let i = 0; i < config.groups.length; i += batchSize) {
const batch = config.groups.slice(i, i + batchSize);
logger.debug(`Processing batch ${i/batchSize + 1} of ${Math.ceil(config.groups.length/batchSize)} (${batch.length} groups)`);

const batchResults = await Promise.allSettled(batch.map(async (group) => {
try {
logger.debug(`Fetching project info for group ${group}...`);
const { durationMs, data } = await measure(async () => {
const fetchFn = () => api.Groups.allProjects(group, {
perPage: 100,
includeSubgroups: true
});
return fetchWithRetry(fetchFn, `group ${group}`, logger);
});
return fetchWithRetry(fetchFn, `group ${group}`, logger);
});
logger.debug(`Found ${data.length} projects in group ${group} in ${durationMs}ms.`);
return {
type: 'valid' as const,
data
};
} catch (e: any) {
Sentry.captureException(e);
logger.error(`Failed to fetch projects for group ${group}.`, e);

const status = e?.cause?.response?.status;
if (status === 404) {
logger.error(`Group ${group} not found or no access`);
logger.debug(`Found ${data.length} projects in group ${group} in ${durationMs}ms.`);
return {
type: 'notFound' as const,
value: group
type: 'valid' as const,
data
};
}
throw e;
}
}));
} catch (e: any) {
Sentry.captureException(e);
logger.error(`Failed to fetch projects for group ${group}.`, e);

throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundOrgs } = processPromiseResults(results);
const status = e?.cause?.response?.status;
if (status === 404) {
logger.error(`Group ${group} not found or no access`);
return {
type: 'notFound' as const,
value: group
};
}
throw e;
}
}));
allResults.push(...batchResults);
}
const { validItems: validRepos, notFoundItems: notFoundOrgs } = processPromiseResults(allResults);
allRepos = allRepos.concat(validRepos);
notFound.orgs = notFoundOrgs;
logger.debug(`Found ${validRepos.length} valid repositories in groups.`);
logger.debug(`Not found groups: ${notFoundOrgs.join(', ')}`);
logger.debug(`These repositories will be downloaded: ${allRepos.map(repo => repo.path_with_namespace).join('\n')}`);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: synced not downloaded to be consistent with language elsewhere

}

if (config.users) {
const results = await Promise.allSettled(config.users.map(async (user) => {
try {
logger.debug(`Fetching project info for user ${user}...`);
const { durationMs, data } = await measure(async () => {
const fetchFn = () => api.Users.allProjects(user, {
perPage: 100,
const batchSize = 10;
const allResults = [];

// Process users in batches of 10
for (let i = 0; i < config.users.length; i += batchSize) {
const batch = config.users.slice(i, i + batchSize);
logger.debug(`Processing batch ${i/batchSize + 1} of ${Math.ceil(config.users.length/batchSize)} (${batch.length} users)`);

const batchResults = await Promise.allSettled(batch.map(async (user) => {
try {
logger.debug(`Fetching project info for user ${user}...`);
const { durationMs, data } = await measure(async () => {
const fetchFn = () => api.Users.allProjects(user, {
perPage: 100,
});
return fetchWithRetry(fetchFn, `user ${user}`, logger);
});
return fetchWithRetry(fetchFn, `user ${user}`, logger);
});
logger.debug(`Found ${data.length} projects owned by user ${user} in ${durationMs}ms.`);
return {
type: 'valid' as const,
data
};
} catch (e: any) {
Sentry.captureException(e);
logger.error(`Failed to fetch projects for user ${user}.`, e);

const status = e?.cause?.response?.status;
if (status === 404) {
logger.error(`User ${user} not found or no access`);
logger.debug(`Found ${data.length} projects owned by user ${user} in ${durationMs}ms.`);
return {
type: 'notFound' as const,
value: user
type: 'valid' as const,
data
};
}
throw e;
}
}));
} catch (e: any) {
Sentry.captureException(e);
logger.error(`Failed to fetch projects for user ${user}.`, e);

throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundUsers } = processPromiseResults(results);
const status = e?.cause?.response?.status;
if (status === 404) {
logger.error(`User ${user} not found or no access`);
return {
type: 'notFound' as const,
value: user
};
}
throw e;
}
}));

allResults.push(...batchResults);
}
const { validItems: validRepos, notFoundItems: notFoundUsers } = processPromiseResults(allResults);
allRepos = allRepos.concat(validRepos);
notFound.users = notFoundUsers;
}
Expand Down Expand Up @@ -169,7 +202,6 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
}
}));

throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundRepos } = processPromiseResults(results);
allRepos = allRepos.concat(validRepos);
notFound.repos = notFoundRepos;
Expand Down
4 changes: 3 additions & 1 deletion packages/backend/src/repoManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,9 @@ export class RepoManager implements IRepoManager {
});

if (repos.length > 0) {
await this.scheduleRepoIndexingBulk(repos);
for (let i = 0; i < repos.length; i += 100) {
await this.scheduleRepoIndexingBulk(repos.slice(i, i + 100));
}
}
}

Expand Down
22 changes: 12 additions & 10 deletions packages/web/src/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -920,15 +920,17 @@ export const flagConnectionForSync = async (connectionId: number, domain: string
export const flagReposForIndex = async (repoIds: number[], domain: string) => sew(() =>
withAuth((userId) =>
withOrgMembership(userId, domain, async ({ org }) => {
await prisma.repo.updateMany({
where: {
id: { in: repoIds },
orgId: org.id,
},
data: {
repoIndexingStatus: RepoIndexingStatus.NEW,
}
});
for (let i = 0; i < repoIds.length; i += 1000) {
await prisma.repo.updateMany({
where: {
id: { in: repoIds.slice(i, i + 1000) },
orgId: org.id,
},
data: {
repoIndexingStatus: RepoIndexingStatus.NEW,
}
});
}

return {
success: true,
Expand Down Expand Up @@ -2152,4 +2154,4 @@ export const encryptValue = async (value: string) => {

export const decryptValue = async (iv: string, encryptedValue: string) => {
return decrypt(iv, encryptedValue);
}
}
20 changes: 11 additions & 9 deletions packages/web/src/initialize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,18 @@ const syncConnections = async (connections?: { [key: string]: ConnectionConfig }
// Re-try any repos that failed to index.
const failedRepos = currentConnection?.repos.filter(repo => repo.repo.repoIndexingStatus === RepoIndexingStatus.FAILED).map(repo => repo.repo.id) ?? [];
if (failedRepos.length > 0) {
await prisma.repo.updateMany({
where: {
id: {
in: failedRepos,
for (let i = 0; i < failedRepos.length; i += 100) {
await prisma.repo.updateMany({
where: {
id: {
in: failedRepos.slice(i, i + 100),
}
},
data: {
repoIndexingStatus: RepoIndexingStatus.NEW,
}
},
data: {
repoIndexingStatus: RepoIndexingStatus.NEW,
}
})
})
}
}
}
}
Expand Down