diff --git a/.env.development b/.env.development
index e49cbea6..ae6fdbb3 100644
--- a/.env.development
+++ b/.env.development
@@ -24,7 +24,7 @@ AUTH_URL="http://localhost:3000"
# AUTH_GOOGLE_CLIENT_SECRET=""
# Email
-# EMAIL_FROM="" # The from address for transactional emails.
+# EMAIL_FROM_ADDRESS="" # The from address for transactional emails.
# SMTP_CONNECTION_URL="" # The SMTP connection URL for transactional emails.
# PostHog
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3459eab0..3627186c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -37,4 +37,4 @@
A `.sourcebot` directory will be created and zoekt will begin to index the repositories found in the `config.json` file.
-8. Start searching at `http://localhost:3000`.
\ No newline at end of file
+8. Start searching at `http://localhost:3000`.
diff --git a/Dockerfile b/Dockerfile
index 13aafbe8..f7f8b5a9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -164,21 +164,15 @@ ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1
ENV DATA_DIR=/data
ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot
-ENV DB_DATA_DIR=$DATA_CACHE_DIR/db
+ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db
ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis
-ENV DB_NAME=sourcebot
-ENV DATABASE_URL="postgresql://postgres@localhost:5432/$DB_NAME"
+ENV DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
ENV REDIS_URL="redis://localhost:6379"
ENV SRC_TENANT_ENFORCEMENT_MODE=strict
# Valid values are: debug, info, warn, error
ENV SOURCEBOT_LOG_LEVEL=info
-# Configures the sub-path of the domain to serve Sourcebot from.
-# For example, if DOMAIN_SUB_PATH is set to "/sb", Sourcebot
-# will serve from http(s)://example.com/sb
-ENV DOMAIN_SUB_PATH=/
-
# Sourcebot collects anonymous usage data using [PostHog](https://posthog.com/). Uncomment this line to disable.
# ENV SOURCEBOT_TELEMETRY_DISABLED=1
diff --git a/README.md b/README.md
index eabe0bdb..fda59ed0 100644
--- a/README.md
+++ b/README.md
@@ -5,12 +5,37 @@
+
+
+ Sourcebot uses GitHub Discussions for Support and Feature Requests.
+
+
+
+
+
-Blazingly fast code search 🏎️
-
-
-
-
+
@@ -23,391 +48,69 @@ Blazingly fast code search 🏎️
# About
-Sourcebot is a fast code indexing and search tool for your codebases. It is built ontop of the [zoekt](https://github.com/sourcegraph/zoekt) indexer, originally authored by Han-Wen Nienhuys and now [maintained by Sourcegraph](https://sourcegraph.com/blog/sourcegraph-accepting-zoekt-maintainership).
+Sourcebot is the open source Sourcegraph alternative. Index all your repos and branches across multiple code hosts (GitHub, GitLab, Gitea, or Gerrit) and search through them using a blazingly fast interface.
https://github.com/user-attachments/assets/98d46192-5469-430f-ad9e-5c042adbb10d
## Features
- 💻 **One-command deployment**: Get started instantly using Docker on your own machine.
-- 🔍 **Multi-repo search**: Effortlessly index and search through multiple public and private repositories in GitHub, GitLab, Gitea, or Gerrit.
+- 🔍 **Multi-repo search**: Index and search through multiple public and private repositories and branches on GitHub, GitLab, Gitea, or Gerrit.
- ⚡**Lightning fast performance**: Built on top of the powerful [Zoekt](https://github.com/sourcegraph/zoekt) search engine.
-- 📂 **Full file visualization**: Instantly view the entire file when selecting any search result.
- 🎨 **Modern web app**: Enjoy a sleek interface with features like syntax highlighting, light/dark mode, and vim-style navigation
+- 📂 **Full file visualization**: Instantly view the entire file when selecting any search result.
You can try out our public hosted demo [here](https://sourcebot.dev/search)!
-# Getting Started
-
-Get started with a single docker command:
-
-```
-docker run -p 3000:3000 --rm --name sourcebot ghcr.io/sourcebot-dev/sourcebot:latest
-```
+# Deploy Sourcebot
-Navigate to `localhost:3000` to start searching the Sourcebot repo. Want to search your own repos? Checkout how to [configure Sourcebot](#configuring-sourcebot).
-
-
-What does this command do?
+Sourcebot can be deployed in seconds using our official docker image. Visit our [docs](https://docs.sourcebot.dev/self-hosting/overview) for more information.
-- Pull and run the Sourcebot docker image from [ghcr.io/sourcebot-dev/sourcebot:latest](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot). Make sure you have [docker installed](https://docs.docker.com/get-started/get-docker/).
-- Read the repos listed in [default config](./default-config.json) and start indexing them.
-- Map port 3000 between your machine and the docker image.
-- Starts the web server on port 3000.
-
-
-## Configuring Sourcebot
-
-Sourcebot supports indexing and searching through public and private repositories hosted on
-
-
-
- GitHub, GitLab, Gitea, and Gerrit. This section will guide you through configuring the repositories that Sourcebot indexes.
-
-1. Create a new folder on your machine that stores your configs and `.sourcebot` cache, and navigate into it:
- ```sh
- mkdir sourcebot_workspace
- cd sourcebot_workspace
- ```
-
-2. Create a new config following the [configuration schema](./schemas/v2/index.json) to specify which repositories Sourcebot should index. For example, let's index llama.cpp:
-
- ```sh
- touch my_config.json
- echo '{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
- "type": "github",
- "repos": [
- "ggerganov/llama.cpp"
- ]
- }
- ]
- }' > my_config.json
- ```
-
->[!NOTE]
-> Sourcebot can also index all repos owned by a organization, user, group, etc., instead of listing them individually. For examples, see the [configs](./configs) directory. For additional usage information, see the [configuration schema](./schemas/v2/index.json).
-
-3. Run Sourcebot and point it to the new config you created with the `-e CONFIG_PATH` flag:
-
- ```sh
- docker run -p 3000:3000 --rm --name sourcebot -v $(pwd):/data -e CONFIG_PATH=/data/my_config.json ghcr.io/sourcebot-dev/sourcebot:latest
- ```
-
-
- What does this command do?
-
- - Pull and run the Sourcebot docker image from [ghcr.io/sourcebot-dev/sourcebot:latest](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot).
- - Mount the current directory (`-v $(pwd):/data`) to allow Sourcebot to persist the `.sourcebot` cache.
- - Mirrors (clones) llama.cpp at `HEAD` into `.sourcebot/github/ggerganov/llama.cpp`.
- - Indexes llama.cpp into a .zoekt index file in `.sourcebot/index/`.
- - Map port 3000 between your machine and the docker image.
- - Starts the web server on port 3000.
-
-
-
- You should see a `.sourcebot` folder in your current directory. This folder stores a cache of the repositories zoekt has indexed. The `HEAD` commit of a repository is re-indexed [every hour](./packages/backend/src/constants.ts). Indexing private repos? See [Providing an access token](#providing-an-access-token).
-
-
-
-## Providing an access token
-This will depend on the code hosting platform you're using:
-
-
-
-
-
-
-
- GitHub
-
-
-In order to index private repositories, you'll need to generate a GitHub Personal Access Token (PAT). Create a new PAT [here](https://github.com/settings/tokens/new) and make sure you select the `repo` scope:
-
-
-
-Next, update your configuration with the `token` field:
+1. Create a config
```json
-{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
+touch config.json
+echo '{
+ "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
+ "connections": {
+ // Comments are supported
+ "starter-connection": {
"type": "github",
- "token": "ghp_mytoken",
- ...
- }
- ]
-}
-```
-
-You can also pass tokens as environment variables:
-```json
-{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
- "type": "github",
- "token": {
- // note: this env var can be named anything. It
- // doesn't need to be `GITHUB_TOKEN`.
- "env": "GITHUB_TOKEN"
- },
- ...
- }
- ]
-}
-```
-
-You'll need to pass this environment variable each time you run Sourcebot:
-
-
-docker run -e GITHUB_TOKEN=ghp_mytoken /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
-
-
-
-
- GitLab
-
-Generate a GitLab Personal Access Token (PAT) [here](https://gitlab.com/-/user_settings/personal_access_tokens) and make sure you select the `read_api` scope:
-
-
-
-Next, update your configuration with the `token` field:
-```json
-{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
- "type": "gitlab",
- "token": "glpat-mytoken",
- ...
+ "repos": [
+ "sourcebot-dev/sourcebot"
+ ]
}
- ]
-}
+ }
+}' > config.json
```
-You can also pass tokens as environment variables:
-```json
-{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
- "type": "gitlab",
- "token": {
- // note: this env var can be named anything. It
- // doesn't need to be `GITLAB_TOKEN`.
- "env": "GITLAB_TOKEN"
- },
- ...
- }
- ]
-}
+2. Run the docker container
+```sh
+docker run -p 3000:3000 --pull=always --rm -v $(pwd):/data -e CONFIG_PATH=/data/config.json --name sourcebot ghcr.io/sourcebot-dev/sourcebot:latest
```
-
-You'll need to pass this environment variable each time you run Sourcebot:
-
-
-docker run -e GITLAB_TOKEN=glpat-mytoken /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
-
-
-
-
- Gitea
-
-Generate a Gitea access token [here](http://gitea.com/user/settings/applications). At minimum, you'll need to select the `read:repository` scope, but `read:user` and `read:organization` are required for the `user` and `org` fields of your config file:
-
-
-
-Next, update your configuration with the `token` field:
-```json
-{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
- "type": "gitea",
- "token": "my-secret-token",
- ...
- }
- ]
-}
-```
-
-You can also pass tokens as environment variables:
-```json
-{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
- "type": "gitea",
- "token": {
- // note: this env var can be named anything. It
- // doesn't need to be `GITEA_TOKEN`.
- "env": "GITEA_TOKEN"
- },
- ...
- }
- ]
-}
-```
-
-You'll need to pass this environment variable each time you run Sourcebot:
-
-
-docker run -e GITEA_TOKEN=my-secret-token /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
-
-
-
+What does this command do?
-
- Gerrit
-Gerrit authentication is not yet currently supported.
+- Pull and run the Sourcebot docker image from [ghcr.io/sourcebot-dev/sourcebot:latest](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot).
+- Mount the current directory (`-v $(pwd):/data`) to allow Sourcebot to persist the `.sourcebot` cache.
+- Clones sourcebot at `HEAD` into `.sourcebot/github/sourcebot-dev/sourcebot`.
+- Indexes sourcebot into a .zoekt index file in `.sourcebot/index/`.
+- Map port 3000 between your machine and the docker image.
+- Starts the web server on port 3000.
+
+3. Start searching at `http://localhost:3000`
+
-
-
-## Using a self-hosted GitLab / GitHub instance
-
-If you're using a self-hosted GitLab or GitHub instance with a custom domain, you can specify the domain in your config file. See [configs/self-hosted.json](configs/self-hosted.json) for examples.
-
-## Searching multiple branches
-
-By default, Sourcebot will index the default branch. To configure Sourcebot to index multiple branches (or tags), the `revisions` field can be used:
-
-```jsonc
-{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
- "type": "github",
- "revisions": {
- // Index the `main` branch and any branches matching the `releases/*` glob pattern.
- "branches": [
- "main",
- "releases/*"
- ],
- // Index the `latest` tag and any tags matching the `v*.*.*` glob pattern.
- "tags": [
- "latest",
- "v*.*.*"
- ]
- },
- "repos": [
- "my_org/repo_a",
- "my_org/repo_b"
- ]
- }
- ]
-}
-```
-
-For each repository (in this case, `repo_a` and `repo_b`), Sourcebot will index all branches and tags matching the `branches` and `tags` patterns provided. Any branches or tags that don't match the patterns will be ignored and not indexed.
-
-To search on a specific revision, use the `revision` filter in the search bar:
-
-
-
-
-
-
-## Searching a local directory
-
-Local directories can be searched by using the `local` type in your config file:
+To learn how to configure Sourcebot to index your own repos, please refer to our [docs](https://docs.sourcebot.dev/self-hosting/overview).
-```jsonc
-{
- "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
- "repos": [
- {
- "type": "local",
- "path": "/repos/my-repo",
- // re-index files when a change is detected
- "watch": true,
- "exclude": {
- // exclude paths from being indexed
- "paths": [
- "node_modules",
- "build"
- ]
- }
- }
- ]
-}
-```
+> [!NOTE]
+> Sourcebot collects [anonymous usage data](https://sourcebot.dev/search/search?query=captureEvent%5C(%20repo%3Asourcebot) by default to help us improve the product. No sensitive data is collected, but if you'd like to disable this you can do so by setting the `SOURCEBOT_TELEMETRY_DISABLED` environment
+> variable to `true`. Please refer to our [telemetry docs](https://docs.sourcebot.dev/self-hosting/overview#telemetry) for more information.
-You'll need to mount the directory as a volume when running Sourcebot:
-
-
-docker run -v /path/to/my-repo:/repos/my-repo /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
-
-
-## Build from source
+# Build from source
>[!NOTE]
-> Building from source is only required if you'd like to contribute. The recommended way to use Sourcebot is to use the [pre-built docker image](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot).
-
-1. Install go, NodeJS, [redis](https://redis.io/), and [postgres](https://www.postgresql.org/). Note that a NodeJS version of at least `21.1.0` is required.
-
-2. Install [ctags](https://github.com/universal-ctags/ctags) (required by zoekt)
- ```sh
- // macOS:
- brew install universal-ctags
-
- // Linux:
- snap install universal-ctags
- ```
-
-3. Clone the repository with submodules:
- ```sh
- git clone --recurse-submodules https://github.com/sourcebot-dev/sourcebot.git
- ```
-
-4. Run `make` to build zoekt and install dependencies:
- ```sh
- cd sourcebot
- make
- ```
-
- The zoekt binaries and web dependencies are placed into `bin` and `node_modules` respectively.
-
-5. Create a `config.json` file at the repository root. See [Configuring Sourcebot](#configuring-sourcebot) for more information.
-
-6. Create `.env.local` files in the `packages/backend` and `packages/web` directories with the following contents:
- ```sh
- # You can use https://acte.ltd/utils/randomkeygen to generate a key ("Encryption key 256")
- SOURCEBOT_ENCRYPTION_KEY="32-byte-secret-key"
- ```
-
-7. Start Sourcebot with the command:
- ```sh
- yarn dev
- ```
-
- A `.sourcebot` directory will be created and zoekt will begin to index the repositories found given `config.json`.
-
-8. Start searching at `http://localhost:3000`.
-
-## Telemetry
-
-By default, Sourcebot collects anonymized usage data through [PostHog](https://posthog.com/) to help us improve the performance and reliability of our tool. We do not collect or transmit [any information related to your codebase](https://sourcebot.dev/search/search?query=captureEvent%20repo%3Asourcebot%20case%3Ano). In addition, all events are [sanitized](./packages/web/src/app/posthogProvider.tsx) to ensure that no sensitive or identifying details leave your machine. The data we collect includes general usage statistics and metadata such as query performance (e.g., search duration, error rates) to monitor the application's health and functionality. This information helps us better understand how Sourcebot is used and where improvements can be made :)
-
-If you'd like to disable all telemetry, you can do so by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `1` in the docker run command:
-
-
-docker run -e SOURCEBOT_TELEMETRY_DISABLED=1 /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
-
-
-Or if you are [building locally](#build-from-source), create a `.env.local` file at the repository root with the following contents:
-```sh
-SOURCEBOT_TELEMETRY_DISABLED=1
-NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED=1
-```
-
-## Attributions
-
-Sourcebot makes use of the following libraries:
+> Building from source is only required if you'd like to contribute. If you'd just like to use Sourcebot, we recommend checking out our self-hosting [docs](https://docs.sourcebot.dev/self-hosting/overview).
-- [@vscode/codicons](https://github.com/microsoft/vscode-codicons) under the [CC BY 4.0 License](https://github.com/microsoft/vscode-codicons/blob/main/LICENSE).
+If you'd like to build from source, please check out the `CONTRIBUTING.md` file for more information.
diff --git a/docs/docs.json b/docs/docs.json
index 8811f9dc..64358876 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -4,7 +4,7 @@
"name": "Sourcebot",
"colors": {
"primary": "#851EE7",
- "light": "#851EE7",
+ "light": "#FFFFFF",
"dark": "#851EE7"
},
"favicon": "/fav.svg",
@@ -21,22 +21,25 @@
"group": "General",
"pages": [
"docs/overview",
- "docs/getting-started"
+ "docs/getting-started",
+ "docs/getting-started-selfhost"
]
},
{
- "group": "Connections",
+ "group": "Connecting your code",
"pages": [
"docs/connections/overview",
"docs/connections/github",
- "docs/connections/gitlab"
+ "docs/connections/gitlab",
+ "docs/connections/gitea",
+ "docs/connections/gerrit",
+ "docs/connections/request-new"
]
},
{
- "group": "Access Tokens",
+ "group": "More",
"pages": [
- "docs/access-tokens/overview",
- "docs/access-tokens/secrets"
+ "docs/more/roles-and-permissions"
]
}
]
@@ -48,16 +51,41 @@
{
"group": "Getting Started",
"pages": [
- "self-hosting/overview"
+ "self-hosting/overview",
+ "self-hosting/configuration"
]
},
{
- "group": "Deployment",
+ "group": "More",
"pages": [
- "self-hosting/deployment/source"
+ "self-hosting/more/authentication",
+ "self-hosting/more/tenancy",
+ "self-hosting/more/transactional-emails",
+ "self-hosting/more/declarative-config"
+ ]
+ },
+ {
+ "group": "Security",
+ "pages": [
+ ]
+ },
+ {
+ "group": "Upgrade",
+ "pages": [
+ "self-hosting/upgrade/v2-to-v3-guide"
]
}
]
+ },
+ {
+ "anchor": "Changelog",
+ "href": "https://sourcebot.dev/changelog",
+ "icon": "list-check"
+ },
+ {
+ "anchor": "Support",
+ "href": "https://github.com/sourcebot-dev/sourcebot/discussions/categories/support",
+ "icon": "life-ring"
}
]
},
@@ -68,14 +96,14 @@
"navbar": {
"links": [
{
- "label": "Support",
- "href": "mailto:team@sourcebot.dev"
+ "label": "GitHub",
+ "href": "https://github.com/sourcebot-dev/sourcebot"
}
],
"primary": {
"type": "button",
- "label": "Login",
- "href": "https://sourcebot.dev/login"
+ "label": "Sourcebot Cloud",
+ "href": "https://app.sourcebot.dev"
}
},
"footer": {
diff --git a/docs/docs/access-tokens/overview.mdx b/docs/docs/access-tokens/overview.mdx
deleted file mode 100644
index 036766a4..00000000
--- a/docs/docs/access-tokens/overview.mdx
+++ /dev/null
@@ -1,31 +0,0 @@
----
-title: Access Tokens
-sidebarTitle: Access Tokens
----
-
-In order to search your private repos, you'll need to provide Sourcebot with an access token. Access tokens are provided as [secrets](/docs/access-tokens/secrets), which are encrypted at rest and in transit using [AES-256](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard)
-
-
-
-
- Navigate to the [personal access token (PAT) creation page](https://github.com/settings/tokens/new) (or the equivelent page if you're
- self hosting)
-
- Ensure that the `repo` scope is enabled. This gives the PAT access to fetch your repos
-
- 
-
- Once you've created your GitHub PAT, add it as a [secret](access-tokens/secrets) in Sourcebot to use it within your connections
-
-
-
- Navigate to the [personal access token (PAT) creation page](https://gitlab.com/-/user_settings/personal_access_tokens) (or the equivelent page if you're
- self hosting), and press `Add new token`
-
- Ensure that the `read_api` scope is enabled. This gives the PAT **read-only** access to fetch your repos
-
- 
-
- Once you've created your GitLab PAT, add it as a [secret](access-tokens/secrets) in Sourcebot to use it within your connections
-
-
\ No newline at end of file
diff --git a/docs/docs/access-tokens/secrets.mdx b/docs/docs/access-tokens/secrets.mdx
deleted file mode 100644
index 3d6234f2..00000000
--- a/docs/docs/access-tokens/secrets.mdx
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: Secrets
-subtitle: Secrets
----
-
-Secrets are used to store sensitive data (ex. GitHub/GitLab access tokens), and can be referenced in [Connections](/connections/overview). The main use of secrets is to provide a connection with a personal access token to fetch private repositories.
-
-### Create a secret
-
-To create a secret, navigate to the `Secrets` page within `Settings`. Under `Import a new secret`, select the code host you're importing a secret for and follow the instructions presented.
-
-
-
-
-
-### Referencing a secret
-
-After creating a secret, it'll be available to reference within a [connection](/connections/overview). On the connection creation page, **press the secret selection drop down to reference your secret**. This will automatically add the secret reference to the
-configuration schema:
-
-
-
\ No newline at end of file
diff --git a/docs/docs/connections/gerrit.mdx b/docs/docs/connections/gerrit.mdx
new file mode 100644
index 00000000..627ac526
--- /dev/null
+++ b/docs/docs/connections/gerrit.mdx
@@ -0,0 +1,125 @@
+---
+title: Linking code from Gerrit
+sidebarTitle: Gerrit
+---
+
+Authenticating with Gerrit is currently not supported. If you need this capability, please raise a [feature request](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).
+
+Sourcebot can sync code from self-hosted Gerrit instances.
+
+## Connecting to a Gerrit instance
+
+To connect to a Gerrit instance, provide the `url` property in your config:
+
+```json
+{
+ "type": "gerrit",
+ "url": "https://gerrit.example.com"
+ // .. rest of config ..
+}
+```
+
+## Examples
+
+
+
+ ```json
+ {
+ "type": "gerrit",
+ "url": "https://gerrit.example.com",
+ // Sync all repos under project1 and project2/sub-project
+ "projects": [
+ "project1/**",
+ "project2/sub-project/**"
+ ]
+ }
+ ```
+
+
+ ```json
+ {
+ "type": "gerrit",
+ "url": "https://gerrit.example.com",
+ // Sync all repos under project1 and project2/sub-project...
+ "projects": [
+ "project1/**",
+ "project2/sub-project/**"
+ ],
+ // ...except:
+ "exclude": {
+ // any project that matches these glob patterns
+ "projects": [
+ "project1/foo-project",
+ "project2/sub-project/some-sub-folder/**"
+ ]
+ }
+ }
+ ```
+
+
+
+## Schema reference
+
+
+[schemas/v3/gerrit.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/gerrit.json)
+
+```json
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "GerritConnectionConfig",
+ "properties": {
+ "type": {
+ "const": "gerrit",
+ "description": "Gerrit Configuration"
+ },
+ "url": {
+ "type": "string",
+ "format": "url",
+ "description": "The URL of the Gerrit host.",
+ "examples": [
+ "https://gerrit.example.com"
+ ],
+ "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
+ },
+ "projects": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of specific projects to sync. If not specified, all projects will be synced. Glob patterns are supported",
+ "examples": [
+ [
+ "project1/repo1",
+ "project2/**"
+ ]
+ ]
+ },
+ "exclude": {
+ "type": "object",
+ "properties": {
+ "projects": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "project1/repo1",
+ "project2/**"
+ ]
+ ],
+ "description": "List of specific projects to exclude from syncing."
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "type",
+ "url"
+ ],
+ "additionalProperties": false
+}
+```
+
\ No newline at end of file
diff --git a/docs/docs/connections/gitea.mdx b/docs/docs/connections/gitea.mdx
new file mode 100644
index 00000000..17be8275
--- /dev/null
+++ b/docs/docs/connections/gitea.mdx
@@ -0,0 +1,308 @@
+---
+title: Linking code from Gitea
+sidebarTitle: Gitea
+---
+
+Sourcebot can sync code from Gitea Cloud and from self-hosted Gitea instances.
+
+## Examples
+
+
+
+ ```json
+ {
+ "type": "gitea",
+ "repos": [
+ "sourcebot-dev/sourcebot",
+ "getsentry/sentry",
+ "torvalds/linux"
+ ]
+ }
+ ```
+
+
+ ```json
+ {
+ "type": "gitea",
+ "orgs": [
+ "sourcebot-dev",
+ "getsentry",
+ "vercel"
+ ]
+ }
+ ```
+
+
+ ```json
+ {
+ "type": "gitea",
+ "users": [
+ "torvalds",
+ "ggerganov"
+ ]
+ }
+ ```
+
+
+ ```json
+ {
+ "type": "gitea",
+ // Include all repos in my-org...
+ "orgs": [
+ "my-org"
+ ],
+ // ...except:
+ "exclude": {
+ // repos that are archived
+ "archived": true,
+ // repos that are forks
+ "forks": true,
+ // repos that match these glob patterns
+ "repos": [
+ "my-org/repo1",
+ "my-org/repo2",
+ "my-org/sub-org-1/**",
+ "my-org/sub-org-*/**"
+ ]
+ }
+ }
+ ```
+
+
+
+## Authenticating with Gitea
+
+In order to index private repositories, you'll need to generate a Gitea access token [here](http://gitea.com/user/settings/applications). At minimum, you'll need to select the `read:repository` scope. The `read:user` and `read:organization` scopes are additionally required if you use the `users` and `orgs` fields of your config file:
+
+
+
+Next, provide the access token via the `token` property, either as an environment variable or a secret:
+
+
+
+ Environment variables are only supported in a [declarative config](/self-hosting/more/declarative-config) and cannot be used in the web UI.
+
+ 1. Add the `token` property to your connection config:
+ ```json
+ {
+ "type": "gitea",
+ "token": {
+ // note: this env var can be named anything. It
+ // doesn't need to be `GITEA_TOKEN`.
+ "env": "GITEA_TOKEN"
+ }
+ // .. rest of config ..
+ }
+ ```
+
+ 2. Pass this environment variable each time you run Sourcebot:
+ ```bash
+ docker run \
+ -e GITEA_TOKEN=<PAT> \
+ /* additional args */ \
+ ghcr.io/sourcebot-dev/sourcebot:latest
+ ```
+
+
+
+ Secrets are only supported when [authentication](/self-hosting/more/authentication) is enabled.
+
+ 1. Navigate to **Secrets** in settings and create a new secret with your PAT:
+
+ 
+
+ 2. Add the `token` property to your connection config:
+
+ ```json
+ {
+ "type": "gitea",
+ "token": {
+ "secret": "mysecret"
+ }
+ // .. rest of config ..
+ }
+ ```
+
+
+
+
+## Connecting to a custom Gitea
+
+To connect to a custom Gitea deployment, provide the `url` property to your config:
+
+```json
+{
+ "type": "gitea",
+ "url": "https://gitea.example.com"
+ // .. rest of config ..
+}
+```
+
+## Schema reference
+
+
+[schemas/v3/gitea.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/gitea.json)
+
+```json
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "GiteaConnectionConfig",
+ "properties": {
+ "type": {
+ "const": "gitea",
+ "description": "Gitea Configuration"
+ },
+ "token": {
+ "description": "A Personal Access Token (PAT).",
+ "examples": [
+ {
+ "secret": "SECRET_KEY"
+ }
+ ],
+ "anyOf": [
+ {
+ "type": "object",
+ "properties": {
+ "secret": {
+ "type": "string",
+ "description": "The name of the secret that contains the token."
+ }
+ },
+ "required": [
+ "secret"
+ ],
+ "additionalProperties": false
+ },
+ {
+ "type": "object",
+ "properties": {
+ "env": {
+ "type": "string",
+ "description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
+ }
+ },
+ "required": [
+ "env"
+ ],
+ "additionalProperties": false
+ }
+ ]
+ },
+ "url": {
+ "type": "string",
+ "format": "url",
+ "default": "https://gitea.com",
+ "description": "The URL of the Gitea host. Defaults to https://gitea.com",
+ "examples": [
+ "https://gitea.com",
+ "https://gitea.example.com"
+ ],
+ "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
+ },
+ "orgs": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "my-org-name"
+ ]
+ ],
+ "description": "List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. If a `token` is provided, it must have the read:organization scope."
+ },
+ "repos": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[\\w.-]+\\/[\\w.-]+$"
+ },
+ "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'."
+ },
+ "users": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "username-1",
+ "username-2"
+ ]
+ ],
+ "description": "List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property. If a `token` is provided, it must have the read:user scope."
+ },
+ "exclude": {
+ "type": "object",
+ "properties": {
+ "forks": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude forked repositories from syncing."
+ },
+ "archived": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude archived repositories from syncing."
+ },
+ "repos": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "default": [],
+ "description": "List of individual repositories to exclude from syncing. Glob patterns are supported."
+ }
+ },
+ "additionalProperties": false
+ },
+ "revisions": {
+ "type": "object",
+ "description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
+ "properties": {
+ "branches": {
+ "type": "array",
+ "description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "main",
+ "release/*"
+ ],
+ [
+ "**"
+ ]
+ ],
+ "default": []
+ },
+ "tags": {
+ "type": "array",
+ "description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "latest",
+ "v2.*.*"
+ ],
+ [
+ "**"
+ ]
+ ],
+ "default": []
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "type"
+ ],
+ "additionalProperties": false
+}
+```
+
\ No newline at end of file
diff --git a/docs/docs/connections/github.mdx b/docs/docs/connections/github.mdx
index 81783e82..52165c68 100644
--- a/docs/docs/connections/github.mdx
+++ b/docs/docs/connections/github.mdx
@@ -1,25 +1,26 @@
---
-title: GitHub Connection
+title: Linking code from GitHub
sidebarTitle: GitHub
-icon: github
---
-To search repos on GitHub, you'll create a GitHub Connection. Using this connection, you can:
-- Index individual GitHub repos
-- Index all of the repos in a GitHub organization
-- Index all of the repos from a GitHub user
+Sourcebot can sync code from GitHub.com, GitHub Enterprise Server, and GitHub Enterprise Cloud.
-If you want to index a private repo/org, make sure you provide an [access token](/docs/access-tokens/overview)
+## Examples
-
-
-# Config Schema
-
-For a detailed description of the schema, check out the schema file (TODO: link)
-
-
-
- You can provide a list of orgs to index. Sourcebot will fetch all of the visible repos in these orgs.
+
+
+ ```json
+ {
+ "type": "github",
+ "repos": [
+ "sourcebot-dev/sourcebot",
+ "getsentry/sentry",
+ "torvalds/linux"
+ ]
+ }
+ ```
+
+
```json
{
"type": "github",
@@ -28,154 +29,363 @@ For a detailed description of the schema, check out the schema file (TODO: link)
"getsentry",
"vercel"
]
- // .. additional configs ..
}
```
-
-
- You can provide a list of users to index. Sourcebot will fetch all of the visible repos for these users.
+
+
```json
{
"type": "github",
"users": [
- "msukkari",
- "brendan-kellem",
- "torvalds"
+ "torvalds",
+ "ggerganov"
]
- // .. additional configs ..
}
```
-
-
- You can provide a list of repos to index. These must be provided in the `/` syntax.
+
+
```json
{
"type": "github",
- "repos": [
- "sourcebot-dev/sourcebot",
- "facebook/react"
+ // Sync all repos in `my-org` that have a topic that...
+ "orgs": [
+ "my-org"
+ ],
+ // ...match one of these glob patterns.
+ "topics": [
+ "test-*",
+ "ci-*",
+ "k8s"
]
- // .. additional configs ..
}
+
```
-
-
- If you're self-hosting you're own GitHub instance, you can provide a link to it:
+
+
```json
{
"type": "github",
- "url": {
- "https://my.custom.github.com"
+ // Include all repos in my-org...
+ "orgs": [
+ "my-org"
+ ],
+ // ...except:
+ "exclude": {
+ // repos that are archived
+ "archived": true,
+ // repos that are forks
+ "forks": true,
+ // repos that match these glob patterns
+ "repos": [
+ "my-org/repo1",
+ "my-org/repo2",
+ "my-org/sub-org-1/**",
+ "my-org/sub-org-*/**"
+ ],
+ "size": {
+ // repos that are less than 1MB (in bytes)...
+ "min": 1048576,
+ // or repos greater than 100MB (in bytes)
+ "max": 104857600
+ },
+ // repos with topics that match these glob patterns
+ "topics": [
+ "test-*",
+ "ci"
+ ]
}
- // .. additional configs ..
}
```
-
-
- If you'd like to index private repos, make sure you provide a token. You can do so by referencing the [secret](/docs/access-tokens/secrets) that contains your token.
+
+
+
+## Authenticating with GitHub
+
+In order to index private repositories, you'll need to generate a GitHub Personal Access Token (PAT). Create a new PAT [here](https://github.com/settings/tokens/new) and make sure you select the `repo` scope:
+
+
+
+Next, provide the PAT via the `token` property, either as an environment variable or a secret:
+
+
+
+ Environment variables are only supported in a [declarative config](/self-hosting/more/declarative-config) and cannot be used in the web UI.
+
+ 1. Add the `token` property to your connection config:
```json
{
"type": "github",
"token": {
- "secret": "MY_SECRET"
+ // note: this env var can be named anything. It
+ // doesn't need to be `GITHUB_TOKEN`.
+ "env": "GITHUB_TOKEN"
}
- // .. additional configs ..
+ // .. rest of config ..
}
```
+
+ 2. Pass this environment variable each time you run Sourcebot:
+ ```bash
+ docker run \
+ -e GITHUB_TOKEN=<PAT> \
+ /* additional args */ \
+ ghcr.io/sourcebot-dev/sourcebot:latest
+ ```
-
-### Filter Options
+
+ Secrets are only supported when [authentication](/self-hosting/more/authentication) is enabled.
-This connection type supports additional options for filtering which repos are indexed.
+ 1. Navigate to **Secrets** in settings and create a new secret with your PAT:
+
+ 
+
+ 2. Add the `token` property to your connection config:
-
-
- You can provide a list of repository topics to include while fetching the repos. Only repos that match at least one of these topics will be fetched.
- ```json
- {
- "type": "github",
- "topics": [
- "docs",
- "core"
- ]
- // .. additional configs ..
- }
- ```
-
-
- If enabled, forked repos will be ignored.
```json
{
"type": "github",
- "exclude": {
- "forks": true
- // .. additional excludes ..
+ "token": {
+ "secret": "mysecret"
}
- // .. additional configs ..
+ // .. rest of config ..
}
```
+
-
- If enabled, archived repos will be ignored.
- ```json
+
+
+## Connecting to a custom GitHub host
+
+To connect to a GitHub host other than `github.com`, provide the `url` property to your config:
+
+```json
+{
+ "type": "github",
+ "url": "https://github.example.com"
+ // .. rest of config ..
+}
+```
+
+## Schema reference
+
+
+[schemas/v3/github.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/github.json)
+
+```json
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "GithubConnectionConfig",
+ "properties": {
+ "type": {
+ "const": "github",
+ "description": "GitHub Configuration"
+ },
+ "token": {
+ "description": "A Personal Access Token (PAT).",
+ "examples": [
{
- "type": "github",
- "exclude": {
- "archived": true
- // .. additional excludes ..
- }
- // .. additional configs ..
+ "secret": "SECRET_KEY"
}
- ```
-
-
- You can provide a list of repos to exclude. These must be provided in the `/` syntax.
- ```json
+ ],
+ "anyOf": [
{
- "type": "github",
- "exclude": {
- "repos": [
- "sourcebot-dev/zoekt",
- "torvalds/linux"
- ]
- // .. additional excludes ..
+ "type": "object",
+ "properties": {
+ "secret": {
+ "type": "string",
+ "description": "The name of the secret that contains the token."
}
- // .. additional configs ..
- }
- ```
-
-
- You can provide a list of topics to ignore. Repos that contain any of these topics will be ignored
- ```json
+ },
+ "required": [
+ "secret"
+ ],
+ "additionalProperties": false
+ },
{
- "type": "github",
- "exclude": {
- "topics": [
- "ci",
- "experimental"
- ]
- // .. additional excludes ..
+ "type": "object",
+ "properties": {
+ "env": {
+ "type": "string",
+ "description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
- // .. additional configs ..
+ },
+ "required": [
+ "env"
+ ],
+ "additionalProperties": false
}
- ```
-
-
- The disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned
- Exclude repos based on their disk usage. Min/max numbers are in bytes.
- ```json
- {
- "type": "github",
- "exclude": {
- "size": {
- "min": 1000
- "max": 1000000000
- }
- // .. additional excludes ..
+ ]
+ },
+ "url": {
+ "type": "string",
+ "format": "url",
+ "default": "https://github.com",
+ "description": "The URL of the GitHub host. Defaults to https://github.com",
+ "examples": [
+ "https://github.com",
+ "https://github.example.com"
+ ],
+ "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
+ },
+ "users": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[\\w.-]+$"
+ },
+ "default": [],
+ "examples": [
+ [
+ "torvalds",
+ "DHH"
+ ]
+ ],
+ "description": "List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property."
+ },
+ "orgs": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[\\w.-]+$"
+ },
+ "default": [],
+ "examples": [
+ [
+ "my-org-name"
+ ],
+ [
+ "sourcebot-dev",
+ "commaai"
+ ]
+ ],
+ "description": "List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property."
+ },
+ "repos": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[\\w.-]+\\/[\\w.-]+$"
+ },
+ "default": [],
+ "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'."
+ },
+ "topics": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "minItems": 1,
+ "default": [],
+ "description": "List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
+ "examples": [
+ [
+ "docs",
+ "core"
+ ]
+ ]
+ },
+ "exclude": {
+ "type": "object",
+ "properties": {
+ "forks": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude forked repositories from syncing."
+ },
+ "archived": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude archived repositories from syncing."
+ },
+ "repos": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "default": [],
+ "description": "List of individual repositories to exclude from syncing. Glob patterns are supported."
+ },
+ "topics": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "default": [],
+ "description": "List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
+ "examples": [
+ [
+ "tests",
+ "ci"
+ ]
+ ]
+ },
+ "size": {
+ "type": "object",
+ "description": "Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned.",
+ "properties": {
+ "min": {
+ "type": "integer",
+ "description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
+ },
+ "max": {
+ "type": "integer",
+ "description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
}
- // .. additional configs ..
+ },
+ "additionalProperties": false
}
- ```
-
-
\ No newline at end of file
+ },
+ "additionalProperties": false
+ },
+ "revisions": {
+ "type": "object",
+ "description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
+ "properties": {
+ "branches": {
+ "type": "array",
+ "description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "main",
+ "release/*"
+ ],
+ [
+ "**"
+ ]
+ ],
+ "default": []
+ },
+ "tags": {
+ "type": "array",
+ "description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "latest",
+ "v2.*.*"
+ ],
+ [
+ "**"
+ ]
+ ],
+ "default": []
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "type"
+ ],
+ "additionalProperties": false
+}
+```
+
+
\ No newline at end of file
diff --git a/docs/docs/connections/gitlab.mdx b/docs/docs/connections/gitlab.mdx
index a4e50ba8..63fe10a8 100644
--- a/docs/docs/connections/gitlab.mdx
+++ b/docs/docs/connections/gitlab.mdx
@@ -1,175 +1,384 @@
---
-title: GitLab Connection
+title: Linking code from GitLab
sidebarTitle: GitLab
-icon: gitlab
---
-To search projects on GitLab, you'll create a GitLab Connection. Using this connection, you can:
-- Index individual GitLab projects
-- Index all of the projects in a GitLab group/sub-group
-- Index all of the projects from a GitLab user
+Sourcebot can sync code from GitLab.com, Self Managed (CE & EE), and Dedicated.
-If you want to index a private project/group, make sure you provide an [access token](/docs/access-tokens/overview)
-
+## Examples
-# Config Schema
-
-For a detailed description of the schema, check out the schema file (TODO: link)
-
-
-
- This flag only works if your GitLab instance url is set and not equal to https://gitlab.com. Make sure you provide a [secret](/docs/access-tokens/secrets) containing the necessary access token to fetch your projects.
- If you'd like to index all projects visible to your GitLab instance, you can use the `all` flag.
+
+
```json
- {
- "type": "gitlab",
- "all": "true",
- "url": "https://gitlab.aperaturelabs.com" // This is required, and cannot be https://gitlab.com
- "token": {
- "secret": "MY_SECRET_KEY_NAME" // the name of the secret you created in Sourcebot
- }
- }
+ {
+ "type": "gitlab",
+ "projects": [
+ "my-group/foo",
+ "my-group/subgroup/bar"
+ ]
+ }
```
-
-
- You can provide a list of groups (and recursive sub-groups) to index. Sourcebot will fetch all of the visible projects in these groups. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`)
+
+
```json
{
"type": "gitlab",
"groups": [
- "gitlab-org/api",
- "veloren"
+ "my-group",
+ "my-other-group/sub-group"
]
- // .. additional configs ..
}
```
-
-
- You can provide a list of users to index. Sourcebot will fetch all of the visible projects for these users.
+
+
+ This option is ignored if `url` is unset or set to `https://gitlab.com`. See [connecting to a custom GitLab host](/docs/connections/gitlab#connecting-to-a-custom-gitlab-host).
```json
{
"type": "gitlab",
- "users": [
- "gnachman"
- ]
- // .. additional configs ..
+ "url": "https://gitlab.example.com",
+ // Index all projects in this self-managed instance
+ "all": true
}
```
-
-
- You can provide a list of projects to index. These must be provided in the `/` syntax.
+
+
```json
{
"type": "gitlab",
- "projects": [
- "inkscape/inkscape",
- "wireshark/wireshark"
+ "users": [
+ "user-1",
+ "user-2"
]
- // .. additional configs ..
}
```
-
-
- If you're self-hosting you're own GitLab instance, you can provide a link to it:
+
+
```json
{
"type": "gitlab",
- "url": {
- "https://gitlab.aperaturelabs.com"
- }
- // .. additional configs ..
+ // Sync all projects in `my-group` that have a topic that...
+ "groups": [
+ "my-group"
+ ],
+ // ...match one of these glob patterns.
+ "topics": [
+ "test-*",
+ "ci-*",
+ "k8s"
+ ]
}
+
```
-
-
- If you'd like to index private projects, make sure you provide a token. You can do so by referencing the [secret](/docs/access-tokens/secrets) that contains your token.
+
+
```json
{
"type": "gitlab",
- "token": {
- "secret": "MY_SECRET_KEY_NAME"
+ // Include all projects in these groups...
+ "groups": [
+ "my-group",
+ "my-other-group/sub-group"
+ ],
+ // ...except:
+ "exclude": {
+ // projects that are archived
+ "archived": true,
+ // projects that are forks
+ "forks": true,
+ // projects that match these glob patterns
+ "projects": [
+ "my-group/foo/**",
+ "my-group/bar/**",
+ "my-other-group/sub-group/specific-project"
+ ],
+ // projects with topics that match these glob patterns
+ "topics": [
+ "test-*",
+ "ci"
+ ]
}
- // .. additional configs ..
}
```
-
-
+
+
-### Filter Options
-This connection type supports additional options for filtering which projects are indexed.
+## Authenticating with GitLab
+
+In order to index private projects, you'll need to generate a GitLab Personal Access Token (PAT). Create a new PAT [here](https://gitlab.com/-/user_settings/personal_access_tokens) and make sure you select the `read_api` scope:
+
+
+
+Next, provide the PAT via the `token` property, either as an environment variable or a secret:
-
- You can provide a list of repository topics to include while fetching the projects. Only projects that match at least one of these topics will be fetched.
+
+ Environment variables are only supported in a [declarative config](/self-hosting/more/declarative-config) and cannot be used in the web UI.
+
+ 1. Add the `token` property to your connection config:
```json
{
"type": "gitlab",
- "topics": [
- "docs",
- "core"
- ]
- // .. additional configs ..
+ "token": {
+ // note: this env var can be named anything. It
+ // doesn't need to be `GITLAB_TOKEN`.
+ "env": "GITLAB_TOKEN"
+ }
+ // .. rest of config ..
}
```
+
+ 2. Pass this environment variable each time you run Sourcebot:
+ ```bash
+ docker run \
+ -e GITLAB_TOKEN=<PAT> \
+ /* additional args */ \
+ ghcr.io/sourcebot-dev/sourcebot:latest
+ ```
-
- If enabled, forked projects will be ignored.
+
+
+ Secrets are only supported when [authentication](/self-hosting/more/authentication) is enabled.
+
+ 1. Navigate to **Secrets** in settings and create a new secret with your PAT:
+
+ 
+
+ 2. Add the `token` property to your connection config:
+
```json
{
"type": "gitlab",
- "exclude": {
- "forks": true
- // .. additional excludes ..
+ "token": {
+ "secret": "mysecret"
}
- // .. additional configs ..
+ // .. rest of config ..
}
```
+
-
- If enabled, archived projects will be ignored.
- ```json
+
+
+## Connecting to a custom GitLab host
+
+To connect to a GitLab host other than `gitlab.com`, provide the `url` property to your config:
+
+```json
+{
+ "type": "gitlab",
+ "url": "https://gitlab.example.com"
+ // .. rest of config ..
+}
+```
+
+## Schema reference
+
+
+[schemas/v3/gitlab.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/gitlab.json)
+
+```json
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "GitlabConnectionConfig",
+ "properties": {
+ "type": {
+ "const": "gitlab",
+ "description": "GitLab Configuration"
+ },
+ "token": {
+ "description": "An authentication token.",
+ "examples": [
{
- "type": "gitlab",
- "exclude": {
- "archived": true
- // .. additional excludes ..
- }
- // .. additional configs ..
+ "secret": "SECRET_KEY"
}
- ```
-
-
- You can provide a list of projects to exclude. These must be provided in the `/` syntax.
- ```json
+ ],
+ "anyOf": [
{
- "type": "gitlab",
- "exclude": {
- "projects": [
- "sourcebot-dev/zoekt",
- "torvalds/linux"
- ]
- // .. additional excludes ..
+ "type": "object",
+ "properties": {
+ "secret": {
+ "type": "string",
+ "description": "The name of the secret that contains the token."
}
- // .. additional configs ..
- }
- ```
-
-
- You can provide a list of topics to ignore. Projects that contain any of these topics will be ignored
- ```json
+ },
+ "required": [
+ "secret"
+ ],
+ "additionalProperties": false
+ },
{
- "type": "gitlab",
- "exclude": {
- "topics": [
- "ci",
- "experimental"
- ]
- // .. additional excludes ..
+ "type": "object",
+ "properties": {
+ "env": {
+ "type": "string",
+ "description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
- // .. additional configs ..
+ },
+ "required": [
+ "env"
+ ],
+ "additionalProperties": false
}
- ```
-
-
\ No newline at end of file
+ ]
+ },
+ "url": {
+ "type": "string",
+ "format": "url",
+ "default": "https://gitlab.com",
+ "description": "The URL of the GitLab host. Defaults to https://gitlab.com",
+ "examples": [
+ "https://gitlab.com",
+ "https://gitlab.example.com"
+ ],
+ "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
+ },
+ "all": {
+ "type": "boolean",
+ "default": false,
+ "description": "Sync all projects visible to the provided `token` (if any) in the GitLab instance. This option is ignored if `url` is either unset or set to https://gitlab.com ."
+ },
+ "users": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of users to sync with. All projects owned by the user and visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property."
+ },
+ "groups": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "my-group"
+ ],
+ [
+ "my-group/sub-group-a",
+ "my-group/sub-group-b"
+ ]
+ ],
+ "description": "List of groups to sync with. All projects in the group (and recursive subgroups) visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`)."
+ },
+ "projects": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "my-group/my-project"
+ ],
+ [
+ "my-group/my-sub-group/my-project"
+ ]
+ ],
+ "description": "List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/"
+ },
+ "topics": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "minItems": 1,
+ "description": "List of project topics to include when syncing. Only projects that match at least one of the provided `topics` will be synced. If not specified, all projects will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
+ "examples": [
+ [
+ "docs",
+ "core"
+ ]
+ ]
+ },
+ "exclude": {
+ "type": "object",
+ "properties": {
+ "forks": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude forked projects from syncing."
+ },
+ "archived": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude archived projects from syncing."
+ },
+ "projects": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "default": [],
+ "examples": [
+ [
+ "my-group/my-project"
+ ]
+ ],
+ "description": "List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/"
+ },
+ "topics": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
+ "examples": [
+ [
+ "tests",
+ "ci"
+ ]
+ ]
+ }
+ },
+ "additionalProperties": false
+ },
+ "revisions": {
+ "type": "object",
+ "description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
+ "properties": {
+ "branches": {
+ "type": "array",
+ "description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "main",
+ "release/*"
+ ],
+ [
+ "**"
+ ]
+ ],
+ "default": []
+ },
+ "tags": {
+ "type": "array",
+ "description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "latest",
+ "v2.*.*"
+ ],
+ [
+ "**"
+ ]
+ ],
+ "default": []
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "type"
+ ],
+ "additionalProperties": false
+}
+```
+
\ No newline at end of file
diff --git a/docs/docs/connections/overview.mdx b/docs/docs/connections/overview.mdx
index 8b6d50fe..a105a3de 100644
--- a/docs/docs/connections/overview.mdx
+++ b/docs/docs/connections/overview.mdx
@@ -1,20 +1,33 @@
---
-title: "Connections"
-sidebarTitle: "Connections"
-icon: "plug"
+title: Overview
+sidebarTitle: Overview
---
-import ConnectionCards from '/snippets/connection-cards.mdx';
+To connect your code to Sourcebot you create **connections**. A **connection** is a configuration object that describes how Sourcebot should fetch information from a supported code host.
-Connections are how you tell Sourcebot what repositories you want to index.
+There are two ways to define connections:
-To index your code, you'll create a connection to your code host platform and tell Sourcebot which repos/users/orgs to fetch, as well as other information Sourcebot may need (ex. access tokens, instance URLs)
+
+
+ This is only supported when self-hosting, and is the default mechanism to define connections. Connections are defined in a [JSON file](/self-hosting/more/declarative-config)
+ and the path to the file is provided through the `CONFIG_PATH` environment variable.
+
+
+ This is the only way to define connections when using Sourcebot Cloud, and can be configured when self-hosting by enabling [authentication](/self-hosting/more/authentication).
-You can view your connections by pressing `Connections` in the nav menu:
+ In this method, connections are defined and managed within the webapp:
-
+ 
+
+
-# Platform Support
+### Supported code hosts
-Sourcebot can index repositories across various different code host platforms
-
+
+
+
+
+
+
+
+Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).
\ No newline at end of file
diff --git a/docs/docs/connections/request-new.mdx b/docs/docs/connections/request-new.mdx
new file mode 100644
index 00000000..dc42d9fc
--- /dev/null
+++ b/docs/docs/connections/request-new.mdx
@@ -0,0 +1,7 @@
+---
+sidebarTitle: Request another host
+url: https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas
+title: Request another code host
+---
+
+Is your code host not supported? Please open a [feature request](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).
\ No newline at end of file
diff --git a/docs/docs/getting-started-selfhost.mdx b/docs/docs/getting-started-selfhost.mdx
new file mode 100644
index 00000000..17113ea6
--- /dev/null
+++ b/docs/docs/getting-started-selfhost.mdx
@@ -0,0 +1,8 @@
+---
+sidebarTitle: Quick start guide (self-host)
+url: /self-hosting/overview
+---
+
+{/*This page acts as a navigation link*/}
+
+[Quick start guide (self-host)](/self-hosting/overview)
\ No newline at end of file
diff --git a/docs/docs/getting-started.mdx b/docs/docs/getting-started.mdx
index 15124256..2d78b61c 100644
--- a/docs/docs/getting-started.mdx
+++ b/docs/docs/getting-started.mdx
@@ -1,45 +1,49 @@
---
-title: Getting started
+title: Cloud quick start guide
+sidebarTitle: Quick start guide (cloud)
---
-This page assumes that you're either using Sourcebot Cloud or have spun up your own Sourcebot
-self-hosted deployment. If you haven't, checkout out the [Overview](/overview) page.
+Looking for a self-hosted solution? Check out our [self-hosting docs](/self-hosting/overview).
-This page will guide you through configuring Sourcebot with your teams repositories.
+This page will provide a quick walkthrough of how to get onboarded on Sourcebot, import your code, and start searching.
-### Account Registration
+{/*@todo: record a quick start guide
+
+*/}
+
+
+
+ Head over to [app.sourcebot.dev](https://app.sourcebot.dev) and create an account.
+
-When you visit a Sourcebot deployment for the first time (either Sourcebot Cloud or your own self-hosted deployment), you'll be presented with the following login page. Pick
-your login method of choice to register for a new account.
+
+ After logging in, you'll be asked to create an organization. You'll invite your team members to this organization later so they can also use Sourcebot.
-Some login options may not be visible if you haven't configured them in your self-hosted deployment
-
+ 
+
-### Organization Creation
+
+ After selecting a code host you want to connect to, you'll be presented with the connection creation page. This page has the following three inputs:
+ - Connection name (required): The name of the connection within Sourcebot
+ - Secret (optional): An [access token](/access-tokens/overview) that is used to fetch private repos
+ - Configuration: The JSON configuration schema that defines the repos/orgs to fetch.
-After logging in, you'll be asked to create an organization. You'll invite your team members to this organization later so they can also use Sourcebot.
+ For a more detailed explanation of connections, check out the [Connections](/docs/connections/overview) page.
-
+ The example below shows a connection named `sourcebot-org` that fetches all of the repos for the `sourcebot-dev` GitHub organization, but excludes the `sourcebot-dev/zoekt` repo.
-### Connection Creation
-
-After creating an org, we jump right into creating a [connection](/connections/overview). Connections are how you tell Sourcebot which repositories you want to index.
-
-If you're self-hosting Sourcebot, your source code never leaves your machine
-
-
-#### Create Connection
-After selecting a code host you want to connect to, you'll be presented with the connection creation page. This page has the following three inputs:
-- Connection name (required): The name of the connection within Sourcebot
-- Secret (optional): An [access token](/access-tokens/overview) that is used to fetch private repos
-- Configuration: The JSON configuration schema that defines the repos/orgs to fetch.
-
-For a more detailed explenation of connections, check out the [Connections](/connections/overview) page.
-
-The example below shows a connection named `sourcebot-org` that fetches all of the repos for the `sourcebot-dev` GitHub organization, but excludes the `sourcebot-dev/zoekt` repo
-
-This page won't let you continue with an invalid connection schema. If you're hitting errors, make sure the input you're providing is a valid JSON
-
+ This page won't let you continue with an invalid connection schema. If you're hitting errors, make sure the input you're providing is valid JSON.
+ 
+
+
### Search
@@ -48,5 +52,4 @@ of the repos you've indexed

-Congrats, you've successfuly setup Sourcebot! Read on to learn more about the Sourcebot's capabilities
-- Checkout the [Connections](/connections/overview) page to learn how to control which repos Sourcebot fetches
\ No newline at end of file
+Congrats, you've successfully set up Sourcebot! Read on to learn more about Sourcebot's capabilities. Check out the [Connections](/docs/connections/overview) page to learn how to control which repos Sourcebot fetches.
\ No newline at end of file
diff --git a/docs/docs/more/roles-and-permissions.mdx b/docs/docs/more/roles-and-permissions.mdx
new file mode 100644
index 00000000..92ff91a7
--- /dev/null
+++ b/docs/docs/more/roles-and-permissions.mdx
@@ -0,0 +1,13 @@
+---
+title: Roles and Permissions
+---
+
+Looking to sync permissions with your identity provider? We're working on it - [reach out](https://www.sourcebot.dev/contact) to us to learn more.
+
+If you're using Sourcebot Cloud, or are self-hosting with [authentication](/self-hosting/more/authentication) enabled, you may have multiple members in your organization. Each
+member has a role which defines their permissions:
+
+| Role | Permission |
+| :--- | :--------- |
+| `Owner` | Each organization has a single `Owner`. This user has full access rights, including: connection management, organization management, and inviting new members. |
+| `Member` | Read-only access to the organization. A `Member` can search across the repos indexed by an organization's connections, but may not manage the organization or its connections. |
\ No newline at end of file
diff --git a/docs/docs/overview.mdx b/docs/docs/overview.mdx
index 2cccd861..e0495d02 100644
--- a/docs/docs/overview.mdx
+++ b/docs/docs/overview.mdx
@@ -1,37 +1,22 @@
---
title: "Overview"
-icon: "bookmark"
-iconType: "solid"
---
import ConnectionCards from '/snippets/connection-cards.mdx';
-Sourcebot is an open-source code search tool ([Github](https://github.com/sourcebot-dev/sourcebot)). It allows teams to index repositories across various different platforms, and provides a powerful interface to search across them.
+Sourcebot is an **[open-source](https://github.com/sourcebot-dev/sourcebot) code search tool** that is purpose built to search multi-million line codebases in seconds. It integrates with [GitHub](/docs/connections/github), [GitLab](/docs/connections/gitlab), and [other platforms](/docs/connections).
-We have a [public demo](https://sourcebot.dev/search) if you'd like to try Sourcebot out before registering.
+## Getting Started
There are two ways to get started using Sourcebot:
-
-
- A fully managed version of Sourcebot that is hosted and maintained by the Sourcebot team: https://app.sourcebot.dev/
-
- Best for teams who:
- - Have large indexing requirements (500+ repos)
- - Don't want to deal with the headache of managing their own Sourcebot deployment
-
-
- Run Sourcebot within your own infrastructure. Check out our [self-hosting](/self-hosting/overview) guide to get started (it takes < 1 minute to spin up)
+
+
+ Deploy Sourcebot on your own infrastructure.
+
+
+ Use Sourcebot on our managed infrastructure.
+
+
- Best for teams who:
- - Want to try out Sourcebot without going through security review (everything stays on your machine)
- - Want full control over their Sourcebot deployment
-
-
-
-Once you know which Sourcebot deployment you want to use, check out the [Getting Started](/getting-started) page
-# Platform Support
-
-Sourcebot can index repositories across various different code host platforms.
-
-
\ No newline at end of file
+We also have a [public demo](https://sourcebot.dev/search) if you'd like to try Sourcebot out before registering.
diff --git a/docs/images/architecture_diagram.png b/docs/images/architecture_diagram.png
new file mode 100644
index 00000000..a115568c
Binary files /dev/null and b/docs/images/architecture_diagram.png differ
diff --git a/docs/images/connection_page.png b/docs/images/connection_page.png
new file mode 100644
index 00000000..9dca5cc4
Binary files /dev/null and b/docs/images/connection_page.png differ
diff --git a/docs/images/gitea_pat_creation.png b/docs/images/gitea_pat_creation.png
new file mode 100644
index 00000000..00d6ab94
Binary files /dev/null and b/docs/images/gitea_pat_creation.png differ
diff --git a/docs/images/login_redeem_code.png b/docs/images/login_redeem_code.png
new file mode 100644
index 00000000..c88b195c
Binary files /dev/null and b/docs/images/login_redeem_code.png differ
diff --git a/docs/images/org_switch.png b/docs/images/org_switch.png
new file mode 100644
index 00000000..ea863d2a
Binary files /dev/null and b/docs/images/org_switch.png differ
diff --git a/docs/images/secrets_list.png b/docs/images/secrets_list.png
new file mode 100644
index 00000000..e272688d
Binary files /dev/null and b/docs/images/secrets_list.png differ
diff --git a/docs/introduction.mdx b/docs/introduction.mdx
deleted file mode 100644
index 5e935da4..00000000
--- a/docs/introduction.mdx
+++ /dev/null
@@ -1,73 +0,0 @@
----
-title: Introduction
-description: "Welcome to the home of your new documentation"
----
-
-
-
-
-## Setting up
-
-Test
-
-The first step to world-class documentation is setting up your editing environments.
-
-
-
- Get your docs set up locally for easy development
-
-
- Preview your changes before you push to make sure they're perfect
-
-
-
-## Make it yours
-
-Update your docs to your brand and add valuable content for the best user conversion.
-
-
-
- Customize your docs to your company's colors and brands
-
-
- Automatically generate endpoints from an OpenAPI spec
-
-
- Build interactive features and designs to guide your users
-
-
- Check out our showcase of our favorite documentation
-
-
diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx
deleted file mode 100644
index 9ac2a6ac..00000000
--- a/docs/quickstart.mdx
+++ /dev/null
@@ -1,97 +0,0 @@
----
-title: 'Quickstart'
-description: 'Start building awesome documentation in under 5 minutes'
----
-
-## Setup your development
-
-Learn how to update your docs locally and deploy them to the public.
-
-### Edit and preview
-
-
-
- During the onboarding process, we created a repository on your Github with
- your docs content. You can find this repository on our
- [dashboard](https://dashboard.mintlify.com). To clone the repository
- locally, follow these
- [instructions](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository)
- in your terminal.
-
-
- Previewing helps you make sure your changes look as intended. We built a
- command line interface to render these changes locally.
- 1. Install the
- [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the
- documentation changes locally with this command: ``` npm i -g mintlify ```
- 2. Run the following command at the root of your documentation (where
- `docs.json` is): ``` mintlify dev ```
-
- If you’re currently using the legacy ```mint.json``` configuration file, please update the Mintlify CLI:
-
-
- ```npm i -g mintlify@latest```
- And run the new upgrade command in your docs repository:
-
- ```mintlify upgrade```
- You should now be using the new ```docs.json``` configuration file. Feel free to delete the ```mint.json``` file from your repository.
-
-
-
-
-### Deploy your changes
-
-
-
-
- Our Github app automatically deploys your changes to your docs site, so you
- don't need to manage deployments yourself. You can find the link to install on
- your [dashboard](https://dashboard.mintlify.com). Once the bot has been
- successfully installed, there should be a check mark next to the commit hash
- of the repo.
-
-
- [Commit and push your changes to
- Git](https://docs.github.com/en/get-started/using-git/pushing-commits-to-a-remote-repository#about-git-push)
- for your changes to update in your docs site. If you push and don't see that
- the Github app successfully deployed your changes, you can also manually
- update your docs through our [dashboard](https://dashboard.mintlify.com).
-
-
-
-
-## Update your docs
-
-Add content directly in your files with MDX syntax and React components. You can use any of our components, or even build your own.
-
-
-
-
- Add content to your docs with MDX syntax.
-
-
-
- Add code directly to your docs with syntax highlighting.
-
-
-
- Add images to your docs to make them more engaging.
-
-
-
- Add templates to your docs to make them more reusable.
-
-
-
diff --git a/docs/self-hosting/configuration.mdx b/docs/self-hosting/configuration.mdx
new file mode 100644
index 00000000..419b4163
--- /dev/null
+++ b/docs/self-hosting/configuration.mdx
@@ -0,0 +1,59 @@
+---
+title: Configuration
+sidebarTitle: Configuration
+---
+
+
+## Environment Variables
+
+Sourcebot accepts a variety of environment variables to fine tune your deployment.
+
+| Variable | Default | Description |
+| :------- | :------ | :---------- |
+| `SOURCEBOT_LOG_LEVEL` | `info` | The Sourcebot logging level. Valid values are `debug`, `info`, `warn`, `error`, in order of severity. |
+| `DATABASE_URL` | `postgresql://postgres@localhost:5432/sourcebot` | Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container. |
+| `REDIS_URL` | `redis://localhost:6379` | Connection string of your Redis instance. By default, a Redis database is automatically provisioned at startup within the container. |
+| `SOURCEBOT_ENCRYPTION_KEY` | - | Used to encrypt connection secrets. Generated using `openssl rand -base64 24`. Automatically generated at startup if no value is provided. |
+| `AUTH_SECRET` | - | Used to validate login session cookies. Generated using `openssl rand -base64 33`. Automatically generated at startup if no value is provided. |
+| `AUTH_URL` | - | URL of your Sourcebot deployment, e.g., `https://example.com` or `http://localhost:3000`. Required when `SOURCEBOT_AUTH_ENABLED` is `true`. |
+| `SOURCEBOT_TENANCY_MODE` | `single` | The tenancy configuration for Sourcebot. Valid values are `single` or `multi`. See [this doc](/self-hosting/more/tenancy) for more info. |
+| `SOURCEBOT_AUTH_ENABLED` | `false` | Enables/disables authentication in Sourcebot. If set to `false`, `SOURCEBOT_TENANCY_MODE` must be `single`. See [this doc](/self-hosting/more/authentication) for more info. |
+| `SOURCEBOT_TELEMETRY_DISABLED` | `false` | Enables/disables telemetry collection in Sourcebot. See [this doc](/self-hosting/security/telemetry) for more info. |
+| `DATA_DIR` | `/data` | The directory within the container to store all persistent data. Typically, this directory will be volume mapped such that data is persisted across container restarts (e.g., `docker run -v $(pwd):/data`) |
+| `DATA_CACHE_DIR` | `$DATA_DIR/.sourcebot` | The root data directory in which all data written to disk by Sourcebot will be located. |
+| `DATABASE_DATA_DIR` | `$DATA_CACHE_DIR/db` | The data directory for the default Postgres database. |
+| `REDIS_DATA_DIR` | `$DATA_CACHE_DIR/redis` | The data directory for the default Redis instance. |
+
+
+## Additional Features
+
+There are additional features that can be enabled and configured via environment variables.
+
+
+
+
+
+
+
+
+## Health Check and Version Endpoints
+
+Sourcebot includes a health check endpoint that indicates if the application is alive, returning `200 OK` if it is:
+
+```sh
+curl http://localhost:3000/api/health
+```
+
+It also includes a version endpoint to check the current version of the application:
+
+```sh
+curl http://localhost:3000/api/version
+```
+
+Sample response:
+
+```json
+{
+ "version": "v3.0.0"
+}
+```
\ No newline at end of file
diff --git a/docs/self-hosting/deployment/source.mdx b/docs/self-hosting/deployment/source.mdx
deleted file mode 100644
index ddebe7f7..00000000
--- a/docs/self-hosting/deployment/source.mdx
+++ /dev/null
@@ -1,43 +0,0 @@
----
-title: Build from Source
-sidebarTitle: Build from Source
----
-
-Building from source is only required if you're making changes. The recommended way to self-host Sourcebot is to use the [pre-built docker image](/self-hosting/overview)
-
-
-
- A NodeJS version of at least 21.1.0 is required.
- 1. [go](https://go.dev/doc/install)
- 2. [NodeJS](https://nodejs.org/)
- 3. [ctags](https://github.com/universal-ctags/ctags)
- ```bash
- // macOS:
- brew install universal-ctags
-
- // Linux:
- snap install universal-ctags
- ```
-
-
- ```bash
- git clone --recurse-submodules https://github.com/sourcebot-dev/sourcebot.git
- ```
-
-
- ```bash
- cd sourcebot
- make
- ```
-
-
- ``` bash
- yarn dev
- ```
-
-
- Start searching at `http://localhost:3000`
-
-
-
-Congrats, you're now running Sourcebot from source! Check out our [getting started](/docs/getting-started) page to dive in.
\ No newline at end of file
diff --git a/docs/self-hosting/more/authentication.mdx b/docs/self-hosting/more/authentication.mdx
new file mode 100644
index 00000000..78c14657
--- /dev/null
+++ b/docs/self-hosting/more/authentication.mdx
@@ -0,0 +1,63 @@
+---
+title: Authentication
+sidebarTitle: Authentication
+---
+
+SSO is currently not supported. If you'd like SSO, please reach out using our [contact form](https://www.sourcebot.dev/contact)
+If you're switching from non-auth, delete the Sourcebot cache (the `.sourcebot` folder) before starting.
+
+Sourcebot has built-in authentication that gates access to your organization. OAuth, email codes, and email / password are supported. To enable authentication, set the `SOURCEBOT_AUTH_ENABLED` environment variable to `true`.
+When authentication is enabled:
+
+- [Connection management](/docs/connections/overview) happens through the UI
+- Members must be invited to an organization to gain access
+- If you're in single-tenant mode, the first user to register will be made the owner of the default organization. Check out the [roles page](/docs/more/roles-and-permissions) for more info on the different roles and permissions
+
+
+
+
+# Authentication Providers
+
+Make sure the `AUTH_URL` environment variable is [configured correctly](/self-hosting/configuration) when using Sourcebot in a deployed environment.
+
+To enable an authentication provider in Sourcebot, configure the required environment variables for the provider. Under the hood, Sourcebot uses Auth.js which supports [many providers](https://authjs.dev/getting-started/authentication/oauth). Submit a [feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas) if you want us to add support for a specific provider.
+
+
+## Email / Password
+---
+Email / password authentication is enabled by default. It can be **disabled** by setting `AUTH_CREDENTIALS_LOGIN_ENABLED` to `false`.
+
+## Email codes
+---
+Email codes are 6 digit codes sent to a provided email. Email codes are enabled when transactional emails are configured using the following environment variables:
+
+- `SMTP_CONNECTION_URL`
+- `EMAIL_FROM_ADDRESS`
+
+
+See [transactional emails](/self-hosting/more/transactional-emails) for more details.
+
+## GitHub
+---
+
+[Auth.js GitHub Provider Docs](https://authjs.dev/getting-started/providers/github)
+
+**Required environment variables:**
+- `AUTH_GITHUB_CLIENT_ID`
+- `AUTH_GITHUB_CLIENT_SECRET`
+
+## Google
+---
+
+[Auth.js Google Provider Docs](https://authjs.dev/getting-started/providers/google)
+
+**Required environment variables:**
+- `AUTH_GOOGLE_CLIENT_ID`
+- `AUTH_GOOGLE_CLIENT_SECRET`
+
+---
+
+# Troubleshooting
+
+- If you experience issues logging in, logging out, or accessing an organization you should have access to, try clearing your cookies & performing a full page refresh (`Cmd/Ctrl + Shift + R` on most browsers).
+- Still not working? Reach out to us on our [discord](https://discord.com/invite/6Fhp27x7Pb) or [github discussions](https://github.com/sourcebot-dev/sourcebot/discussions)
\ No newline at end of file
diff --git a/docs/self-hosting/more/declarative-config.mdx b/docs/self-hosting/more/declarative-config.mdx
new file mode 100644
index 00000000..3b635b8a
--- /dev/null
+++ b/docs/self-hosting/more/declarative-config.mdx
@@ -0,0 +1,624 @@
+---
+title: Configuring Sourcebot from a file (declarative config)
+sidebarTitle: Declarative config
+---
+
+Some teams require Sourcebot to be configured via a file (where it can be stored in version control, run through CI/CD pipelines, etc.) instead of a web UI. For more information on configuring connections, see this [overview](/docs/connections/overview).
+
+
+| Variable | Description |
+| :------- | :---------- |
+| `CONFIG_PATH` | Path to declarative config. |
+
+
+```json
+{
+ "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/refs/heads/main/schemas/v3/index.json",
+ "connections": {
+ "connection-1": {
+ "type": "github",
+ "repos": [
+ "sourcebot-dev/sourcebot"
+ ]
+ }
+ }
+}
+```
+
+## Schema reference
+
+
+[schemas/v3/index.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/index.json)
+
+```json
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "SourcebotConfig",
+ "definitions": {
+ "Settings": {
+ "type": "object",
+ "description": "Defines the global settings for Sourcebot.",
+ "properties": {
+ "maxFileSize": {
+ "type": "number",
+ "description": "The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed. Defaults to 2MB.",
+ "minimum": 1
+ },
+ "maxTrigramCount": {
+ "type": "number",
+ "description": "The maximum number of trigrams per document. Files that exceed this maximum will not be indexed. Defaults to 20000.",
+ "minimum": 1
+ },
+ "reindexIntervalMs": {
+ "type": "number",
+ "description": "The interval (in milliseconds) at which the indexer should re-index all repositories. Defaults to 1 hour.",
+ "minimum": 1
+ },
+ "resyncConnectionPollingIntervalMs": {
+ "type": "number",
+ "description": "The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced. Defaults to 1 second.",
+ "minimum": 1
+ },
+ "reindexRepoPollingIntervalMs": {
+ "type": "number",
+ "description": "The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed. Defaults to 1 second.",
+ "minimum": 1
+ },
+ "maxConnectionSyncJobConcurrency": {
+ "type": "number",
+ "description": "The number of connection sync jobs to run concurrently. Defaults to 8.",
+ "minimum": 1
+ },
+ "maxRepoIndexingJobConcurrency": {
+ "type": "number",
+ "description": "The number of repo indexing jobs to run concurrently. Defaults to 8.",
+ "minimum": 1
+ },
+ "maxRepoGarbageCollectionJobConcurrency": {
+ "type": "number",
+ "description": "The number of repo GC jobs to run concurrently. Defaults to 8.",
+ "minimum": 1
+ },
+ "repoGarbageCollectionGracePeriodMs": {
+ "type": "number",
+ "description": "The grace period (in milliseconds) for garbage collection. Used to prevent deleting shards while they're being loaded. Defaults to 10 seconds.",
+ "minimum": 1
+ },
+ "repoIndexTimeoutMs": {
+ "type": "number",
+ "description": "The timeout (in milliseconds) for a repo indexing to timeout. Defaults to 2 hours.",
+ "minimum": 1
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "properties": {
+ "$schema": {
+ "type": "string"
+ },
+ "settings": {
+ "$ref": "#/definitions/Settings"
+ },
+ "connections": {
+ "type": "object",
+ "description": "Defines a collection of connections from varying code hosts that Sourcebot should sync with. This is only available in single-tenancy mode.",
+ "patternProperties": {
+ "^[a-zA-Z0-9_-]+$": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "ConnectionConfig",
+ "oneOf": [
+ {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "GithubConnectionConfig",
+ "properties": {
+ "type": {
+ "const": "github",
+ "description": "GitHub Configuration"
+ },
+ "token": {
+ "description": "A Personal Access Token (PAT).",
+ "examples": [
+ {
+ "secret": "SECRET_KEY"
+ }
+ ],
+ "anyOf": [
+ {
+ "type": "object",
+ "properties": {
+ "secret": {
+ "type": "string",
+ "description": "The name of the secret that contains the token."
+ }
+ },
+ "required": [
+ "secret"
+ ],
+ "additionalProperties": false
+ },
+ {
+ "type": "object",
+ "properties": {
+ "env": {
+ "type": "string",
+ "description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
+ }
+ },
+ "required": [
+ "env"
+ ],
+ "additionalProperties": false
+ }
+ ]
+ },
+ "url": {
+ "type": "string",
+ "format": "url",
+ "default": "https://github.com",
+ "description": "The URL of the GitHub host. Defaults to https://github.com",
+ "examples": [
+ "https://github.com",
+ "https://github.example.com"
+ ],
+ "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
+ },
+ "users": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[\\w.-]+$"
+ },
+ "default": [],
+ "examples": [
+ [
+ "torvalds",
+ "DHH"
+ ]
+ ],
+ "description": "List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property."
+ },
+ "orgs": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[\\w.-]+$"
+ },
+ "default": [],
+ "examples": [
+ [
+ "my-org-name"
+ ],
+ [
+ "sourcebot-dev",
+ "commaai"
+ ]
+ ],
+ "description": "List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property."
+ },
+ "repos": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[\\w.-]+\\/[\\w.-]+$"
+ },
+ "default": [],
+ "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'."
+ },
+ "topics": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "minItems": 1,
+ "default": [],
+ "description": "List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
+ "examples": [
+ [
+ "docs",
+ "core"
+ ]
+ ]
+ },
+ "exclude": {
+ "type": "object",
+ "properties": {
+ "forks": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude forked repositories from syncing."
+ },
+ "archived": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude archived repositories from syncing."
+ },
+ "repos": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "default": [],
+ "description": "List of individual repositories to exclude from syncing. Glob patterns are supported."
+ },
+ "topics": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "default": [],
+ "description": "List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
+ "examples": [
+ [
+ "tests",
+ "ci"
+ ]
+ ]
+ },
+ "size": {
+ "type": "object",
+ "description": "Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned.",
+ "properties": {
+ "min": {
+ "type": "integer",
+ "description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
+ },
+ "max": {
+ "type": "integer",
+ "description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "additionalProperties": false
+ },
+ "revisions": {
+ "type": "object",
+ "description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
+ "properties": {
+ "branches": {
+ "type": "array",
+ "description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "main",
+ "release/*"
+ ],
+ [
+ "**"
+ ]
+ ],
+ "default": []
+ },
+ "tags": {
+ "type": "array",
+ "description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "latest",
+ "v2.*.*"
+ ],
+ [
+ "**"
+ ]
+ ],
+ "default": []
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "type"
+ ],
+ "additionalProperties": false
+ },
+ {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "GitlabConnectionConfig",
+ "properties": {
+ "type": {
+ "const": "gitlab",
+ "description": "GitLab Configuration"
+ },
+ "token": {
+ "$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
+ "description": "An authentication token.",
+ "examples": [
+ {
+ "secret": "SECRET_KEY"
+ }
+ ]
+ },
+ "url": {
+ "type": "string",
+ "format": "url",
+ "default": "https://gitlab.com",
+ "description": "The URL of the GitLab host. Defaults to https://gitlab.com",
+ "examples": [
+ "https://gitlab.com",
+ "https://gitlab.example.com"
+ ],
+ "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
+ },
+ "all": {
+ "type": "boolean",
+ "default": false,
+ "description": "Sync all projects visible to the provided `token` (if any) in the GitLab instance. This option is ignored if `url` is either unset or set to https://gitlab.com ."
+ },
+ "users": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of users to sync with. All projects owned by the user and visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property."
+ },
+ "groups": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "my-group"
+ ],
+ [
+ "my-group/sub-group-a",
+ "my-group/sub-group-b"
+ ]
+ ],
+ "description": "List of groups to sync with. All projects in the group (and recursive subgroups) visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`)."
+ },
+ "projects": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "my-group/my-project"
+ ],
+ [
+ "my-group/my-sub-group/my-project"
+ ]
+ ],
+ "description": "List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/"
+ },
+ "topics": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "minItems": 1,
+ "description": "List of project topics to include when syncing. Only projects that match at least one of the provided `topics` will be synced. If not specified, all projects will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
+ "examples": [
+ [
+ "docs",
+ "core"
+ ]
+ ]
+ },
+ "exclude": {
+ "type": "object",
+ "properties": {
+ "forks": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude forked projects from syncing."
+ },
+ "archived": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude archived projects from syncing."
+ },
+ "projects": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "default": [],
+ "examples": [
+ [
+ "my-group/my-project"
+ ]
+ ],
+ "description": "List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/"
+ },
+ "topics": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
+ "examples": [
+ [
+ "tests",
+ "ci"
+ ]
+ ]
+ }
+ },
+ "additionalProperties": false
+ },
+ "revisions": {
+ "$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions"
+ }
+ },
+ "required": [
+ "type"
+ ],
+ "additionalProperties": false
+ },
+ {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "GiteaConnectionConfig",
+ "properties": {
+ "type": {
+ "const": "gitea",
+ "description": "Gitea Configuration"
+ },
+ "token": {
+ "$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
+ "description": "A Personal Access Token (PAT).",
+ "examples": [
+ {
+ "secret": "SECRET_KEY"
+ }
+ ]
+ },
+ "url": {
+ "type": "string",
+ "format": "url",
+ "default": "https://gitea.com",
+ "description": "The URL of the Gitea host. Defaults to https://gitea.com",
+ "examples": [
+ "https://gitea.com",
+ "https://gitea.example.com"
+ ],
+ "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
+ },
+ "orgs": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "my-org-name"
+ ]
+ ],
+ "description": "List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. If a `token` is provided, it must have the read:organization scope."
+ },
+ "repos": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[\\w.-]+\\/[\\w.-]+$"
+ },
+ "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'."
+ },
+ "users": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "username-1",
+ "username-2"
+ ]
+ ],
+ "description": "List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property. If a `token` is provided, it must have the read:user scope."
+ },
+ "exclude": {
+ "type": "object",
+ "properties": {
+ "forks": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude forked repositories from syncing."
+ },
+ "archived": {
+ "type": "boolean",
+ "default": false,
+ "description": "Exclude archived repositories from syncing."
+ },
+ "repos": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "default": [],
+ "description": "List of individual repositories to exclude from syncing. Glob patterns are supported."
+ }
+ },
+ "additionalProperties": false
+ },
+ "revisions": {
+ "$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions"
+ }
+ },
+ "required": [
+ "type"
+ ],
+ "additionalProperties": false
+ },
+ {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "GerritConnectionConfig",
+ "properties": {
+ "type": {
+ "const": "gerrit",
+ "description": "Gerrit Configuration"
+ },
+ "url": {
+ "type": "string",
+ "format": "url",
+ "description": "The URL of the Gerrit host.",
+ "examples": [
+ "https://gerrit.example.com"
+ ],
+ "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
+ },
+ "projects": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "List of specific projects to sync. If not specified, all projects will be synced. Glob patterns are supported",
+ "examples": [
+ [
+ "project1/repo1",
+ "project2/**"
+ ]
+ ]
+ },
+ "exclude": {
+ "type": "object",
+ "properties": {
+ "projects": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "examples": [
+ [
+ "project1/repo1",
+ "project2/**"
+ ]
+ ],
+ "description": "List of specific projects to exclude from syncing."
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "required": [
+ "type",
+ "url"
+ ],
+ "additionalProperties": false
+ }
+ ]
+ }
+ },
+ "additionalProperties": false
+ }
+ },
+ "additionalProperties": false
+}
+```
+
+
\ No newline at end of file
diff --git a/docs/self-hosting/more/tenancy.mdx b/docs/self-hosting/more/tenancy.mdx
new file mode 100644
index 00000000..af2b0f56
--- /dev/null
+++ b/docs/self-hosting/more/tenancy.mdx
@@ -0,0 +1,27 @@
+---
+title: Multi Tenancy Mode
+sidebarTitle: Multi tenancy
+---
+
+If you're switching from single-tenant mode, delete the Sourcebot cache (the `.sourcebot` folder) before starting.
+[Authentication](/self-hosting/more/authentication) must be enabled to enable multi tenancy mode
+Multi tenancy allows your Sourcebot deployment to have **multiple organizations**, each with their own set of members and repos. To enable multi tenancy mode, define an environment variable
+named `SOURCEBOT_TENANCY_MODE` and set its value to `multi`. When multi tenancy mode is enabled:
+
+- Any members or repos that are configured in an organization are isolated to that organization
+- Members must be invited to an organization to gain access
+- Members may be a part of multiple organizations and switch through them in the UI
+
+
+### Organization creation form
+
+When you sign in for the first time (assuming you didn't go through an invite), you'll be presented with the organization creation form. The member who creates
+the organization will be the Owner.
+
+
+
+### Switching between organizations
+
+To switch between organizations, press the drop down on the top left of the navigation menu. This also provides an option to create a new organization:
+
+
\ No newline at end of file
diff --git a/docs/self-hosting/more/transactional-emails.mdx b/docs/self-hosting/more/transactional-emails.mdx
new file mode 100644
index 00000000..d84c17b7
--- /dev/null
+++ b/docs/self-hosting/more/transactional-emails.mdx
@@ -0,0 +1,14 @@
+---
+title: Transactional Email
+sidebarTitle: Transactional email
+---
+
+To enable transactional emails in your deployment, set the following environment variables. We recommend using [Resend](https://resend.com/), but you can use any provider. Setting this enables you to:
+
+- Send emails when new members are invited
+- Log into the Sourcebot deployment using [email codes](/self-hosting/more/authentication#email-codes)
+
+| Variable | Description |
+| :------- | :---------- |
+| `SMTP_CONNECTION_URL` | SMTP server connection. |
+| `EMAIL_FROM_ADDRESS` | The sender's email address |
\ No newline at end of file
diff --git a/docs/self-hosting/overview.mdx b/docs/self-hosting/overview.mdx
index e8c6738e..f9e2af0b 100644
--- a/docs/self-hosting/overview.mdx
+++ b/docs/self-hosting/overview.mdx
@@ -3,27 +3,124 @@ title: Self-host Sourcebot
sidebarTitle: Overview
---
-Want to use Sourcebot without self-hosting? Checkout [Sourcebot Cloud](https://sourcebot.dev/login)
+Want a managed solution? Check out [Sourcebot Cloud](/docs/getting-started).
-Sourcebot is open source and can be self-hosted using our official Docker images. To get started, run the following command:
+Sourcebot is open source and can be self-hosted using our official [Docker image](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot).
-``` bash
-docker run -p 3000:3000 --rm --name sourcebot ghcr.io/sourcebot-dev/sourcebot:staging
-```
+## Quick Start Guide
-Navigate to `localhost:3000` to see your Sourcebot deployment in action. Check out the [README](https://github.com/sourcebot-dev/sourcebot/tree/main) to learn how to configure Sourcebot to index your repos.
+{/*@todo: record a self-hosting quick start guide
+
+*/}
-Making changes to Sourcebot? Checkout our guide on how to [build from source](/self-hosting/deployment/source).
+
+
+ By default, Sourcebot requires a configuration file with a list of [code host connections](/docs/connections/overview) that specify what repositories should be **synced** (cloned and indexed). To get started, run the following command to create a starter `config.json`:
+ ```bash
+ touch config.json
+ echo '{
+ "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
+ "connections": {
+ // Comments are supported
+ "starter-connection": {
+ "type": "github",
+ "repos": [
+ "sourcebot-dev/sourcebot"
+ ]
+ }
+ }
+ }' > config.json
+ ```
-### Telemetry
+ This config creates a single GitHub connection named `starter-connection` that specifies [Sourcebot](https://github.com/sourcebot-dev/sourcebot) as a repo to sync.
+
-By default, Sourcebot collects anonymized usage data through [PostHog](https://posthog.com/) to help us improve the performance and reliability of our tool. We don't collect or transmit [any information related to your codebase](https://sourcebot.dev/search/search?query=captureEvent%20repo%3Asourcebot%20case%3Ano). In addition, all events are [sanitized](https://github.com/sourcebot-dev/sourcebot/blob/HEAD/packages/web/src/app/posthogProvider.tsx) to ensure that no sensitive details (ex. ip address, query info) leave your machine.
+
+ Sourcebot is packaged as a [single Docker image](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot). In the same directory as `config.json`, run the following command to start your instance:
-The data we collect includes general usage statistics and metadata such as query performance (e.g., search duration, error rates) to monitor the application's health and functionality. This information helps us better understand how Sourcebot is used and where improvements can be made :)
+ ``` bash
+ docker run \
+ -p 3000:3000 \
+ --pull=always \
+ --rm \
+ -v $(pwd):/data \
+ -e CONFIG_PATH=/data/config.json \
+ --name sourcebot \
+ ghcr.io/sourcebot-dev/sourcebot:latest
+ ```
-If you'd like to disable all telemetry, you can do so by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `1` in the docker run command:
+ Navigate to `localhost:3000` to start searching the Sourcebot repo.
+
+
+ **This command**:
+ - pulls the latest version of the `sourcebot` docker image.
+ - mounts the working directory to `/data` in the container to allow Sourcebot to persist data across restarts, and to access the `config.json`. In your local directory, you should see a `.sourcebot` folder created that contains all persistent data.
+ - runs any pending database migrations.
+ - starts up all services, including the webserver exposed on port 3000.
+ - reads `config.json` and starts syncing.
+
+
+ Hit an issue? Please let us know on [GitHub discussions](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support) or by [emailing us](mailto:team@sourcebot.dev).
+
+
+
+ Sourcebot supports indexing public & private code on the following code hosts:
+
+
+
+
+
+
+
+
+ Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).
+
+
+
+## Architecture
+
+Sourcebot is shipped as a single docker container that runs a collection of services using [supervisord](https://supervisord.org/):
+
+
+
+{/*TODO: outline the different services, how Sourcebot communicates with code hosts, and the different*/}
+
+Sourcebot consists of the following components:
+- **Web Server** : main Next.js web application serving the Sourcebot UI.
+- **Backend Worker** : Node.js process that incrementally syncs with code hosts (e.g., GitHub, GitLab etc.) and asynchronously indexes configured repositories.
+- **Zoekt** : the [open-source](https://github.com/sourcegraph/zoekt), trigram indexing code search engine that powers Sourcebot under the hood.
+- **Postgres** : transactional database for storing business-logic data.
+- **Redis Job Queue** : fast in-memory store. Used with [BullMQ](https://docs.bullmq.io/) for queuing asynchronous work.
+- **`.sourcebot/` cache** : file-system cache where persistent data is written.
+
+You can use managed Redis / Postgres services that run outside of the Sourcebot container by providing the `REDIS_URL` and `DATABASE_URL` environment variables, respectively. See the [configuration](/self-hosting/configuration) for more configuration options.
+
+## Scalability
+
+One of our design philosophies for Sourcebot is to keep our infrastructure [radically simple](https://www.radicalsimpli.city/) while balancing scalability concerns. Depending on the number of repositories you have indexed and the instance you are running Sourcebot on, you may experience slow search times or other performance degradations. Our recommendation is to vertically scale your instance by increasing the number of CPU cores and memory.
+
+Sourcebot does not support horizontal scaling at this time, but it is on our roadmap. If this is something your team would be interested in, please contact us at [team@sourcebot.dev](mailto:team@sourcebot.dev).
+
+
+## Telemetry
+By default, Sourcebot collects anonymized usage data through [PostHog](https://posthog.com/) to help us improve the performance and reliability of our tool. We don't collect or transmit any information related to your codebase. In addition, all events are [sanitized](https://github.com/sourcebot-dev/sourcebot/blob/HEAD/packages/web/src/app/posthogProvider.tsx) to ensure that no sensitive details (ex. ip address, query info) leave your machine.
+
+The data we collect includes general usage statistics and metadata such as query performance (e.g., search duration, error rates) to monitor the application's health and functionality. This information helps us better understand how Sourcebot is used and where improvements can be made.
+
+If you'd like to disable all telemetry, you can do so by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `true`:
```bash
-docker run -e SOURCEBOT_TELEMETRY_DISABLED=1 /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
+docker run \
+ -e SOURCEBOT_TELEMETRY_DISABLED=true \
+ /* additional args */ \
+ ghcr.io/sourcebot-dev/sourcebot:latest
```
\ No newline at end of file
diff --git a/docs/self-hosting/upgrade/v2-to-v3-guide.mdx b/docs/self-hosting/upgrade/v2-to-v3-guide.mdx
new file mode 100644
index 00000000..898c7af2
--- /dev/null
+++ b/docs/self-hosting/upgrade/v2-to-v3-guide.mdx
@@ -0,0 +1,93 @@
+---
+title: V2 to V3 Guide
+sidebarTitle: V2 to V3 guide
+---
+
+This guide will walk you through upgrading your Sourcebot deployment from v2 to v3.
+
+
+Please note that the following features are no longer supported in v3:
+- Local file indexing
+- Raw remote `.git` repo indexing (i.e. not through a supported code host)
+
+If your deployment is dependent on these features, please [reach out](https://github.com/sourcebot-dev/sourcebot/discussions).
+
+
+This migration will require you to reindex all your repos.
+
+
+
+
+
+
+
+ The main change between the v3 and v2 schemas is how the data is structured. In v2, you defined a `repos` array which contained unnamed config objects:
+
+ ```json
+ {
+ "$schema": "./schemas/v2/index.json",
+ "repos": [
+ {
+ "type": "github",
+ "repos": [
+ "sourcebot-dev/sourcebot"
+ ]
+ },
+ {
+ "type": "gitlab",
+ "groups": [
+ "wireshark"
+ ]
+ }
+ ]
+ }
+ ```
+
+ In v3, you define a `connections` map which contains named `connection` objects:
+ ```json
+ {
+ "$schema": "./schemas/v3/index.json",
+ "connections": {
+ "sourcebot-connection": {
+ "type": "github",
+ "repos": [
+ "sourcebot-dev/sourcebot"
+ ]
+ },
+ "wireshark-connection": {
+ "type": "gitlab",
+ "groups": [
+ "wireshark"
+ ]
+ }
+ }
+ }
+ ```
+
+ The schema of the connections defined here is the same as the "repos" you defined in the v2 schema. Some helpful notes:
+
+ - The name of the connection (`sourcebot-connection` and `wireshark-connection` above) is only used to identify the connection in Sourcebot. It can be any string that contains letters, digits, hyphens, or underscores.
+ - A connection is associated with one and only one code host platform, and this must be specified in the connection's `type` field.
+ - Make sure you update the `$schema` field to point to the v3 schema
+ - The `settings` object doesn't need to be changed. We've added new settings params (check out the v3 schema for more details)
+
+
+ When you start up your Sourcebot deployment, it will create a fresh cache and begin indexing against your new v3 configuration file.
+
+ If there are issues with your configuration file it will provide an error in the console.
+ After updating your configuration file, restart your Sourcebot deployment to pick up the new changes.
+
+
+ Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [Discord](https://discord.gg/6Fhp27x7Pb) or [GitHub Discussions](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support).
+
+
+
+## Troubleshooting
+
+Some things to check:
+
+- Make sure you update the `$schema` field in the configuration file to point to the v3 schema
+- Make sure you have a name for each `connection`, and that the name only contains letters, digits, hyphens, or underscores
+- Make sure each `connection` has a `type` field with a valid value (`gitlab`, `github`, `gitea`, `gerrit`)
+
+Having trouble migrating from v2 to v3? Reach out to us on [Discord](https://discord.gg/6Fhp27x7Pb) or [GitHub Discussions](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support) and we'll try our best to help.
\ No newline at end of file
diff --git a/entrypoint.sh b/entrypoint.sh
index a0eac049..c7fb543e 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -29,11 +29,11 @@ if [ ! -d "$DATA_CACHE_DIR" ]; then
mkdir -p "$DATA_CACHE_DIR"
fi
-# Check if DB_DATA_DIR exists, if not initialize it
-if [ ! -d "$DB_DATA_DIR" ]; then
- echo -e "\e[34m[Info] Initializing database at $DB_DATA_DIR...\e[0m"
- mkdir -p $DB_DATA_DIR && chown -R postgres:postgres "$DB_DATA_DIR"
- su postgres -c "initdb -D $DB_DATA_DIR"
+# Check if DATABASE_DATA_DIR exists, if not initialize it
+if [ ! -d "$DATABASE_DATA_DIR" ]; then
+ echo -e "\e[34m[Info] Initializing database at $DATABASE_DATA_DIR...\e[0m"
+ mkdir -p $DATABASE_DATA_DIR && chown -R postgres:postgres "$DATABASE_DATA_DIR"
+ su postgres -c "initdb -D $DATABASE_DATA_DIR"
fi
# Create the redis data directory if it doesn't exist
@@ -130,20 +130,20 @@ echo "{\"version\": \"$NEXT_PUBLIC_SOURCEBOT_VERSION\", \"install_id\": \"$SOURC
# Start the database and wait for it to be ready before starting any other service
if [ "$DATABASE_URL" = "postgresql://postgres@localhost:5432/sourcebot" ]; then
- su postgres -c "postgres -D $DB_DATA_DIR" &
+ su postgres -c "postgres -D $DATABASE_DATA_DIR" &
until pg_isready -h localhost -p 5432 -U postgres; do
echo -e "\e[34m[Info] Waiting for the database to be ready...\e[0m"
sleep 1
done
# Check if the database already exists, and create it if it dne
- EXISTING_DB=$(psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname = '$DB_NAME'")
+ EXISTING_DB=$(psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname = 'sourcebot'")
if [ "$EXISTING_DB" = "1" ]; then
- echo "Database '$DB_NAME' already exists; skipping creation."
+ echo "Database 'sourcebot' already exists; skipping creation."
else
- echo "Creating database '$DB_NAME'..."
- psql -U postgres -c "CREATE DATABASE \"$DB_NAME\""
+ echo "Creating database 'sourcebot'..."
+ psql -U postgres -c "CREATE DATABASE \"sourcebot\""
fi
fi
diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts
index e6341125..93a94500 100644
--- a/packages/web/src/actions.ts
+++ b/packages/web/src/actions.ts
@@ -684,7 +684,7 @@ export const createInvites = async (emails: string[], domain: string): Promise<{
});
// Send invites to recipients
- if (env.SMTP_CONNECTION_URL && env.EMAIL_FROM) {
+ if (env.SMTP_CONNECTION_URL && env.EMAIL_FROM_ADDRESS) {
const origin = (await headers()).get('origin')!;
await Promise.all(emails.map(async (email) => {
const invite = await prisma.invite.findUnique({
@@ -727,7 +727,7 @@ export const createInvites = async (emails: string[], domain: string): Promise<{
const result = await transport.sendMail({
to: email,
- from: env.EMAIL_FROM,
+ from: env.EMAIL_FROM_ADDRESS,
subject: `Join ${invite.org.name} on Sourcebot`,
html,
text: `Join ${invite.org.name} on Sourcebot by clicking here: ${inviteLink}`,
diff --git a/packages/web/src/auth.ts b/packages/web/src/auth.ts
index 82e7850d..4aa8c497 100644
--- a/packages/web/src/auth.ts
+++ b/packages/web/src/auth.ts
@@ -50,10 +50,10 @@ export const getProviders = () => {
}));
}
- if (env.SMTP_CONNECTION_URL && env.EMAIL_FROM) {
+ if (env.SMTP_CONNECTION_URL && env.EMAIL_FROM_ADDRESS) {
providers.push(EmailProvider({
server: env.SMTP_CONNECTION_URL,
- from: env.EMAIL_FROM,
+ from: env.EMAIL_FROM_ADDRESS,
maxAge: 60 * 10,
generateVerificationToken: async () => {
const token = String(Math.floor(100000 + Math.random() * 900000));
diff --git a/packages/web/src/env.mjs b/packages/web/src/env.mjs
index 8b0f99eb..45fdbac1 100644
--- a/packages/web/src/env.mjs
+++ b/packages/web/src/env.mjs
@@ -29,7 +29,7 @@ export const env = createEnv({
// Email
SMTP_CONNECTION_URL: z.string().url().optional(),
- EMAIL_FROM: z.string().email().optional(),
+ EMAIL_FROM_ADDRESS: z.string().email().optional(),
// Stripe
STRIPE_SECRET_KEY: z.string().optional(),