Skip to content

feat(search): support code search by zoekt #33850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions assets/go-licenses.json

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions custom/conf/app.example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1494,10 +1494,10 @@ LEVEL = Info
;; If empty then it defaults to `sources` only, as if you'd like to disable fully please see REPO_INDEXER_ENABLED.
;REPO_INDEXER_REPO_TYPES = sources,forks,mirrors,templates
;;
;; Code search engine type, could be `bleve` or `elasticsearch`.
;; Code search engine type, could be `bleve`, `zoekt` or `elasticsearch`.
;REPO_INDEXER_TYPE = bleve
;;
;; Index file used for code search. available when `REPO_INDEXER_TYPE` is bleve
;; Index file used for code search; available when `REPO_INDEXER_TYPE` is bleve or zoekt
;REPO_INDEXER_PATH = indexers/repos.bleve
;;
;; Code indexer connection string, available when `REPO_INDEXER_TYPE` is elasticsearch. i.e. http://elastic:changeme@localhost:9200
Expand All @@ -1507,10 +1507,10 @@ LEVEL = Info
;REPO_INDEXER_NAME = gitea_codes
;;
;; A comma separated list of glob patterns (see https://github.com/gobwas/glob) to include
;; in the index; default is empty
;; in the index; it's not compatible with the `zoekt` indexer type; default is empty
;REPO_INDEXER_INCLUDE =
;;
;; A comma separated list of glob patterns to exclude from the index; ; default is empty
;; A comma separated list of glob patterns to exclude from the index; it's not compatible with the `zoekt` indexer type; default is empty
;REPO_INDEXER_EXCLUDE =
;;
;MAX_FILE_SIZE = 1048576
Expand Down
6 changes: 6 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ require (
github.com/sassoftware/go-rpmutils v0.4.0
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3
github.com/shurcooL/vfsgen v0.0.0-20230704071429-0000e147ea92
github.com/sourcegraph/zoekt v0.0.0-20250407211326-2283cd5f430c
github.com/stretchr/testify v1.10.0
github.com/syndtr/goleveldb v1.0.0
github.com/tstranex/u2f v1.0.0
Expand Down Expand Up @@ -176,6 +177,7 @@ require (
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
github.com/blevesearch/zapx/v15 v15.3.13 // indirect
github.com/blevesearch/zapx/v16 v16.1.5 // indirect
github.com/bmatcuk/doublestar v1.3.4 // indirect
github.com/bmatcuk/doublestar/v4 v4.8.1 // indirect
github.com/boombuler/barcode v1.0.2 // indirect
github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 // indirect
Expand Down Expand Up @@ -232,6 +234,8 @@ require (
github.com/gorilla/handlers v1.5.2 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/gorilla/securecookie v1.1.2 // indirect
github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
Expand Down Expand Up @@ -265,6 +269,7 @@ require (
github.com/oklog/ulid v1.3.1 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/onsi/ginkgo v1.16.5 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/pierrec/lz4/v4 v4.1.22 // indirect
github.com/pjbgf/sha1cd v0.3.2 // indirect
Expand All @@ -283,6 +288,7 @@ require (
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/skeema/knownhosts v1.3.1 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/sourcegraph/go-ctags v0.0.0-20240424152308-4faeee4849da // indirect
github.com/spf13/afero v1.14.0 // indirect
github.com/spf13/cast v1.7.1 // indirect
github.com/spf13/pflag v1.0.6 // indirect
Expand Down
126 changes: 126 additions & 0 deletions go.sum

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions modules/indexer/code/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s
needGenesis = len(stdout) == 0
}

// TODO: check whether the zoekt index file metadata is out of sync with the DB index status; if it is, get the genesis changes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need this comment?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this check can help ensure the correctness of the index data, but it's not necessary at the moment—it can be added in the future.

//if setting.Indexer.RepoType == "zoekt" {
//}

if needGenesis {
return genesisChanges(ctx, repo, revision)
}
Expand Down
22 changes: 21 additions & 1 deletion modules/indexer/code/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"code.gitea.io/gitea/modules/indexer/code/bleve"
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
"code.gitea.io/gitea/modules/indexer/code/internal"
"code.gitea.io/gitea/modules/indexer/code/zoekt"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/queue"
Expand Down Expand Up @@ -116,7 +117,7 @@ func Init() {

// Create the Queue
switch setting.Indexer.RepoType {
case "bleve", "elasticsearch":
case "bleve", "elasticsearch", "zoekt":
handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) {
indexer := *globalIndexer.Load()
for _, indexerData := range items {
Expand Down Expand Up @@ -183,6 +184,25 @@ func Init() {
close(waitChannel)
log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err)
}
case "zoekt":
log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath)
defer func() {
if err := recover(); err != nil {
log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
log.Error("The indexer files are likely corrupted and may need to be deleted")
log.Error("You can completely remove the \"%s\" directory to make Gitea recreate the indexes", setting.Indexer.RepoPath)
}
}()

rIndexer = zoekt.NewIndexer(setting.Indexer.RepoPath)
existed, err = rIndexer.Init(ctx)
if err != nil {
cancel()
(*globalIndexer.Load()).Close()
close(waitChannel)

log.Fatal("PID: %d Unable to initialize the zoekt Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
}

default:
log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType)
Expand Down
49 changes: 49 additions & 0 deletions modules/indexer/code/zoekt/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package zoekt

import "unicode/utf8"

// specialBytes is a bitmap used by func special to check whether a character
// needs to be escaped. For an ASCII byte b, bit (b/16) of specialBytes[b%16]
// is set when b is a metacharacter. The bits are populated by init.
var specialBytes [16]byte

// special reports whether b is marked in specialBytes, i.e. whether QuoteMeta
// has to escape it. Bytes at or above utf8.RuneSelf are never special.
func special(b byte) bool {
	if b >= utf8.RuneSelf {
		return false
	}
	// Row is selected by the low nibble, bit position by the high nibble.
	mask := byte(1) << (b / 16)
	return specialBytes[b%16]&mask != 0
}

// init marks every metacharacter in the specialBytes bitmap so that special
// can answer membership queries with a single lookup.
func init() {
	const metaChars = `-:\.+*?()|[]{}^$`
	for k := 0; k < len(metaChars); k++ {
		c := metaChars[k]
		specialBytes[c%16] |= 1 << (c / 16)
	}
}

// QuoteMeta returns a copy of s in which every byte reported by special is
// preceded by two backslashes. If s contains no such byte, s itself is
// returned without allocating.
func QuoteMeta(s string) string {
	// Scanning byte by byte is safe because special only matches ASCII bytes.
	first := -1
	for pos := 0; pos < len(s); pos++ {
		if special(s[pos]) {
			first = pos
			break
		}
	}
	if first < 0 {
		// Nothing to escape.
		return s
	}

	// Worst case: each byte from first onwards grows to three bytes
	// (two backslashes plus the byte itself); the prefix is copied as is.
	out := make([]byte, 0, 3*len(s)-2*first)
	out = append(out, s[:first]...)
	for pos := first; pos < len(s); pos++ {
		c := s[pos]
		if special(c) {
			out = append(out, '\\', '\\')
		}
		out = append(out, c)
	}
	return string(out)
}
Loading