From 52a751bc3756104956f9b80300feb20c854d87eb Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Mon, 8 Nov 2021 19:15:31 +0000
Subject: [PATCH 1/8] Add .gitattributes-assisted language detection to blame,
 diff and render

Use the check-attr code to look up the language assigned to a file in
.gitattributes and pass it to chroma as a hint for the language of the file.

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 modules/git/repo_attribute.go              | 17 +++++++-
 modules/git/repo_index.go                  | 12 +++---
 modules/git/repo_language_stats_gogit.go   | 41 ++++++++------------
 modules/git/repo_language_stats_nogogit.go | 45 +++++++++-------------
 modules/highlight/highlight.go             | 29 ++++++++++----
 modules/highlight/highlight_test.go        |  2 +-
 modules/indexer/code/search.go             |  2 +-
 routers/web/repo/blame.go                  | 22 ++++++++++-
 routers/web/repo/view.go                   | 22 ++++++++++-
 services/gitdiff/gitdiff.go                | 43 +++++++++++----------
 services/gitdiff/gitdiff_test.go           |  2 +-
 11 files changed, 147 insertions(+), 90 deletions(-)

diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go
index aace64425388a..88fb7810a6637 100644
--- a/modules/git/repo_attribute.go
+++ b/modules/git/repo_attribute.go
@@ -22,6 +22,8 @@ type CheckAttributeOpts struct {
 	AllAttributes bool
 	Attributes    []string
 	Filenames     []string
+	IndexFile     string
+	WorkTree      string
 }
 
 // CheckAttribute return the Blame object of file
@@ -31,6 +33,19 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
 		return nil, fmt.Errorf("git version missing: %v", err)
 	}
 
+	env := []string{}
+
+	if len(opts.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
+		env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
+	}
+	if len(opts.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
+		env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
+	}
+
+	if len(env) > 0 {
+		env = append(os.Environ(), env...)
+	}
+
 	stdOut := new(bytes.Buffer)
 	stdErr := new(bytes.Buffer)
 
@@ -61,7 +76,7 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
 
 	cmd := NewCommand(cmdArgs...)
 
-	if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
+	if err := cmd.RunInDirTimeoutEnvPipeline(env, -1, repo.Path, stdOut, stdErr); err != nil {
 		return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
 	}
 
diff --git a/modules/git/repo_index.go b/modules/git/repo_index.go
index 27cb7fbebe191..38c01295b67cc 100644
--- a/modules/git/repo_index.go
+++ b/modules/git/repo_index.go
@@ -8,6 +8,7 @@ import (
 	"bytes"
 	"context"
 	"os"
+	"path/filepath"
 	"strings"
 
 	"code.gitea.io/gitea/modules/log"
@@ -45,14 +46,15 @@ func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error
 }
 
 // ReadTreeToTemporaryIndex reads a treeish to a temporary index file
-func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
-	tmpIndex, err := os.CreateTemp("", "index")
+func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename, tmpDir string, cancel context.CancelFunc, err error) {
+	tmpDir, err = os.MkdirTemp("", "index")
 	if err != nil {
 		return
 	}
-	filename = tmpIndex.Name()
+
+	filename = filepath.Join(tmpDir, ".tmp-index")
 	cancel = func() {
-		err := util.Remove(filename)
+		err := util.RemoveAll(tmpDir)
 		if err != nil {
 			log.Error("failed to remove tmp index file: %v", err)
 		}
@@ -60,7 +62,7 @@ func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename strin
 	err = repo.ReadTreeToIndex(treeish, filename)
 	if err != nil {
 		defer cancel()
-		return "", func() {}, err
+		return "", "", func() {}, err
 	}
 	return
 }
diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index d37827c3de6fc..726243883c6c3 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -51,32 +51,25 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 		indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
 		if err == nil {
 			defer deleteTemporaryFile()
-			tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
-			if err == nil {
-				defer func() {
-					_ = util.RemoveAll(tmpWorkTree)
+			checker = &CheckAttributeReader{
+				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+				Repo:       repo,
+				IndexFile:  indexFilename,
+				WorkTree:   filepath.Base(indexFilename),
+			}
+			ctx, cancel := context.WithCancel(DefaultContext)
+			if err := checker.Init(ctx); err != nil {
+				log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+			} else {
+				go func() {
+					err = checker.Run()
+					if err != nil {
+						log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+						cancel()
+					}
 				}()
-
-				checker = &CheckAttributeReader{
-					Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
-					Repo:       repo,
-					IndexFile:  indexFilename,
-					WorkTree:   tmpWorkTree,
-				}
-				ctx, cancel := context.WithCancel(DefaultContext)
-				if err := checker.Init(ctx); err != nil {
-					log.Error("Unable to open checker for %s. Error: %v", commitID, err)
-				} else {
-					go func() {
-						err = checker.Run()
-						if err != nil {
-							log.Error("Unable to open checker for %s. Error: %v", commitID, err)
-							cancel()
-						}
-					}()
-				}
-				defer cancel()
 			}
+			defer cancel()
 		}
 	}
 
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index 06269a466c72c..cb4fa0450adff 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -13,11 +13,9 @@ import (
 	"context"
 	"io"
 	"math"
-	"os"
 
 	"code.gitea.io/gitea/modules/analyze"
 	"code.gitea.io/gitea/modules/log"
-	"code.gitea.io/gitea/modules/util"
 
 	"github.com/go-enry/go-enry/v2"
 )
@@ -68,35 +66,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 	var checker *CheckAttributeReader
 
 	if CheckGitVersionAtLeast("1.7.8") == nil {
-		indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
+		indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
 		if err == nil {
 			defer deleteTemporaryFile()
-			tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
-			if err == nil {
-				defer func() {
-					_ = util.RemoveAll(tmpWorkTree)
+			checker = &CheckAttributeReader{
+				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+				Repo:       repo,
+				IndexFile:  indexFilename,
+				WorkTree:   worktree,
+			}
+			ctx, cancel := context.WithCancel(DefaultContext)
+			if err := checker.Init(ctx); err != nil {
+				log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+			} else {
+				go func() {
+					err = checker.Run()
+					if err != nil {
+						log.Error("Unable to open checker for %s. Error: %v", commitID, err)
+						cancel()
+					}
 				}()
-
-				checker = &CheckAttributeReader{
-					Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
-					Repo:       repo,
-					IndexFile:  indexFilename,
-					WorkTree:   tmpWorkTree,
-				}
-				ctx, cancel := context.WithCancel(DefaultContext)
-				if err := checker.Init(ctx); err != nil {
-					log.Error("Unable to open checker for %s. Error: %v", commitID, err)
-				} else {
-					go func() {
-						err = checker.Run()
-						if err != nil {
-							log.Error("Unable to open checker for %s. Error: %v", commitID, err)
-							cancel()
-						}
-					}()
-				}
-				defer cancel()
 			}
+			defer cancel()
 		}
 	}
 
diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
index 6684fbe842910..f6dd969fe3bed 100644
--- a/modules/highlight/highlight.go
+++ b/modules/highlight/highlight.go
@@ -54,7 +54,7 @@ func NewContext() {
 }
 
 // Code returns a HTML version of code string with chroma syntax highlighting classes
-func Code(fileName, code string) string {
+func Code(fileName, language, code string) string {
 	NewContext()
 
 	// diff view newline will be passed as empty, change to literal \n so it can be copied
@@ -68,9 +68,16 @@ func Code(fileName, code string) string {
 	}
 
 	var lexer chroma.Lexer
-	if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
-		//use mapped value to find lexer
-		lexer = lexers.Get(val)
+
+	if len(language) > 0 {
+		lexer = lexers.Get(language)
+	}
+
+	if lexer == nil {
+		if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
+			//use mapped value to find lexer
+			lexer = lexers.Get(val)
+		}
 	}
 
 	if lexer == nil {
@@ -118,7 +125,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
 }
 
 // File returns a slice of chroma syntax highlighted lines of code
-func File(numLines int, fileName string, code []byte) []string {
+func File(numLines int, fileName, language string, code []byte) []string {
 	NewContext()
 
 	if len(code) > sizeLimit {
@@ -138,8 +145,16 @@ func File(numLines int, fileName string, code []byte) []string {
 	htmlw := bufio.NewWriter(&htmlbuf)
 
 	var lexer chroma.Lexer
-	if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
-		lexer = lexers.Get(val)
+
+	// linguist-language overrides everything
+	if len(language) > 0 {
+		lexer = lexers.Get(language)
+	}
+
+	if lexer == nil {
+		if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
+			lexer = lexers.Get(val)
+		}
 	}
 
 	if lexer == nil {
diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go
index 0a67e4c602697..0686c9bb1a9f9 100644
--- a/modules/highlight/highlight_test.go
+++ b/modules/highlight/highlight_test.go
@@ -95,7 +95,7 @@ steps:
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			if got := File(tt.numLines, tt.fileName, []byte(tt.code)); !reflect.DeepEqual(got, tt.want) {
+			if got := File(tt.numLines, tt.fileName, "", []byte(tt.code)); !reflect.DeepEqual(got, tt.want) {
 				t.Errorf("File() = %v, want %v", got, tt.want)
 			}
 		})
diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go
index 51b7c9427d2d9..bb8dcf16b3f49 100644
--- a/modules/indexer/code/search.go
+++ b/modules/indexer/code/search.go
@@ -101,7 +101,7 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
 		Language:       result.Language,
 		Color:          result.Color,
 		LineNumbers:    lineNumbers,
-		FormattedLines: highlight.Code(result.Filename, formattedLinesBuffer.String()),
+		FormattedLines: highlight.Code(result.Filename, "", formattedLinesBuffer.String()),
 	}, nil
 }
 
diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go
index 3632d1846eaeb..365c33f9da6fa 100644
--- a/routers/web/repo/blame.go
+++ b/routers/web/repo/blame.go
@@ -15,6 +15,7 @@ import (
 	"code.gitea.io/gitea/modules/context"
 	"code.gitea.io/gitea/modules/git"
 	"code.gitea.io/gitea/modules/highlight"
+	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/templates"
 	"code.gitea.io/gitea/modules/timeutil"
 )
@@ -202,6 +203,25 @@ func processBlameParts(ctx *context.Context, blameParts []git.BlamePart) (map[st
 func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames map[string]*models.UserCommit, previousCommits map[string]string) {
 	repoLink := ctx.Repo.RepoLink
 
+	language := ""
+
+	indexFilename, worktree, deleteTemporaryFile, err := ctx.Repo.GitRepo.ReadTreeToTemporaryIndex(ctx.Repo.CommitID)
+	if err == nil {
+		defer deleteTemporaryFile()
+
+		filename2attribute2info, err := ctx.Repo.GitRepo.CheckAttribute(git.CheckAttributeOpts{
+			CachedOnly: true,
+			Attributes: []string{"linguist-language"},
+			Filenames:  []string{ctx.Repo.TreePath},
+			IndexFile:  indexFilename,
+			WorkTree:   worktree,
+		})
+		if err != nil {
+			log.Error("Unable to load attributes for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err)
+		}
+
+		language = filename2attribute2info[ctx.Repo.TreePath]["linguist-language"]
+	}
 	var lines = make([]string, 0)
 	rows := make([]*blameRow, 0)
 
@@ -246,7 +266,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m
 				line += "\n"
 			}
 			fileName := fmt.Sprintf("%v", ctx.Data["FileName"])
-			line = highlight.Code(fileName, line)
+			line = highlight.Code(fileName, language, line)
 
 			br.Code = gotemplate.HTML(line)
 			rows = append(rows, br)
diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go
index 90be631c73499..37fe942ac7d70 100644
--- a/routers/web/repo/view.go
+++ b/routers/web/repo/view.go
@@ -501,7 +501,27 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
 			lineNums := linesBytesCount(buf)
 			ctx.Data["NumLines"] = strconv.Itoa(lineNums)
 			ctx.Data["NumLinesSet"] = true
-			ctx.Data["FileContent"] = highlight.File(lineNums, blob.Name(), buf)
+
+			language := ""
+
+			indexFilename, worktree, deleteTemporaryFile, err := ctx.Repo.GitRepo.ReadTreeToTemporaryIndex(ctx.Repo.CommitID)
+			if err == nil {
+				defer deleteTemporaryFile()
+
+				filename2attribute2info, err := ctx.Repo.GitRepo.CheckAttribute(git.CheckAttributeOpts{
+					CachedOnly: true,
+					Attributes: []string{"linguist-language"},
+					Filenames:  []string{ctx.Repo.TreePath},
+					IndexFile:  indexFilename,
+					WorkTree:   worktree,
+				})
+				if err != nil {
+					log.Error("Unable to load attributes for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err)
+				}
+
+				language = filename2attribute2info[ctx.Repo.TreePath]["linguist-language"]
+			}
+			ctx.Data["FileContent"] = highlight.File(lineNums, blob.Name(), language, buf)
 		}
 		if !isLFSFile {
 			if ctx.Repo.CanEnableEditor() {
diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go
index 614f8104ecaf2..dec380423830a 100644
--- a/services/gitdiff/gitdiff.go
+++ b/services/gitdiff/gitdiff.go
@@ -31,7 +31,6 @@ import (
 	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/process"
 	"code.gitea.io/gitea/modules/setting"
-	"code.gitea.io/gitea/modules/util"
 
 	"github.com/sergi/go-diff/diffmatchpatch"
 	stdcharset "golang.org/x/net/html/charset"
@@ -178,6 +177,7 @@ func getLineContent(content string) string {
 
 // DiffSection represents a section of a DiffFile.
 type DiffSection struct {
+	file     *DiffFile
 	FileName string
 	Name     string
 	Lines    []*DiffLine
@@ -546,6 +546,11 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
 		diff2           string
 	)
 
+	language := ""
+	if diffSection.file != nil {
+		language = diffSection.file.Language
+	}
+
 	// try to find equivalent diff line. ignore, otherwise
 	switch diffLine.Type {
 	case DiffLineSection:
@@ -553,25 +558,25 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
 	case DiffLineAdd:
 		compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
 		if compareDiffLine == nil {
-			return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:]))
+			return template.HTML(highlight.Code(diffSection.FileName, language, diffLine.Content[1:]))
 		}
 		diff1 = compareDiffLine.Content
 		diff2 = diffLine.Content
 	case DiffLineDel:
 		compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
 		if compareDiffLine == nil {
-			return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:]))
+			return template.HTML(highlight.Code(diffSection.FileName, language, diffLine.Content[1:]))
 		}
 		diff1 = diffLine.Content
 		diff2 = compareDiffLine.Content
 	default:
 		if strings.IndexByte(" +-", diffLine.Content[0]) > -1 {
-			return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:]))
+			return template.HTML(highlight.Code(diffSection.FileName, language, diffLine.Content[1:]))
 		}
-		return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content))
+		return template.HTML(highlight.Code(diffSection.FileName, language, diffLine.Content))
 	}
 
-	diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true)
+	diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true)
 	diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
 
 	return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
@@ -597,6 +602,7 @@ type DiffFile struct {
 	IsProtected             bool
 	IsGenerated             bool
 	IsVendored              bool
+	Language                string
 }
 
 // GetType returns type of diff file.
@@ -1008,7 +1014,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio
 			line := sb.String()
 
 			// Create a new section to represent this hunk
-			curSection = &DiffSection{}
+			curSection = &DiffSection{file: curFile}
 			lastLeftIdx = -1
 			curFile.Sections = append(curFile.Sections, curSection)
 
@@ -1048,7 +1054,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio
 			rightLine++
 			if curSection == nil {
 				// Create a new section to represent this hunk
-				curSection = &DiffSection{}
+				curSection = &DiffSection{file: curFile}
 				curFile.Sections = append(curFile.Sections, curSection)
 				lastLeftIdx = -1
 			}
@@ -1074,7 +1080,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio
 			}
 			if curSection == nil {
 				// Create a new section to represent this hunk
-				curSection = &DiffSection{}
+				curSection = &DiffSection{file: curFile}
 				curFile.Sections = append(curFile.Sections, curSection)
 				lastLeftIdx = -1
 			}
@@ -1094,7 +1100,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio
 			lastLeftIdx = -1
 			if curSection == nil {
 				// Create a new section to represent this hunk
-				curSection = &DiffSection{}
+				curSection = &DiffSection{file: curFile}
 				curFile.Sections = append(curFile.Sections, curSection)
 			}
 			curSection.Lines = append(curSection.Lines, diffLine)
@@ -1302,23 +1308,15 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID,
 	var checker *git.CheckAttributeReader
 
 	if git.CheckGitVersionAtLeast("1.7.8") == nil {
-		indexFilename, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(afterCommitID)
+		indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(afterCommitID)
 		if err == nil {
 			defer deleteTemporaryFile()
-			workdir, err := os.MkdirTemp("", "empty-work-dir")
-			if err != nil {
-				log.Error("Unable to create temporary directory: %v", err)
-				return nil, err
-			}
-			defer func() {
-				_ = util.RemoveAll(workdir)
-			}()
 
 			checker = &git.CheckAttributeReader{
-				Attributes: []string{"linguist-vendored", "linguist-generated"},
+				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
 				Repo:       gitRepo,
 				IndexFile:  indexFilename,
-				WorkTree:   workdir,
+				WorkTree:   worktree,
 			}
 			ctx, cancel := context.WithCancel(git.DefaultContext)
 			if err := checker.Init(ctx); err != nil {
@@ -1361,6 +1359,9 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID,
 						gotGenerated = generated == "false"
 					}
 				}
+				if language, has := attrs["linguist-language"]; has {
+					diffFile.Language = language
+				}
 			} else {
 				log.Error("Unexpected error: %v", err)
 			}
diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go
index 6decb59b64b5a..50d560ffe536d 100644
--- a/services/gitdiff/gitdiff_test.go
+++ b/services/gitdiff/gitdiff_test.go
@@ -533,7 +533,7 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
 
 func TestDiffToHTML_14231(t *testing.T) {
 	setting.Cfg = ini.Empty()
-	diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", "		run()\n"), highlight.Code("main.v", "		run(db)\n"), true)
+	diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", "", "		run()\n"), highlight.Code("main.v", "", "		run(db)\n"), true)
 	diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
 
 	expected := `		<span class="n">run</span><span class="added-code"><span class="o">(</span><span class="n">db</span></span><span class="o">)</span>`

From 25aa1e57516a183a96d94f2a424f125239e1bfcc Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Mon, 8 Nov 2021 19:57:00 +0000
Subject: [PATCH 2/8] fix gogit build

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 modules/git/repo_language_stats_gogit.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index 726243883c6c3..03f5d3919e1a6 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -11,11 +11,10 @@ import (
 	"bytes"
 	"context"
 	"io"
-	"os"
+	"path/filepath"
 
 	"code.gitea.io/gitea/modules/analyze"
 	"code.gitea.io/gitea/modules/log"
-	"code.gitea.io/gitea/modules/util"
 
 	"github.com/go-enry/go-enry/v2"
 	"github.com/go-git/go-git/v5"

From da9329691a9a0d1899988b75f3f0573cf82c5249 Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Mon, 8 Nov 2021 20:26:01 +0000
Subject: [PATCH 3/8] Add some docs

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 docs/content/doc/advanced/config-cheat-sheet.en-us.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
index 3b5d9213df625..b5fb5bf3d5937 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
@@ -982,6 +982,14 @@ Multiple sanitisation rules can be defined by adding unique subsections, e.g. `[
 To apply a sanitisation rules only for a specify external renderer they must use the renderer name, e.g. `[markup.sanitizer.asciidoc.rule-1]`.
 If the rule is defined above the renderer ini section or the name does not match a renderer it is applied to every renderer.
 
+## Highlight Mappings (`highlight.mapping`)
+
+- `file_extension e.g. .toml`: **language e.g. ini**. File extension to language mapping overrides.
+
+- Gitea will highlight files using the `linguist-language` attribute from the `.gitattributes` file
+if available. If this is not set or the language is unavailable, the file extension will be looked up
+in this mapping or the filetype using heuristics.
+
 ## Time (`time`)
 
 - `FORMAT`: Time format to display on UI. i.e. RFC1123 or 2006-01-02 15:04:05

From 5d9870f41ed26acbbb40c537209a3ecbe1115443 Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Tue, 9 Nov 2021 08:48:32 +0000
Subject: [PATCH 4/8] fix gogit again

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 modules/git/repo_language_stats_gogit.go | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index 03f5d3919e1a6..e83b2e08b0173 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -11,7 +11,6 @@ import (
 	"bytes"
 	"context"
 	"io"
-	"path/filepath"
 
 	"code.gitea.io/gitea/modules/analyze"
 	"code.gitea.io/gitea/modules/log"
@@ -47,14 +46,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 	var checker *CheckAttributeReader
 
 	if CheckGitVersionAtLeast("1.7.8") == nil {
-		indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
+		indexFilename, workTree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
 		if err == nil {
 			defer deleteTemporaryFile()
 			checker = &CheckAttributeReader{
 				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
 				Repo:       repo,
 				IndexFile:  indexFilename,
-				WorkTree:   filepath.Base(indexFilename),
+				WorkTree:   workTree,
 			}
 			ctx, cancel := context.WithCancel(DefaultContext)
 			if err := checker.Init(ctx); err != nil {

From f342cfe6cdcb68fa513c55f1f07d2ca9d3f41b6a Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Sat, 13 Nov 2021 10:03:25 +0000
Subject: [PATCH 5/8] fix test

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 modules/repofiles/diff_test.go | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/modules/repofiles/diff_test.go b/modules/repofiles/diff_test.go
index 463ce4ec6744f..4bd1ef6f4dc95 100644
--- a/modules/repofiles/diff_test.go
+++ b/modules/repofiles/diff_test.go
@@ -9,6 +9,7 @@ import (
 
 	"code.gitea.io/gitea/models"
 	"code.gitea.io/gitea/models/unittest"
+	"code.gitea.io/gitea/modules/json"
 	"code.gitea.io/gitea/modules/test"
 	"code.gitea.io/gitea/services/gitdiff"
 
@@ -118,13 +119,21 @@ func TestGetDiffPreview(t *testing.T) {
 	t.Run("with given branch", func(t *testing.T) {
 		diff, err := GetDiffPreview(ctx.Repo.Repository, branch, treePath, content)
 		assert.NoError(t, err)
-		assert.EqualValues(t, expectedDiff, diff)
+		expectedBs, err := json.Marshal(expectedDiff)
+		assert.NoError(t, err)
+		bs, err := json.Marshal(diff)
+		assert.NoError(t, err)
+		assert.EqualValues(t, expectedBs, bs)
 	})
 
 	t.Run("empty branch, same results", func(t *testing.T) {
 		diff, err := GetDiffPreview(ctx.Repo.Repository, "", treePath, content)
 		assert.NoError(t, err)
-		assert.EqualValues(t, expectedDiff, diff)
+		expectedBs, err := json.Marshal(expectedDiff)
+		assert.NoError(t, err)
+		bs, err := json.Marshal(diff)
+		assert.NoError(t, err)
+		assert.EqualValues(t, expectedBs, bs)
 	})
 }
 

From 57088660fbed111e6bb64456b1b2cca6519c64e6 Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Tue, 16 Nov 2021 15:47:49 +0000
Subject: [PATCH 6/8] handle gitlab-language too

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 .../content/doc/advanced/config-cheat-sheet.en-us.md |  2 +-
 modules/git/repo_language_stats_gogit.go             | 12 +++++++++++-
 modules/git/repo_language_stats_nogogit.go           | 12 +++++++++++-
 modules/highlight/highlight.go                       |  2 +-
 routers/web/repo/blame.go                            |  8 +++++++-
 routers/web/repo/view.go                             |  8 +++++++-
 services/gitdiff/gitdiff.go                          |  6 ++++--
 7 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
index b5fb5bf3d5937..a087b253e99ac 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
@@ -986,7 +986,7 @@ If the rule is defined above the renderer ini section or the name does not match
 
 - `file_extension e.g. .toml`: **language e.g. ini**. File extension to language mapping overrides.
 
-- Gitea will highlight files using the `linguist-language` attribute from the `.gitattributes` file
+- Gitea will highlight files using the `linguist-language` or `gitlab-language` attribute from the `.gitattributes` file
 if available. If this is not set or the language is unavailable, the file extension will be looked up
 in this mapping or the filetype using heuristics.
 
diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index e83b2e08b0173..590452f40e5e4 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -50,7 +50,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 		if err == nil {
 			defer deleteTemporaryFile()
 			checker = &CheckAttributeReader{
-				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
 				Repo:       repo,
 				IndexFile:  indexFilename,
 				WorkTree:   workTree,
@@ -104,6 +104,16 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 
 					sizes[language] += f.Size
 
+					return nil
+				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
+					// group languages, such as Pug -> HTML; SCSS -> CSS
+					group := enry.GetLanguageGroup(language)
+					if len(group) != 0 {
+						language = group
+					}
+
+					sizes[language] += f.Size
+
 					return nil
 				}
 			}
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index cb4fa0450adff..90e3b7ff91b2e 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -70,7 +70,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 		if err == nil {
 			defer deleteTemporaryFile()
 			checker = &CheckAttributeReader{
-				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
 				Repo:       repo,
 				IndexFile:  indexFilename,
 				WorkTree:   worktree,
@@ -127,9 +127,19 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 						language = group
 					}
 
+					sizes[language] += f.Size()
+					continue
+				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
+					// group languages, such as Pug -> HTML; SCSS -> CSS
+					group := enry.GetLanguageGroup(language)
+					if len(group) != 0 {
+						language = group
+					}
+
 					sizes[language] += f.Size()
 					continue
 				}
+
 			}
 		}
 
diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
index f6dd969fe3bed..a95676005c692 100644
--- a/modules/highlight/highlight.go
+++ b/modules/highlight/highlight.go
@@ -146,7 +146,7 @@ func File(numLines int, fileName, language string, code []byte) []string {
 
 	var lexer chroma.Lexer
 
-	// linguist-language overrides everything
+	// provided language overrides everything
 	if len(language) > 0 {
 		lexer = lexers.Get(language)
 	}
diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go
index 365c33f9da6fa..713499f8cf702 100644
--- a/routers/web/repo/blame.go
+++ b/routers/web/repo/blame.go
@@ -211,7 +211,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m
 
 		filename2attribute2info, err := ctx.Repo.GitRepo.CheckAttribute(git.CheckAttributeOpts{
 			CachedOnly: true,
-			Attributes: []string{"linguist-language"},
+			Attributes: []string{"linguist-language", "gitlab-language"},
 			Filenames:  []string{ctx.Repo.TreePath},
 			IndexFile:  indexFilename,
 			WorkTree:   worktree,
@@ -221,6 +221,12 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m
 		}
 
 		language = filename2attribute2info[ctx.Repo.TreePath]["linguist-language"]
+		if language == "" || language == "unspecified" {
+			language = filename2attribute2info[ctx.Repo.TreePath]["gitlab-language"]
+		}
+		if language == "unspecified" {
+			language = ""
+		}
 	}
 	var lines = make([]string, 0)
 	rows := make([]*blameRow, 0)
diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go
index 2e8822e15cc9e..add35a6d29449 100644
--- a/routers/web/repo/view.go
+++ b/routers/web/repo/view.go
@@ -511,7 +511,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
 
 				filename2attribute2info, err := ctx.Repo.GitRepo.CheckAttribute(git.CheckAttributeOpts{
 					CachedOnly: true,
-					Attributes: []string{"linguist-language"},
+					Attributes: []string{"linguist-language", "gitlab-language"},
 					Filenames:  []string{ctx.Repo.TreePath},
 					IndexFile:  indexFilename,
 					WorkTree:   worktree,
@@ -521,6 +521,12 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
 				}
 
 				language = filename2attribute2info[ctx.Repo.TreePath]["linguist-language"]
+				if language == "" || language == "unspecified" {
+					language = filename2attribute2info[ctx.Repo.TreePath]["gitlab-language"]
+				}
+				if language == "unspecified" {
+					language = ""
+				}
 			}
 			ctx.Data["FileContent"] = highlight.File(lineNums, blob.Name(), language, buf)
 		}
diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go
index dec380423830a..33e66e89ec131 100644
--- a/services/gitdiff/gitdiff.go
+++ b/services/gitdiff/gitdiff.go
@@ -1313,7 +1313,7 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID,
 			defer deleteTemporaryFile()
 
 			checker = &git.CheckAttributeReader{
-				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
+				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
 				Repo:       gitRepo,
 				IndexFile:  indexFilename,
 				WorkTree:   worktree,
@@ -1359,7 +1359,9 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID,
 						gotGenerated = generated == "false"
 					}
 				}
-				if language, has := attrs["linguist-language"]; has {
+				if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
+					diffFile.Language = language
+				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
 					diffFile.Language = language
 				}
 			} else {

From 7c1718ba4aeea42aa556099f2152066307f71ec3 Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Tue, 16 Nov 2021 18:33:05 +0000
Subject: [PATCH 7/8] one good deed deserves another...

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 modules/git/repo_language_stats_gogit.go   | 21 +++++++++++++--------
 modules/git/repo_language_stats_nogogit.go | 21 ++++++++++++++-------
 modules/highlight/highlight.go             |  7 +++++++
 3 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index 590452f40e5e4..3ef03b8b42ba9 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -106,15 +106,20 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 
 					return nil
 				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
-					// group languages, such as Pug -> HTML; SCSS -> CSS
-					group := enry.GetLanguageGroup(language)
-					if len(group) != 0 {
-						language = group
+					// strip off the first '?' and everything after it, if present
+					if idx := strings.IndexByte(language, '?'); idx >= 0 {
+						language = language[:idx]
+					}
+					if len(language) != 0 {
+						// group languages, such as Pug -> HTML; SCSS -> CSS
+						group := enry.GetLanguageGroup(language)
+						if len(group) != 0 {
+							language = group
+						}
+
+						sizes[language] += f.Size()
+						continue
 					}
-
-					sizes[language] += f.Size
-
-					return nil
 				}
 			}
 		}
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index 90e3b7ff91b2e..4fda7ab6275e9 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -13,6 +13,7 @@ import (
 	"context"
 	"io"
 	"math"
+	"strings"
 
 	"code.gitea.io/gitea/modules/analyze"
 	"code.gitea.io/gitea/modules/log"
@@ -130,14 +131,20 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 					sizes[language] += f.Size()
 					continue
 				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
-					// group languages, such as Pug -> HTML; SCSS -> CSS
-					group := enry.GetLanguageGroup(language)
-					if len(group) != 0 {
-						language = group
+					// strip off the first '?' and everything after it, if present
+					if idx := strings.IndexByte(language, '?'); idx >= 0 {
+						language = language[:idx]
+					}
+					if len(language) != 0 {
+						// group languages, such as Pug -> HTML; SCSS -> CSS
+						group := enry.GetLanguageGroup(language)
+						if len(group) != 0 {
+							language = group
+						}
+
+						sizes[language] += f.Size()
+						continue
 					}
-
-					sizes[language] += f.Size()
-					continue
 				}
 
 			}
diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
index a95676005c692..0cae27ae4c126 100644
--- a/modules/highlight/highlight.go
+++ b/modules/highlight/highlight.go
@@ -71,6 +71,13 @@ func Code(fileName, language, code string) string {
 
 	if len(language) > 0 {
 		lexer = lexers.Get(language)
+
+		if lexer == nil {
+			// Retry the lookup using only the part of the name before the first '?'
+			if idx := strings.IndexByte(language, '?'); idx > 0 {
+				lexer = lexers.Get(language[:idx])
+			}
+		}
 	}
 
 	if lexer == nil {

From 026aad14ee2244ed25b5e9bc292d32664517994f Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Tue, 16 Nov 2021 20:07:46 +0000
Subject: [PATCH 8/8] fix build

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 modules/git/repo_language_stats_gogit.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go
index 3ef03b8b42ba9..037ec41ec6c7a 100644
--- a/modules/git/repo_language_stats_gogit.go
+++ b/modules/git/repo_language_stats_gogit.go
@@ -11,6 +11,7 @@ import (
 	"bytes"
 	"context"
 	"io"
+	"strings"
 
 	"code.gitea.io/gitea/modules/analyze"
 	"code.gitea.io/gitea/modules/log"
@@ -117,8 +118,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 							language = group
 						}
 
-						sizes[language] += f.Size()
-						continue
+						sizes[language] += f.Size
+						return nil
 					}
 				}
 			}