From 8ccf475e3f8c827ed086ae791008ffb24af33d03 Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Thu, 29 Oct 2020 22:57:54 +0000
Subject: [PATCH 1/3] When creating line diffs do not split within an html
 entity

Fix #13342

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 services/gitdiff/gitdiff.go | 84 +++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go
index 7acf8324c3a5e..50e2f8cf1efeb 100644
--- a/services/gitdiff/gitdiff.go
+++ b/services/gitdiff/gitdiff.go
@@ -290,6 +290,9 @@ func init() {
 	diffMatchPatch.DiffEditCost = 100
 }
 
+var unterminatedEntityRE = regexp.MustCompile(`&[^ ;]*$`)
+var unstartedEntiyRE = regexp.MustCompile(`^[^ ;]*;`)
+
 // GetComputedInlineDiffFor computes inline diff for the given line.
 func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML {
 	if setting.Git.DisableDiffHighlight {
@@ -329,9 +332,90 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
 
 	diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true)
 	diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
+
+	// Now we need to clean up the split entities
+	diffRecord = unsplitEntities(diffRecord)
+	diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
+
 	return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
 }
 
+// unsplitEntities looks for broken up html entities. It relies on records being presimplified and the data being passed in being valid html
+func unsplitEntities(records []diffmatchpatch.Diff) []diffmatchpatch.Diff {
+	// Unsplitting entities is simple...
+	//
+	// Iterate through all be the last records because if we're the last record then there's nothing we can do
+	for i := 0; i+1 < len(records); i++ {
+		record := &records[i]
+
+		// Look for an unterminated entity at the end of the line
+		unterminated := unterminatedEntityRE.FindString(record.Text)
+		if len(unterminated) == 0 {
+			continue
+		}
+
+		switch record.Type {
+		case diffmatchpatch.DiffEqual:
+			// If we're an diff equal we want to give this unterminated entity to our next delete and insert
+			record.Text = record.Text[0 : len(record.Text)-len(unterminated)]
+			records[i+1].Text = unterminated + records[i+1].Text
+
+			nextType := records[i+1].Type
+
+			if nextType == diffmatchpatch.DiffEqual {
+				continue
+			}
+
+			// if the next in line is a delete then we will want the thing after that to be an insert and so on.
+			oneAfterType := diffmatchpatch.DiffInsert
+			if nextType == diffmatchpatch.DiffInsert {
+				oneAfterType = diffmatchpatch.DiffDelete
+			}
+
+			if i+2 < len(records) && records[i+2].Type == oneAfterType {
+				records[i+2].Text = unterminated + records[i+2].Text
+			} else {
+				records = append(records[:i+2], append([]diffmatchpatch.Diff{
+					{
+						Type: oneAfterType,
+						Text: unterminated,
+					}}, records[i+2:]...)...)
+			}
+		case diffmatchpatch.DiffDelete:
+			fallthrough
+		case diffmatchpatch.DiffInsert:
+			// if we're an insert or delete we want to claim the terminal bit of the entity from the next equal in line
+			targetType := diffmatchpatch.DiffInsert
+			if record.Type == diffmatchpatch.DiffInsert {
+				targetType = diffmatchpatch.DiffDelete
+			}
+			next := &records[i+1]
+			if next.Type == diffmatchpatch.DiffEqual {
+				// if the next is an equal we need to snaffle the entity end off the start and add an delete/insert
+				if terminal := unstartedEntiyRE.FindString(next.Text); len(terminal) > 0 {
+					record.Text += terminal
+					next.Text = next.Text[len(terminal):]
+					records = append(records[:i+2], append([]diffmatchpatch.Diff{
+						{
+							Type: targetType,
+							Text: unterminated,
+						}}, records[i+2:]...)...)
+				}
+			} else if next.Type == targetType {
+				// if the next is an insert we need to snaffle the entity end off the one after that and add it to both.
+				if i+2 < len(records) && records[i+2].Type == diffmatchpatch.DiffEqual {
+					if terminal := unstartedEntiyRE.FindString(records[i+2].Text); len(terminal) > 0 {
+						record.Text += terminal
+						next.Text += terminal
+						records[i+2].Text = records[i+2].Text[len(terminal):]
+					}
+				}
+			}
+		}
+	}
+	return records
+}
+
 // DiffFile represents a file diff.
 type DiffFile struct {
 	Name               string

From 838757a4e97d83b93a04a1a2cb200f3b5e1e85cb Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Fri, 30 Oct 2020 17:00:19 +0000
Subject: [PATCH 2/3] Add test case

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 services/gitdiff/gitdiff_test.go | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go
index e7eeca70045db..b5c43ea00a3d4 100644
--- a/services/gitdiff/gitdiff_test.go
+++ b/services/gitdiff/gitdiff_test.go
@@ -26,6 +26,21 @@ func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
 	}
 }
 
+func TestUnsplitEntities(t *testing.T) {
+	left := "sh &#34;useradd -u 111 jenkins&#34;"
+	right := "sh &#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;"
+	diffRecord := diffMatchPatch.DiffMain(left, right, true)
+	diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
+
+	// Now we need to clean up the split entities
+	diffRecord = unsplitEntities(diffRecord)
+	diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
+
+	for _, record := range diffRecord {
+		assert.False(t, unterminatedEntityRE.MatchString(record.Text), "")
+	}
+}
+
 func TestDiffToHTML(t *testing.T) {
 	setting.Cfg = ini.Empty()
 	assertEqual(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML("", []dmp.Diff{

From ee9067156b7fb3f5d1733ac456572c7145d36e42 Mon Sep 17 00:00:00 2001
From: Andrew Thornton <art27@cantab.net>
Date: Fri, 30 Oct 2020 17:10:21 +0000
Subject: [PATCH 3/3] improve test

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 services/gitdiff/gitdiff_test.go | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go
index b5c43ea00a3d4..0683997dab8f7 100644
--- a/services/gitdiff/gitdiff_test.go
+++ b/services/gitdiff/gitdiff_test.go
@@ -15,6 +15,7 @@ import (
 	"code.gitea.io/gitea/models"
 	"code.gitea.io/gitea/modules/git"
 	"code.gitea.io/gitea/modules/setting"
+	"github.com/sergi/go-diff/diffmatchpatch"
 	dmp "github.com/sergi/go-diff/diffmatchpatch"
 	"github.com/stretchr/testify/assert"
 	"gopkg.in/ini.v1"
@@ -36,9 +37,23 @@ func TestUnsplitEntities(t *testing.T) {
 	diffRecord = unsplitEntities(diffRecord)
 	diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
 
+	leftRecombined := ""
+	rightRecombined := ""
 	for _, record := range diffRecord {
 		assert.False(t, unterminatedEntityRE.MatchString(record.Text), "")
+		switch record.Type {
+		case diffmatchpatch.DiffDelete:
+			leftRecombined += record.Text
+		case diffmatchpatch.DiffInsert:
+			rightRecombined += record.Text
+		default:
+			leftRecombined += record.Text
+			rightRecombined += record.Text
+		}
 	}
+
+	assert.EqualValues(t, left, leftRecombined)
+	assert.EqualValues(t, right, rightRecombined)
 }
 
 func TestDiffToHTML(t *testing.T) {