Skip to content

fix: revert regressions #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 56 additions & 59 deletions diffmatchpatch/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ const (
DiffInsert Operation = 1
// DiffEqual item represents an equal diff.
DiffEqual Operation = 0
// IndexSeparator is used to separate the array indexes in an index string
IndexSeparator = ","
)

// Diff represents one diff operation
Expand Down Expand Up @@ -205,7 +203,7 @@ func (dmp *DiffMatchPatch) diffCompute(
// then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
// Scan the text on a line-by-line basis first.
text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2))
text1, text2, linearray := dmp.diffLinesToRunes(text1, text2)

diffs := dmp.diffMainRunes(text1, text2, false, deadline)

Expand Down Expand Up @@ -406,28 +404,73 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
// a string of hashes where each Unicode character represents one line.
// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
return chars1, chars2, lineArray
chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
return string(chars1), string(chars2), lineArray
}

// DiffLinesToRunes splits two texts into a list of runes.
// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
return []rune(chars1), []rune(chars2), lineArray
// '\x00' is a valid character, but various debuggers don't like it.
// So we'll insert a junk entry to avoid generating a null character.
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4

chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash)
chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash)

return chars1, chars2, lineArray
}

// diffLinesToRunes is the []rune-input counterpart of DiffLinesToRunes: it
// converts both inputs to strings, delegates to DiffLinesToRunes and returns
// that call's results unchanged.
func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) {
	s1, s2 := string(text1), string(text2)
	chars1, chars2, lineArray := dmp.DiffLinesToRunes(s1, s2)
	return chars1, chars2, lineArray
}

// diffLinesToRunesMunge encodes text as a []rune holding one rune per line.
//
// Each line (including its trailing "\n", when present) is looked up in
// lineHash. A line seen before reuses its stored index; a new line is appended
// to *lineArray and its index recorded in lineHash. The rune emitted for a line
// is that index. Both lineArray and lineHash are mutated, so passing the same
// pair to successive calls makes identical lines share one index.
//
// The input is a string rather than a []rune mainly because a []rune cannot be
// used as a map key.
func (dmp *DiffMatchPatch) diffLinesToRunesMunge(
	text string,
	lineArray *[]string,
	lineHash map[string]int,
) []rune {
	// Lines are sliced out of text one at a time. Splitting the whole text up
	// front would temporarily double the memory footprint, and modifying text
	// would create many large strings to garbage collect.
	encoded := []rune{}
	last := len(text) - 1

	for start, end := 0, -1; end < last; start = end + 1 {
		end = indexOf(text, "\n", start)
		if end == -1 {
			// No further newline: the remainder of text is the final line.
			end = last
		}

		line := text[start : end+1]
		idx, seen := lineHash[line]
		if !seen {
			*lineArray = append(*lineArray, line)
			idx = len(*lineArray) - 1
			lineHash[line] = idx
		}
		encoded = append(encoded, rune(idx))
	}

	return encoded
}

// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
hydrated := make([]Diff, 0, len(diffs))
for _, aDiff := range diffs {
chars := strings.Split(aDiff.Text, IndexSeparator)
chars := aDiff.Text
text := make([]string, len(chars))

for i, r := range chars {
i1, err := strconv.Atoi(r)
if err == nil {
text[i] = lineArray[i1]
}
text[i] = lineArray[r]
}

aDiff.Text = strings.Join(text, "")
Expand Down Expand Up @@ -1331,49 +1374,3 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1, delta string) (diffs []Diff, err

return diffs, nil
}

// diffLinesToStrings splits two texts into lines and encodes each text as a
// string of decimal line indexes (joined by intArrayToString). The third
// return value is the line table: index i in an encoded string refers to
// lineArray[i].
//
// Both texts are encoded against the same line table and the same lookup map,
// so a line that occurs in both texts is represented by the same index in both
// encoded strings.
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
	// '\x00' is a valid character, but various debuggers don't like it.
	// So we'll insert a junk entry to avoid generating a null character.
	lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'

	// One hash for both texts: with a separate hash per text, a line common
	// to both would be appended to lineArray twice and receive two different
	// indexes, so the encoded texts would never compare equal on that line.
	lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4

	// Each element is the index in lineArray of the corresponding line.
	strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray, lineHash)
	strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray, lineHash)

	return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
}

// diffLinesToStringsMunge splits text into lines and returns, for each line in
// order, its index in *lineArray.
//
// A line already present in lineHash reuses its stored index; a new line is
// appended to *lineArray and recorded in lineHash. Both are mutated so that
// later calls with the same pair keep assigning consistent indexes.
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(
	text string,
	lineArray *[]string,
	lineHash map[string]int,
) []uint32 {
	// Walk the text, pulling out a substring for each line. Splitting the
	// whole text on '\n' would temporarily double our memory footprint.
	// Modifying text would create many large strings to garbage collect.
	lineStart := 0
	lineEnd := -1
	strs := []uint32{}

	for lineEnd < len(text)-1 {
		lineEnd = indexOf(text, "\n", lineStart)

		if lineEnd == -1 {
			// No further newline: the rest of text is the last line.
			lineEnd = len(text) - 1
		}

		line := text[lineStart : lineEnd+1]
		lineStart = lineEnd + 1

		lineValue, ok := lineHash[line]
		if !ok {
			*lineArray = append(*lineArray, line)
			lineValue = len(*lineArray) - 1
			lineHash[line] = lineValue
		}
		strs = append(strs, uint32(lineValue))
	}

	return strs
}
59 changes: 12 additions & 47 deletions diffmatchpatch/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ package diffmatchpatch

import (
"fmt"
"io"
"os"
"reflect"
"strconv"
"strings"
Expand Down Expand Up @@ -302,10 +300,10 @@ func TestDiffLinesToChars(t *testing.T) {
dmp := New()

for i, tc := range []TestCase{
{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
{"a", "b", "1", "2", []string{"", "a", "b"}},
{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\u0001\u0002\u0003\u0003", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
{"a", "b", "\u0001", "\u0002", []string{"", "a", "b"}},
// Omit final newline.
{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
{"alpha\nbeta\nalpha", "", "\u0001\u0002\u0003", "", []string{"", "alpha\n", "beta\n", "alpha"}},
} {
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
assertEqual(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
Expand All @@ -318,14 +316,14 @@ func TestDiffLinesToChars(t *testing.T) {
lineList := []string{
"", // Account for the initial empty element of the lines array.
}
var charList []string
var charList []rune
for x := 1; x < n+1; x++ {
lineList = append(lineList, strconv.Itoa(x)+"\n")
charList = append(charList, strconv.Itoa(x))
charList = append(charList, rune(x))
}
lines := strings.Join(lineList, "")
chars := strings.Join(charList[:], ",")
assertEqual(t, n, len(strings.Split(chars, ",")))
chars := string(charList)
assertEqual(t, n, utf8.RuneCountInString(chars))

actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
assertEqual(t, chars, actualChars1)
Expand All @@ -345,8 +343,8 @@ func TestDiffCharsToLines(t *testing.T) {
for i, tc := range []TestCase{
{
Diffs: []Diff{
{DiffEqual, "1,2,1"},
{DiffInsert, "2,1,2"},
{DiffEqual, "\u0001\u0002\u0001"},
{DiffInsert, "\u0002\u0001\u0002"},
},
Lines: []string{"", "alpha\n", "beta\n"},

Expand All @@ -365,15 +363,14 @@ func TestDiffCharsToLines(t *testing.T) {
lineList := []string{
"", // Account for the initial empty element of the lines array.
}
charList := []string{}
charList := []rune{}
for x := 1; x <= n; x++ {
lineList = append(lineList, strconv.Itoa(x)+"\n")
charList = append(charList, strconv.Itoa(x))
charList = append(charList, rune(x))
}
assertEqual(t, n, len(charList))
chars := strings.Join(charList[:], ",")

actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, chars}}, lineList)
actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, string(charList)}}, lineList)
assertEqual(t, []Diff{{DiffDelete, strings.Join(lineList, "")}}, actual)
}

Expand Down Expand Up @@ -1507,19 +1504,6 @@ func TestDiffMainWithCheckLines(t *testing.T) {
}
}

// TestMassiveRuneDiffConversion runs the line-mode round trip
// (DiffLinesToChars -> DiffMain -> DiffCharsToLines) with an empty first text
// and the contents of ../testdata/fixture.go as the second, and checks that at
// least one diff comes back.
func TestMassiveRuneDiffConversion(t *testing.T) {
	sNew, err := os.ReadFile("../testdata/fixture.go")
	if err != nil {
		// Fail only this test; a panic would abort the whole test binary.
		t.Fatalf("reading fixture: %v", err)
	}

	dmp := New()
	t1, t2, tt := dmp.DiffLinesToChars("", string(sNew))
	diffs := dmp.DiffMain(t1, t2, false)
	diffs = dmp.DiffCharsToLines(diffs, tt)
	assertEqual(t, true, len(diffs) > 0)
}

func BenchmarkDiffMain(bench *testing.B) {
var r []Diff

Expand Down Expand Up @@ -1579,22 +1563,3 @@ func BenchmarkDiffMainRunesLargeLines(b *testing.B) {

SinkSliceDiff = r
}

// BenchmarkDiffMainRunesLargeDiffLines times the line-mode pipeline
// (DiffLinesToRunes -> DiffMainRunes -> DiffCharsToLines) with the contents of
// ../testdata/diff10klinestest.txt as the first text and an empty second text.
func BenchmarkDiffMainRunesLargeDiffLines(b *testing.B) {
	var r []Diff

	// Fail fast if the fixture cannot be loaded; otherwise the loop below
	// would silently benchmark an empty input.
	fp, err := os.Open("../testdata/diff10klinestest.txt")
	if err != nil {
		b.Fatalf("opening fixture: %v", err)
	}
	defer fp.Close()

	data, err := io.ReadAll(fp)
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	dmp := New()

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		text1, text2, linearray := dmp.DiffLinesToRunes(string(data), "")
		r = dmp.DiffMainRunes(text1, text2, false)
		r = dmp.DiffCharsToLines(r, linearray)
	}

	// Store the result in a package-level sink so the loop body is not
	// optimized away.
	SinkSliceDiff = r
}
56 changes: 56 additions & 0 deletions diffmatchpatch/patch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,3 +567,59 @@ func TestPatchApply(t *testing.T) {
)
}
}

// TestIssues holds regression tests for reported issues. Each subtest is named
// after the URL of the issue it reproduces.
func TestIssues(t *testing.T) {
// Issue 127: PatchMake on these two texts is expected to yield exactly 6 patches.
t.Run("https://github.com/sergi/go-diff/issues/127", func(t *testing.T) {
text1 := `
1111111111111 000000
------------- ------
xxxxxxxxxxxxx ------
xxxxxxxxxxxxx ------
xxxxxxxxxxxxx xxxxxx
xxxxxxxxxxxxx ......
xxxxxxxxxxxxx 111111
xxxxxxxxxxxxx ??????
xxxxxxxxxxxxx 333333
xxxxxxxxxxxxx 555555
xxxxxxxxxx xxxxx
xxxxxxxxxx xxxxx
xxxxxxxxxx xxxxx
xxxxxxxxxx xxxxx
`
text2 := `
2222222222222 000000
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`

patches := New().PatchMake(text1, text2)
assertEqual(t, 6, len(patches), "Issue https://github.com/sergi/go-diff/issues/127")
})

// Issue 4: the line-mode round trip (DiffLinesToChars -> DiffMain ->
// DiffCharsToLines) is run on two multi-line texts. Nothing is asserted about
// the result; the subtest passes as long as the calls do not panic.
t.Run("https://github.com/sergi/go-diff/issues/4", func(t *testing.T) {
// doesn't panic
text1 := "1\n2\n3\n4\n5\n6\n7\n3\n8\n9\n3\n10\n3\n11\n3\n12\n13\n14\n15\n12\n13" +
"\n16\n13\n13\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34" +
"\n35\n12\n36\n37\n38\n39\n40\n41\n42\n13\n43\n44\n13\n45\n46\n47\n13\n13\n48\n49\n50" +
"\n51\n52\n13\n53\n54\n55\n56\n57\n58\n59\n60\n61\n62\n63\n64\n65\n66\n67\n68\n69\n13\n" +
"70\n71\n72\n73\n74\n13\n75\n13\n76\n77\n78\n79\n80\n81\n82\n83\n84\n85\n86\n87\n88\n89\n" +
"90\n67\n91\n92\n93\n81\n68\n13\n94\n71\n95\n96\n97\n98\n99\n100\n101\n102\n63\n103\n67\n104" +
"\n105\n13\n106\n107\n108\n109\n110\n111\n112\n113\n114\n115\n90\n116\n67\n13\n117\n72\n73\n" +
"74\n13\n75\n13\n76\n118\n119\n120\n78\n68\n121\n13\n122\n123\n124\n125\n93\n126\n68\n127\n13" +
"\n128\n129\n130\n131\n132\n133\n134\n135\n13\n136\n137\n138\n13\n78\n68\n13\n139\n140\n141\n142" +
"\n68\n13\n143\n144\n145\n146\n13\n147\n148\n13\n149\n150\n151\n152\n153\n150\n154\n13\n155\n156\n"
text2 := "1\n2\n3\n4\n5\n6\n7\n3\n157\n9\n3\n10\n3\n11\n3\n12\n13\n14\n15\n12\n13\n16\n13\n13" +
"\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n12\n36\n37\n38\n39\n40" +
"\n41\n42\n13\n158\n159\n13\n45\n46\n47\n13\n13\n48\n49\n50\n51\n13\n53\n54\n55\n56\n57\n160\n59\n60" +
"\n61\n62\n63\n64\n161\n66\n67\n68\n69\n13\n70\n71\n72\n73\n74\n13\n75\n13\n162\n77\n78\n79\n80\n81\n" +
"82\n83\n84\n85\n86\n88\n89\n90\n67\n91\n92\n93\n81\n68\n13\n94\n71\n95\n96\n97\n98\n99\n100\n101\n102" +
"\n63\n103\n67\n104\n105\n13\n106\n107\n108\n109\n110\n111\n112\n113\n114\n115\n90\n116\n67\n13\n117\n72" +
"\n73\n74\n13\n75\n13\n163\n119\n120\n78\n68\n121\n13\n122\n123\n124\n125\n93\n126\n68\n127\n13\n128\n164" +
"\n130\n131\n132\n133\n134\n135\n13\n136\n137\n138\n13\n78\n68\n13\n139\n140\n165\n68\n13\n143\n144\n145\n" +
"146\n13\n147\n148\n13\n149\n150\n151\n166\n153\n150\n154\n13\n155\n156\n"

dmp := New()
t1, t2, lineArray := dmp.DiffLinesToChars(text1, text2)
diffs := dmp.DiffMain(t1, t2, false)
diffs = dmp.DiffCharsToLines(diffs, lineArray)
// The result is deliberately discarded; only the absence of a panic matters.
_ = diffs
})
}
18 changes: 0 additions & 18 deletions diffmatchpatch/stringutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
package diffmatchpatch

import (
"strconv"
"strings"
"unicode/utf8"
)
Expand Down Expand Up @@ -92,20 +91,3 @@ func runesIndex(r1, r2 []rune) int {
}
return -1
}

// intArrayToString renders ns as base-10 numbers joined by the first byte of
// IndexSeparator, e.g. [1 2 3] becomes "1,2,3" when the separator is ",".
// A nil or empty slice yields the empty string.
func intArrayToString(ns []uint32) string {
	if len(ns) == 0 {
		return ""
	}

	// Only the first byte of IndexSeparator is used as the delimiter.
	sep := IndexSeparator[0]

	// Reserve roughly 3 digits per number plus one separator byte up front so
	// the buffer rarely has to grow; append still grows it if numbers are longer.
	b := make([]byte, 0, len(ns)*4)
	for i, n := range ns {
		// Write the separator between items only, so nothing has to be
		// trimmed off the end afterwards.
		if i > 0 {
			b = append(b, sep)
		}
		b = strconv.AppendUint(b, uint64(n), 10)
	}
	return string(b)
}
Loading