diff --git a/diffmatchpatch/dmp.go b/diffmatchpatch/dmp.go index af1a34c..f42a537 100644 --- a/diffmatchpatch/dmp.go +++ b/diffmatchpatch/dmp.go @@ -798,7 +798,7 @@ func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune { return [][]rune{hm[2], hm[3], hm[0], hm[1], hm[4]} } -// diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring is at least half the length of longtext? +// diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring is at least half the length of longtext? // @param {string} longtext Longer string. // @param {string} shorttext Shorter string. // @param {number} i Start index of quarter length substring within longtext. @@ -806,52 +806,43 @@ func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune { // longtext, the suffix of longtext, the prefix of shorttext, the suffix // of shorttext and the common middle. Or null if there was no match. func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune { + var bestCommonA []rune + var bestCommonB []rune + var bestCommonLen int + var bestLongtextA []rune + var bestLongtextB []rune + var bestShorttextA []rune + var bestShorttextB []rune + // Start with a 1/4 length substring at position i as a seed. seed := l[i : i+len(l)/4] - j := -1 - bestCommon := []rune{} - bestLongtextA := []rune{} - bestLongtextB := []rune{} - bestShorttextA := []rune{} - bestShorttextB := []rune{} - - if j < len(s) { - j = runesIndexOf(s, seed, j+1) - for { - if j == -1 { - break - } - prefixLength := commonPrefixLength(l[i:], s[j:]) - suffixLength := commonSuffixLength(l[:i], s[:j]) - if len(bestCommon) < suffixLength+prefixLength { - bestCommon = concat(s[j-suffixLength:j], s[j:j+prefixLength]) - bestLongtextA = l[:i-suffixLength] - bestLongtextB = l[i+prefixLength:] - bestShorttextA = s[:j-suffixLength] - bestShorttextB = s[j+prefixLength:] - } - j = runesIndexOf(s, seed, j+1) + for j := runesIndexOf(s, seed, 0); j != -1; j = runesIndexOf(s, seed, j+1) { + prefixLength := commonPrefixLength(l[i:], s[j:]) + suffixLength := commonSuffixLength(l[:i], s[:j]) + + if bestCommonLen < suffixLength+prefixLength { + bestCommonA = s[j-suffixLength : j] + bestCommonB = s[j : j+prefixLength] + bestCommonLen = len(bestCommonA) + len(bestCommonB) + bestLongtextA = l[:i-suffixLength] + bestLongtextB = l[i+prefixLength:] + bestShorttextA = s[:j-suffixLength] + bestShorttextB = s[j+prefixLength:] } } - if len(bestCommon)*2 >= len(l) { - return [][]rune{ - bestLongtextA, - bestLongtextB, - bestShorttextA, - bestShorttextB, - bestCommon, - } + if bestCommonLen*2 < len(l) { + return nil } - return nil -} -func concat(r1, r2 []rune) []rune { - result := make([]rune, len(r1)+len(r2)) - copy(result, r1) - copy(result[len(r1):], r2) - return result + return [][]rune{ + bestLongtextA, + bestLongtextB, + bestShorttextA, + bestShorttextB, + append(bestCommonA, bestCommonB...), + } } // DiffCleanupSemantic reduces the number of edits by eliminating diff --git a/diffmatchpatch/dmp_test.go b/diffmatchpatch/dmp_test.go index 7ee4da4..a543085 100644 --- a/diffmatchpatch/dmp_test.go +++ b/diffmatchpatch/dmp_test.go @@ -1552,6 +1552,16 @@ func Benchmark_DiffMainLargeLines(b *testing.B) { } } +func Benchmark_DiffHalfMatch(b *testing.B) { + s1 := readFile("speedtest1.txt", b) + s2 := readFile("speedtest2.txt", b) + dmp := New() + b.ResetTimer() + for i := 0; i < b.N; i++ { + dmp.DiffHalfMatch(s1, s2) + } +} + func readFile(filename string, b *testing.B) string { bytes, err := ioutil.ReadFile(filename) if err != nil {