Merge pull request #48 from sergi/46-refactor-diffHalfMatchI-and-remove-concat

zimmski · web-flow · commit c55cf50e9bdd · 2016-11-02T16:33:51.000+01:00
Refactor diffHalfMatchI and remove the concat helper
diff --git a/diffmatchpatch/dmp.go b/diffmatchpatch/dmp.go
@@ -798,60 +798,51 @@ func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune {
 	return [][]rune{hm[2], hm[3], hm[0], hm[1], hm[4]}
 }
 
-// diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring  is at least half the length of longtext?
+// diffHalfMatchI checks whether a substring of shorttext exists within longtext such that the substring is at least half the length of longtext.
 // @param {string} longtext Longer string.
 // @param {string} shorttext Shorter string.
 // @param {number} i Start index of quarter length substring within longtext.
 // @return {Array.<string>} Five element Array, containing the prefix of
 //     longtext, the suffix of longtext, the prefix of shorttext, the suffix
 //     of shorttext and the common middle.  Or null if there was no match.
 func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune {
+	var bestCommonA []rune
+	var bestCommonB []rune
+	var bestCommonLen int
+	var bestLongtextA []rune
+	var bestLongtextB []rune
+	var bestShorttextA []rune
+	var bestShorttextB []rune
+
 	// Start with a 1/4 length substring at position i as a seed.
 	seed := l[i : i+len(l)/4]
-	j := -1
-	bestCommon := []rune{}
-	bestLongtextA := []rune{}
-	bestLongtextB := []rune{}
-	bestShorttextA := []rune{}
-	bestShorttextB := []rune{}
-
-	if j < len(s) {
-		j = runesIndexOf(s, seed, j+1)
-		for {
-			if j == -1 {
-				break
-			}
 
-			prefixLength := commonPrefixLength(l[i:], s[j:])
-			suffixLength := commonSuffixLength(l[:i], s[:j])
-			if len(bestCommon) < suffixLength+prefixLength {
-				bestCommon = concat(s[j-suffixLength:j], s[j:j+prefixLength])
-				bestLongtextA = l[:i-suffixLength]
-				bestLongtextB = l[i+prefixLength:]
-				bestShorttextA = s[:j-suffixLength]
-				bestShorttextB = s[j+prefixLength:]
-			}
-			j = runesIndexOf(s, seed, j+1)
+	for j := runesIndexOf(s, seed, 0); j != -1; j = runesIndexOf(s, seed, j+1) {
+		prefixLength := commonPrefixLength(l[i:], s[j:])
+		suffixLength := commonSuffixLength(l[:i], s[:j])
+
+		if bestCommonLen < suffixLength+prefixLength {
+			bestCommonA = s[j-suffixLength : j]
+			bestCommonB = s[j : j+prefixLength]
+			bestCommonLen = len(bestCommonA) + len(bestCommonB)
+			bestLongtextA = l[:i-suffixLength]
+			bestLongtextB = l[i+prefixLength:]
+			bestShorttextA = s[:j-suffixLength]
+			bestShorttextB = s[j+prefixLength:]
 		}
 	}
 
-	if len(bestCommon)*2 >= len(l) {
-		return [][]rune{
-			bestLongtextA,
-			bestLongtextB,
-			bestShorttextA,
-			bestShorttextB,
-			bestCommon,
-		}
+	if bestCommonLen*2 < len(l) {
+		return nil
 	}
-	return nil
-}
 
-func concat(r1, r2 []rune) []rune {
-	result := make([]rune, len(r1)+len(r2))
-	copy(result, r1)
-	copy(result[len(r1):], r2)
-	return result
+	return [][]rune{
+		bestLongtextA,
+		bestLongtextB,
+		bestShorttextA,
+		bestShorttextB,
+		append(bestCommonA, bestCommonB...),
+	}
 }
 
 // DiffCleanupSemantic reduces the number of edits by eliminating
diff --git a/diffmatchpatch/dmp_test.go b/diffmatchpatch/dmp_test.go
@@ -1552,6 +1552,16 @@ func Benchmark_DiffMainLargeLines(b *testing.B) {
 	}
 }
 
+func Benchmark_DiffHalfMatch(b *testing.B) {
+	s1 := readFile("speedtest1.txt", b)
+	s2 := readFile("speedtest2.txt", b)
+	dmp := New()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		dmp.DiffHalfMatch(s1, s2)
+	}
+}
+
 func readFile(filename string, b *testing.B) string {
 	bytes, err := ioutil.ReadFile(filename)
 	if err != nil {