@@ -566,25 +566,39 @@ func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []
566
566
// DiffCommonPrefix determines the common prefix length of two strings.
567
567
func (dmp * DiffMatchPatch ) DiffCommonPrefix (text1 , text2 string ) int {
568
568
n := min (len (text1 ), len (text2 ))
569
- for i := 0 ; i < n ; i ++ {
570
- if text1 [i ] != text2 [i ] {
569
+ i := 0
570
+ for i < n {
571
+ _ , sz := utf8 .DecodeRuneInString (text1 [i :])
572
+ if sz > n - i {
571
573
return i
572
574
}
575
+ for j := 0 ; j < sz ; j ++ {
576
+ if text1 [i + j ] != text2 [i + j ] {
577
+ return i
578
+ }
579
+ }
580
+ i += sz
573
581
}
574
- return n
582
+ return i
575
583
}
576
584
577
585
// DiffCommonSuffix determines the common suffix length of two strings.
578
586
func (dmp * DiffMatchPatch ) DiffCommonSuffix (text1 , text2 string ) int {
579
- text1_length := len (text1 )
580
- text2_length := len (text2 )
581
- n := min (text1_length , text2_length )
582
- for i := 1 ; i <= n ; i ++ {
583
- if text1 [text1_length - i ] != text2 [text2_length - i ] {
584
- return i - 1
587
+ n := min (len (text1 ), len (text2 ))
588
+ i := 0
589
+ for i < n {
590
+ _ , sz := utf8 .DecodeLastRuneInString (text1 [:len (text1 )- i ])
591
+ if sz > n - i {
592
+ return i
593
+ }
594
+ for j := 0 ; j < sz ; j ++ {
595
+ if text1 [len (text1 )- 1 - i - j ] != text2 [len (text2 )- 1 - i - j ] {
596
+ return i
597
+ }
585
598
}
599
+ i += sz
586
600
}
587
- return n
601
+ return i
588
602
// Binary search.
589
603
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
590
604
/*
@@ -901,16 +915,15 @@ func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff {
901
915
return 6
902
916
}
903
917
904
- _one := []rune (one )
905
- _two := []rune (two )
906
-
907
918
// Each port of this function behaves slightly differently due to
908
919
// subtle differences in each language's definition of things like
909
920
// 'whitespace'. Since this function's purpose is largely cosmetic,
910
921
// the choice has been made to use each language's native features
911
922
// rather than force total conformity.
912
- char1 := string (_one [len (one )- 1 ])
913
- char2 := string (_two [0 ])
923
+ rune1 , _ := utf8 .DecodeLastRuneInString (one )
924
+ rune2 , _ := utf8 .DecodeRuneInString (two )
925
+ char1 := string (rune1 )
926
+ char2 := string (rune2 )
914
927
915
928
nonAlphaNumeric1 := nonAlphaNumericRegex_ .MatchString (char1 )
916
929
nonAlphaNumeric2 := nonAlphaNumericRegex_ .MatchString (char2 )
@@ -968,10 +981,14 @@ func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff {
968
981
bestScore := diffCleanupSemanticScore_ (equality1 , edit ) +
969
982
diffCleanupSemanticScore_ (edit , equality2 )
970
983
971
- for len (edit ) != 0 && len (equality2 ) != 0 && edit [0 ] == equality2 [0 ] {
984
+ for len (edit ) != 0 && len (equality2 ) != 0 {
985
+ _ , sz := utf8 .DecodeRuneInString (edit )
986
+ if edit [:sz ] != equality2 [:sz ] {
987
+ break
988
+ }
972
989
equality1 += string (edit [0 ])
973
- edit = edit [1 :] + string (equality2 [0 ])
974
- equality2 = equality2 [1 :]
990
+ edit = edit [sz :] + string (equality2 [0 ])
991
+ equality2 = equality2 [sz :]
975
992
score := diffCleanupSemanticScore_ (equality1 , edit ) +
976
993
diffCleanupSemanticScore_ (edit , equality2 )
977
994
// The >= encourages trailing rather than leading whitespace on
0 commit comments