Skip to content

Commit 449e17c

Browse files
authored
Fix non-determinism in diffing algorithm (#247)
A previous attempt to add non-determinism to the diffing algorithm unfortunately broke the algorithm for half the cases. This change modifies the algorithm to truly choose between starting with a forward search and starting with a reverse search. The main for-loop of Difference alternates between performing a forward search and a reverse search. Since we can't jump into the middle of a for-loop to start with the reverse search first, we use a series of labels and goto statements to accomplish the same effect. Fixes #238
1 parent ade6b74 commit 449e17c

File tree

2 files changed

+40
-35
lines changed

2 files changed

+40
-35
lines changed

cmp/internal/diff/diff.go

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ func (r Result) Similar() bool {
119119
return r.NumSame+1 >= r.NumDiff
120120
}
121121

122-
var randInt = rand.New(rand.NewSource(time.Now().Unix())).Intn(2)
122+
var randBool = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) == 0
123123

124124
// Difference reports whether two lists of lengths nx and ny are equal
125125
// given the definition of equality provided as f.
@@ -168,17 +168,6 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
168168
// A vertical edge is equivalent to inserting a symbol from list Y.
169169
// A diagonal edge is equivalent to a matching symbol between both X and Y.
170170

171-
// To ensure flexibility in changing the algorithm in the future,
172-
// introduce some degree of deliberate instability.
173-
// This is achieved by fiddling the zigzag iterator to start searching
174-
// the graph from the bottom-right rather than the top-left.
175-
// The result may differ depending on the starting search location,
176-
// but still produces a valid edit script.
177-
zigzagInit := randInt // either 0 or 1
178-
if flags.Deterministic {
179-
zigzagInit = 0
180-
}
181-
182171
// Invariants:
183172
// • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
184173
// • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
@@ -197,6 +186,11 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
197186
// approximately the square-root of the search budget.
198187
searchBudget := 4 * (nx + ny) // O(n)
199188

189+
// Running the tests with the "cmp_debug" build tag prints a visualization
190+
// of the algorithm running in real-time. This is educational for
191+
// understanding how the algorithm works. See debug_enable.go.
192+
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
193+
200194
// The algorithm below is a greedy, meet-in-the-middle algorithm for
201195
// computing sub-optimal edit-scripts between two lists.
202196
//
@@ -214,22 +208,28 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
214208
// frontier towards the opposite corner.
215209
// • This algorithm terminates when either the X coordinates or the
216210
// Y coordinates of the forward and reverse frontier points ever intersect.
217-
//
211+
218212
// This algorithm is correct even if searching only in the forward direction
219213
// or in the reverse direction. We do both because it is commonly observed
220214
// that two lists differ because elements were added to the front
221215
// or end of the other list.
222216
//
223-
// Running the tests with the "cmp_debug" build tag prints a visualization
224-
// of the algorithm running in real-time. This is educational for
225-
// understanding how the algorithm works. See debug_enable.go.
226-
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
227-
for {
217+
// Non-deterministically start with either the forward or reverse direction
218+
// to introduce some deliberate instability so that we have the flexibility
219+
// to change this algorithm in the future.
220+
if flags.Deterministic || randBool {
221+
goto forwardSearch
222+
} else {
223+
goto reverseSearch
224+
}
225+
226+
forwardSearch:
227+
{
228228
// Forward search from the beginning.
229229
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
230-
break
230+
goto finishSearch
231231
}
232-
for stop1, stop2, i := false, false, zigzagInit; !(stop1 && stop2) && searchBudget > 0; i++ {
232+
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
233233
// Search in a diagonal pattern for a match.
234234
z := zigzag(i)
235235
p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
@@ -262,10 +262,14 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
262262
} else {
263263
fwdFrontier.Y++
264264
}
265+
goto reverseSearch
266+
}
265267

268+
reverseSearch:
269+
{
266270
// Reverse search from the end.
267271
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
268-
break
272+
goto finishSearch
269273
}
270274
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
271275
// Search in a diagonal pattern for a match.
@@ -300,8 +304,10 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
300304
} else {
301305
revFrontier.Y--
302306
}
307+
goto forwardSearch
303308
}
304309

310+
finishSearch:
305311
// Join the forward and reverse paths and then append the reverse path.
306312
fwdPath.connect(revPath.point, f)
307313
for i := len(revPath.es) - 1; i >= 0; i-- {

cmp/internal/diff/diff_test.go

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,15 @@ import (
1010
"strings"
1111
"testing"
1212
"unicode"
13-
14-
"github.com/google/go-cmp/cmp/internal/flags"
1513
)
1614

17-
func init() {
18-
flags.Deterministic = true
19-
}
20-
2115
func TestDifference(t *testing.T) {
2216
tests := []struct {
2317
// Before passing x and y to Difference, we strip all spaces so that
2418
// they can be used by the test author to indicate a missing symbol
2519
// in one of the lists.
2620
x, y string
27-
want string
21+
want string // '|' separated list of possible outputs
2822
}{{
2923
x: "",
3024
y: "",
@@ -36,7 +30,7 @@ func TestDifference(t *testing.T) {
3630
}, {
3731
x: "##",
3832
y: "# ",
39-
want: ".X",
33+
want: ".X|X.",
4034
}, {
4135
x: "a#",
4236
y: "A ",
@@ -48,7 +42,7 @@ func TestDifference(t *testing.T) {
4842
}, {
4943
x: "# ",
5044
y: "##",
51-
want: ".Y",
45+
want: ".Y|Y.",
5246
}, {
5347
x: " #",
5448
y: "@#",
@@ -148,7 +142,7 @@ func TestDifference(t *testing.T) {
148142
}, {
149143
x: "ABCAB BA ",
150144
y: " C BABAC",
151-
want: "XX.X.Y..Y",
145+
want: "XX.X.Y..Y|XX.Y.X..Y",
152146
}, {
153147
x: "# #### ###",
154148
y: "#y####yy###",
@@ -164,7 +158,7 @@ func TestDifference(t *testing.T) {
164158
}, {
165159
x: "0 12z3x 456789 x x 0",
166160
y: "0y12Z3 y456789y y y0",
167-
want: ".Y..M.XY......YXYXY.",
161+
want: ".Y..M.XY......YXYXY.|.Y..M.XY......XYXYY.",
168162
}, {
169163
x: "0 2 4 6 8 ..................abXXcdEXF.ghXi",
170164
y: " 1 3 5 7 9..................AB CDE F.GH I",
@@ -216,7 +210,7 @@ func TestDifference(t *testing.T) {
216210
}, {
217211
x: "0123456789 ",
218212
y: " 5678901234",
219-
want: "XXXXX.....YYYYY",
213+
want: "XXXXX.....YYYYY|YYYYY.....XXXXX",
220214
}, {
221215
x: "0123456789 ",
222216
y: " 4567890123",
@@ -252,9 +246,14 @@ func TestDifference(t *testing.T) {
252246
x := strings.Replace(tt.x, " ", "", -1)
253247
y := strings.Replace(tt.y, " ", "", -1)
254248
es := testStrings(t, x, y)
255-
if got := es.String(); got != tt.want {
256-
t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, tt.want)
249+
var want string
250+
got := es.String()
251+
for _, want = range strings.Split(tt.want, "|") {
252+
if got == want {
253+
return
254+
}
257255
}
256+
t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, want)
258257
})
259258
}
260259
}

0 commit comments

Comments
 (0)