Compare with isequal instead of ==/!= when diffing, so that NaN and missing
are treated as equal to themselves.

fieldequal, the element comparison in deepdiff(::Vector, ::Vector) and the
value comparison in deepdiff(::AbstractDict, ::AbstractDict) now use isequal.
The vector diff also records, for every cell of the LCS table, the step that
produced it; backtrack follows those recorded steps instead of re-comparing
elements. Tests cover NaN/missing as vector elements, dict values and dict keys.

diff --git a/src/DeepDiffs.jl b/src/DeepDiffs.jl
index e37ebd6..64fa1ce 100644
--- a/src/DeepDiffs.jl
+++ b/src/DeepDiffs.jl
@@ -6,7 +6,7 @@ export SimpleDiff, VectorDiff, StringDiff, DictDiff
 # Helper function for comparing two instances of a type for equality by field
 function fieldequal(x::T, y::T) where T
     for f in fieldnames(T)
-        getfield(x, f) == getfield(y, f) || return false
+        isequal(getfield(x, f), getfield(y, f)) || return false
     end
     true
 end
diff --git a/src/arrays.jl b/src/arrays.jl
index 4f182e6..d080b87 100644
--- a/src/arrays.jl
+++ b/src/arrays.jl
@@ -13,6 +13,8 @@ changed(diff::VectorDiff) = Int[]
 
 Base.:(==)(d1::VectorDiff, d2::VectorDiff) = fieldequal(d1, d2)
 
+_argmax(x, y) = x ≥ y ? (x, (0, 1)) : (y, (1, 0))
+
 # diffing an array is an application of the Longest Common Subsequence problem:
 # https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
 function deepdiff(X::Vector, Y::Vector)
@@ -21,35 +23,43 @@ function deepdiff(X::Vector, Y::Vector)
     # substrings.
 
     lengths = zeros(Int, length(X)+1, length(Y)+1)
+    backtracks = fill((0, 0), axes(lengths))
+    backtracks[1,2:end] .= Ref((0, 1))
+    backtracks[2:end,1] .= Ref((1, 0))
+    backtracks[1,1] = (0, 0)
 
     for (j, v2) in enumerate(Y)
         for (i, v1) in enumerate(X)
-            if v1 == v2
+            if isequal(v1, v2)
                 lengths[i+1, j+1] = lengths[i, j] + 1
+                backtracks[i+1, j+1] = (1, 1)
             else
-                lengths[i+1, j+1] = max(lengths[i+1, j], lengths[i, j+1])
+                lengths[i+1, j+1], backtracks[i+1, j+1] = _argmax(lengths[i+1, j], lengths[i, j+1])
             end
         end
     end
 
     removed = Int[]
     added = Int[]
-    backtrack(lengths, removed, added, X, Y, length(X), length(Y))
+
+    backtrack(backtracks, removed, added, (length(X)+1, length(Y)+1))
 
     VectorDiff(X, Y, removed, added)
 end
 
 # recursively trace back the longest common subsequence, adding items
 # to the added and removed lists as we go
-function backtrack(lengths, removed, added, X, Y, i, j)
-    if i > 0 && j > 0 && X[i] == Y[j]
-        backtrack(lengths, removed, added, X, Y, i-1, j-1)
-    elseif j > 0 && (i == 0 || lengths[i+1, j] ≥ lengths[i, j+1])
-        backtrack(lengths, removed, added, X, Y, i, j-1)
-        push!(added, j)
-    elseif i > 0 && (j == 0 || lengths[i+1, j] < lengths[i, j+1])
-        backtrack(lengths, removed, added, X, Y, i-1, j)
-        push!(removed, i)
+function backtrack(backtracks, removed, added, ij)
+    bt = backtracks[ij...]
+    if bt != (0, 0)
+        backtrack(backtracks, removed, added, ij .- bt)
+    end
+
+    (i, j) = ij
+    if bt == (0, 1)
+        push!(added, j-1)
+    elseif bt == (1, 0)
+        push!(removed, i-1)
     end
 end
 
diff --git a/src/dicts.jl b/src/dicts.jl
index b0574ed..d8560c3 100644
--- a/src/dicts.jl
+++ b/src/dicts.jl
@@ -26,7 +26,7 @@ function deepdiff(X::AbstractDict, Y::AbstractDict)
     changed = Dict{eltype(bothkeys), DeepDiff}()
 
     for key in bothkeys
-        if X[key] != Y[key]
+        if !isequal(X[key], Y[key])
             changed[key] = deepdiff(X[key], Y[key])
         else
             push!(unchanged, key)
diff --git a/test/arrays.jl b/test/arrays.jl
index b0397ba..5ca014b 100644
--- a/test/arrays.jl
+++ b/test/arrays.jl
@@ -44,4 +44,13 @@
     d = deepdiff(a1, [2, 4])
     @test removed(d) == [1, 3]
     @test added(d) == []
+
+    d = deepdiff([NaN], [NaN])
+    @test removed(d) == added(d) == []
+
+    d = deepdiff([missing], [missing])
+    @test removed(d) == added(d) == []
+
+    d = deepdiff([NaN], [])
+    @test d == d
 end
diff --git a/test/dicts.jl b/test/dicts.jl
index 70da79a..aedd60c 100644
--- a/test/dicts.jl
+++ b/test/dicts.jl
@@ -86,4 +86,30 @@
         @test removed(d) == Set()
         @test changed(d) == Dict()
     end
+
+    @testset "missing/NaN" begin
+        dnan = Dict(:d=>NaN)
+        d = deepdiff(dnan, dnan)
+        @test added(d) == removed(d) == Set()
+        @test changed(d) == Dict()
+
+        dmis = Dict(:d=>missing)
+        d = deepdiff(dmis, dmis)
+        @test added(d) == removed(d) == Set()
+        @test changed(d) == Dict()
+
+        dnank = Dict(NaN=>true)
+        d = deepdiff(dnank, dnank)
+        @test added(d) == removed(d) == Set()
+        @test changed(d) == Dict()
+
+        dmisk = Dict(missing=>true)
+        d = deepdiff(dmisk, dmisk)
+        @test added(d) == removed(d) == Set()
+        @test changed(d) == Dict()
+
+        d = DeepDiffs.deepdiff(dnank, dmisk)
+        @test d == d
+
+    end
 end