Skip to content

Commit 08d6ae1

Browse files
tecosaurpull[bot]
authored andcommitted
Remove strong ordering of annotation ranges
After a long chat with Lilith Halfner, we've come to the conclusion that the range-based ordering applied to annotations, while nice for making some otherwise O(n) code O(log n) and O(n^2) code O(n), is actually assuming too much about how annotations are used and interact with each other. Removing all assumptions about ordering, and giving annotation order primacy seems like the most sensible thing to do, even if it makes a few bits of the code less algorithmically "nice". As a consequence, we also get rid of annotatedstring_optimize!. Specific producers/consumers of annotated text will know what assumptions can be made to compress/optimise the annotations used, and are thus best suited to do so themselves. The one exception to this is probably when writing to an AnnotatedIOBuffer, here adding a specific optimisation to _insert_annotations! probably makes sense, and will be explored soon.
1 parent e8bbe47 commit 08d6ae1

File tree

2 files changed

+30
-81
lines changed

2 files changed

+30
-81
lines changed

base/strings/annotated.jl

Lines changed: 19 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -255,56 +255,26 @@ annotatedstring(c::AnnotatedChar) =
255255

256256
AnnotatedString(s::SubString{<:AnnotatedString}) = annotatedstring(s)
257257

258-
"""
259-
annotatedstring_optimize!(str::AnnotatedString)
260-
261-
Merge contiguous identical annotations in `str`.
262-
"""
263-
function annotatedstring_optimize!(s::AnnotatedString)
264-
last_seen = Dict{Pair{Symbol, Any}, Int}()
265-
i = 1
266-
while i <= length(s.annotations)
267-
region, keyval = s.annotations[i]
268-
prev = get(last_seen, keyval, 0)
269-
if prev > 0
270-
lregion, _ = s.annotations[prev]
271-
if last(lregion) + 1 == first(region)
272-
s.annotations[prev] =
273-
setindex(s.annotations[prev],
274-
first(lregion):last(region),
275-
1)
276-
deleteat!(s.annotations, i)
277-
else
278-
delete!(last_seen, keyval)
279-
end
280-
else
281-
last_seen[keyval] = i
282-
i += 1
283-
end
284-
end
285-
s
286-
end
287-
288258
function repeat(str::AnnotatedString, r::Integer)
289259
r == 0 && return one(AnnotatedString)
290260
r == 1 && return str
291261
unannot = repeat(str.string, r)
292262
annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()
293263
len = ncodeunits(str)
294264
fullregion = firstindex(str):lastindex(str)
295-
for (region, annot) in str.annotations
296-
if region == fullregion
297-
push!(annotations, (firstindex(unannot):lastindex(unannot), annot))
265+
if allequal(first, str.annotations) && first(first(str.annotations)) == fullregion
266+
newfullregion = firstindex(unannot):lastindex(unannot)
267+
for (_, annot) in str.annotations
268+
push!(annotations, (newfullregion, annot))
298269
end
299-
end
300-
for offset in 0:len:(r-1)*len
301-
for (region, annot) in str.annotations
302-
if region != fullregion
270+
else
271+
for offset in 0:len:(r-1)*len
272+
for (region, annot) in str.annotations
303273
push!(annotations, (region .+ offset, annot))
304274
end
305275
end
306276
end
307-
AnnotatedString(unannot, annotations) |> annotatedstring_optimize!
277+
AnnotatedString(unannot, annotations)
308278
end
309279

310280
repeat(str::SubString{<:AnnotatedString}, r::Integer) =
@@ -335,14 +305,9 @@ reverse(s::SubString{<:AnnotatedString}) = reverse(AnnotatedString(s))
335305
function _annotate!(annlist::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any}))
336306
label, val = labelval
337307
if val === nothing
338-
indices = searchsorted(annlist, (range,), by=first)
339-
labelindex = filter(i -> first(annlist[i][2]) === label, indices)
340-
for index in Iterators.reverse(labelindex)
341-
deleteat!(annlist, index)
342-
end
308+
deleteat!(annlist, findall(ann -> ann[1] == range && first(ann[2]) === label, annlist))
343309
else
344-
sortedindex = searchsortedlast(annlist, (range,), by=first) + 1
345-
insert!(annlist, sortedindex, (range, Pair{Symbol, Any}(label, val)))
310+
push!(annlist, (range, Pair{Symbol, Any}(label, val)))
346311
end
347312
end
348313

@@ -521,7 +486,7 @@ end
521486
function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int})
522487
# Clear out any overlapping pre-existing annotations.
523488
filter!(((region, _),) -> first(region) < first(span) || last(region) > last(span), annotations)
524-
extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]
489+
extras = Tuple{Int, Tuple{UnitRange{Int}, Pair{Symbol, Any}}}[]
525490
for i in eachindex(annotations)
526491
region, annot = annotations[i]
527492
# Test for partial overlap
@@ -532,30 +497,21 @@ function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int},
532497
# If `span` fits exactly within `region`, then we've only copied over
533498
# the beginning overhang, but also need to conserve the end overhang.
534499
if first(region) < first(span) && last(span) < last(region)
535-
push!(extras, (last(span)+1:last(region), annot))
500+
push!(extras, (i, (last(span)+1:last(region), annot)))
536501
end
537502
end
538-
# Insert any extra entries in the appropriate position
539-
for entry in extras
540-
sortedindex = searchsortedlast(annotations, (first(entry),), by=first) + 1
541-
insert!(annotations, sortedindex, entry)
542-
end
503+
end
504+
# Insert any extra entries in the appropriate position
505+
for (offset, (i, entry)) in enumerate(extras)
506+
insert!(annotations, i + offset, entry)
543507
end
544508
annotations
545509
end
546510

547511
function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
548-
if !eof(io)
549-
for (region, annot) in annotations
550-
region = first(region)+offset:last(region)+offset
551-
sortedindex = searchsortedlast(io.annotations, (region,), by=first) + 1
552-
insert!(io.annotations, sortedindex, (region, annot))
553-
end
554-
else
555-
for (region, annot) in annotations
556-
region = first(region)+offset:last(region)+offset
557-
push!(io.annotations, (region, annot))
558-
end
512+
for (region, annot) in annotations
513+
region = first(region)+offset:last(region)+offset
514+
push!(io.annotations, (region, annot))
559515
end
560516
end
561517

test/strings/annotated.jl

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@
2424
@test Base.AnnotatedString(str[3:4]) ==
2525
Base.AnnotatedString("me", [(1:2, :thing => 0x01), (1:2, :all => 0x03)])
2626
@test Base.AnnotatedString(str[3:6]) ==
27-
Base.AnnotatedString("me s", [(1:2, :thing => 0x01), (1:4, :all => 0x03), (4:4, :other => 0x02)])
28-
@test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)])
27+
Base.AnnotatedString("me s", [(1:2, :thing => 0x01), (4:4, :other => 0x02), (1:4, :all => 0x03)])
28+
@test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (6:11, :other => 0x02), (1:11, :all => 0x03)])
2929
@test str != Base.AnnotatedString("some string")
30-
@test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (6:6, :other => 0x02), (11:11, :all => 0x03)])
30+
@test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (1:11, :all => 0x03), (6:6, :other => 0x02)])
3131
@test str != Base.AnnotatedString("some string", [(1:4, :thing => 0x11), (1:11, :all => 0x13), (6:11, :other => 0x12)])
3232
@test str != Base.AnnotatedString("some thingg", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)])
3333
@test Base.AnnotatedString([Base.AnnotatedChar('a', [:a => 1]), Base.AnnotatedChar('b', [:b => 2])]) ==
@@ -51,15 +51,8 @@
5151
# @test collect(Base.eachstyle(str)) ==
5252
# [("some", [:thing => 0x01, :all => 0x03]),
5353
# (" string", [:all => 0x03, :other => 0x02])]
54-
@test ==(Base.annotatedstring_optimize!(
55-
Base.AnnotatedString("abc", [(1:1, :val => 1),
56-
(2:2, :val => 2),
57-
(2:2, :val => 1),
58-
(3:3, :val => 2)])),
59-
Base.AnnotatedString("abc", [(1:2, :val => 1),
60-
(2:3, :val => 2)]))
6154
@test chopprefix(sprint(show, str), "Base.") ==
62-
"AnnotatedString{String}(\"some string\", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)])"
55+
"AnnotatedString{String}(\"some string\", [(1:4, :thing => 0x01), (6:11, :other => 0x02), (1:11, :all => 0x03)])"
6356
@test eval(Meta.parse(repr(str))) == str
6457
@test sprint(show, MIME("text/plain"), str) == "\"some string\""
6558
end
@@ -149,8 +142,8 @@ end
149142
# Check `annotate!`, including region sorting
150143
@test truncate(aio, 0).io.size == 0
151144
@test write(aio, "hello world") == ncodeunits("hello world")
152-
@test Base.annotate!(aio, 7:11, :tag => 2) === aio
153145
@test Base.annotate!(aio, 1:5, :tag => 1) === aio
146+
@test Base.annotate!(aio, 7:11, :tag => 2) === aio
154147
@test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)]
155148
# Reading
156149
@test read(seekstart(deepcopy(aio.io)), String) == "hello world"
@@ -178,24 +171,24 @@ end
178171
@test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] # Should be unchanged
179172
@test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey => 'o')])) == 5
180173
@test read(seekstart(aio), String) == "hey-o alice"
181-
@test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:11, :tag => 2)] # First annotation should have been entirely replaced
174+
@test Base.annotations(aio) == [(7:11, :tag => 2), (1:5, :hey => 'o')] # First annotation should have been entirely replaced
182175
@test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey => 'a')])) == 3 # a[lic => bbi]e ('alice' => 'abbie')
183176
@test read(seekstart(aio), String) == "hey-o abbie"
184-
@test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
177+
@test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (1:5, :hey => 'o'), (8:10, :hey => 'a')]
185178
@test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly
186179
@test read(seekstart(aio), String) == "aby-o abbie"
187-
@test Base.annotations(aio) == [(3:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
180+
@test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (3:5, :hey => 'o'), (8:10, :hey => 'a')]
188181
@test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2
189182
@test read(seekstart(aio), String) == "abyss abbie"
190-
@test Base.annotations(aio) == [(3:3, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
183+
@test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (3:3, :hey => 'o'), (8:10, :hey => 'a')]
191184
# Writing one buffer to another
192185
newaio = Base.AnnotatedIOBuffer()
193186
@test write(newaio, seekstart(aio)) == 11
194187
@test read(seekstart(newaio), String) == "abyss abbie"
195188
@test Base.annotations(newaio) == Base.annotations(aio)
196189
@test write(seek(newaio, 5), seek(aio, 5)) == 6
197-
@test Base.annotations(newaio) == Base.annotations(aio)
190+
@test sort(Base.annotations(newaio)) == sort(Base.annotations(aio))
198191
@test write(newaio, seek(aio, 5)) == 6
199192
@test read(seekstart(newaio), String) == "abyss abbie abbie"
200-
@test Base.annotations(newaio) == vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)])
193+
@test sort(Base.annotations(newaio)) == sort(vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)]))
201194
end

0 commit comments

Comments
 (0)