Skip to content

Commit 5c928c9

Browse files
authored
Fix Regex.split/2 edge case with empty chunks (#14468)
1 parent 7c6e95f commit 5c928c9

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

lib/elixir/lib/regex.ex

+10 -5
Original file line number | Diff line number | Diff line change
@@ -677,18 +677,23 @@ defmodule Regex do
677677
<<_::binary-size(^offset), part::binary-size(^keep), match::binary-size(^length), _::binary>> =
678678
string
679679

680-
if keep == 0 and trim do
681-
[match | do_split([h | t], string, new_offset, counter - 1, trim, true)]
682-
else
683-
[part, match | do_split([h | t], string, new_offset, counter - 1, trim, true)]
680+
cond do
681+
keep == 0 and (offset != 0 and length == 0) ->
682+
do_split([h | t], string, new_offset, counter - 1, trim, true)
683+
684+
keep == 0 and trim ->
685+
[match | do_split([h | t], string, new_offset, counter - 1, trim, true)]
686+
687+
true ->
688+
[part, match | do_split([h | t], string, new_offset, counter - 1, trim, true)]
684689
end
685690
end
686691

687692
defp do_split([[{pos, length} | h] | t], string, offset, counter, trim, false) do
688693
new_offset = pos + length
689694
keep = pos - offset
690695

691-
if keep == 0 and trim do
696+
if keep == 0 and (trim or (offset != 0 and length == 0)) do
692697
do_split([h | t], string, new_offset, counter, trim, false)
693698
else
694699
<<_::binary-size(^offset), part::binary-size(^keep), _::binary>> = string

lib/elixir/test/elixir/regex_test.exs

+12
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,8 @@ defmodule RegexTest do
187187
assert Regex.split(~r" ", " foo bar baz ", trim: true) == ["foo", "bar", "baz"]
188188
assert Regex.split(~r" ", " foo bar baz ", parts: 2) == ["", "foo bar baz "]
189189
assert Regex.split(~r" ", " foo bar baz ", trim: true, parts: 2) == ["foo", "bar baz "]
190+
191+
assert Regex.split(~r/b\K/, "ababab") == ["ab", "ab", "ab", ""]
190192
end
191193

192194
test "split/3 with the :on option" do
@@ -236,6 +238,16 @@ defmodule RegexTest do
236238

237239
assert Regex.split(~r/[Ei]/, "Elixir", include_captures: true, parts: 3, trim: true) ==
238240
["E", "l", "i", "xir"]
241+
242+
assert Regex.split(~r/b\Kc/, "abcabc", include_captures: true) == ["ab", "c", "ab", "c", ""]
243+
assert Regex.split(~r/(b\K)/, "abab", include_captures: true) == ["ab", "", "ab", "", ""]
244+
245+
assert Regex.split(~r/(b\K)/, "abab", include_captures: true, trim: true) == [
246+
"ab",
247+
"",
248+
"ab",
249+
""
250+
]
239251
end
240252

241253
test "replace/3,4" do

0 commit comments

Comments
 (0)