Skip to content

Commit 51e3e8c

Browse files
committed
Revert "regcomp.c: Add shortcuts to some inversion list ops"
This reverts commit c29dfc6. It also removes the XXX comment suggesting that the things that commit did should be done. It turns out that I didn't fully understand this: the shortcuts weren't found as frequently as I expected, and in fact were taken when they shouldn't have been, creating bugs. The only bugs I found had to do with displaying what the regex compiled into under -Dr, but I imagine there are others out there. I did try to create some test cases that showed the bugs, based on understanding how the code works, but various things prevented them from actually being bugs. To correctly decide to take the shortcut requires adding tests inside a loop, and the shortcut is just to possibly save some work after the loop. That isn't a good tradeoff.
1 parent e171168 commit 51e3e8c

File tree

1 file changed

+37
-105
lines changed

1 file changed

+37
-105
lines changed

regcomp.c

Lines changed: 37 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -8949,10 +8949,6 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
89498949
UV i_b = 0;
89508950
UV i_u = 0;
89518951

8952-
bool has_something_from_a = FALSE;
8953-
bool has_something_from_b = FALSE;
8954-
8955-
89568952
/* running count, as explained in the algorithm source book; items are
89578953
* stopped accumulating and are output when the count changes to/from 0.
89588954
* The count is incremented when we start a range that's in the set, and
@@ -9118,12 +9114,10 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
91189114
{
91199115
cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
91209116
cp= array_a[i_a++];
9121-
has_something_from_a = TRUE;
91229117
}
91239118
else {
91249119
cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
91259120
cp = array_b[i_b++];
9126-
has_something_from_b = TRUE;
91279121
}
91289122

91299123
/* Here, have chosen which of the two inputs to look at. Only output
@@ -9172,54 +9166,10 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
91729166
* be output. (If 'count' is non-zero, then the input list we exhausted
91739167
* has everything remaining up to the machine's limit in its set, and hence
91749168
* in the union, so there will be no further output. */
9175-
if (count != 0) {
9176-
9177-
/* Here, there is nothing left to put in the union. If the union came
9178-
* only from the input that it is to overwrite, this whole operation is
9179-
* a no-op */
9180-
if ( UNLIKELY(! has_something_from_b && *output == a)
9181-
|| UNLIKELY(! has_something_from_a && *output == b))
9182-
{
9183-
SvREFCNT_dec_NN(u);
9184-
return;
9185-
}
9186-
9187-
len_u = i_u;
9188-
}
9189-
else {
9190-
/* When 'count' is 0, the list that was exhausted (if one was shorter
9191-
* than the other) ended with everything above it not in its set. That
9192-
* means that the remaining part of the union is precisely the same as
9193-
* the non-exhausted list, so can just copy it unchanged. If only one
9194-
* of the inputs contributes to the union, and the output is to
9195-
* overwrite that particular input, then this whole operation was a
9196-
* no-op. */
9197-
9198-
IV copy_count = len_a - i_a;
9199-
if (copy_count > 0) {
9200-
if (UNLIKELY(! has_something_from_b && *output == a)) {
9201-
SvREFCNT_dec_NN(u);
9202-
return;
9203-
}
9204-
Copy(array_a + i_a, array_u + i_u, copy_count, UV);
9205-
len_u = i_u + copy_count;
9206-
}
9207-
else if ((copy_count = len_b - i_b) > 0) {
9208-
if (UNLIKELY(! has_something_from_a && *output == b)) {
9209-
SvREFCNT_dec_NN(u);
9210-
return;
9211-
}
9212-
Copy(array_b + i_b, array_u + i_u, copy_count, UV);
9213-
len_u = i_u + copy_count;
9214-
} else if ( UNLIKELY(! has_something_from_b && *output == a)
9215-
|| UNLIKELY(! has_something_from_a && *output == b))
9216-
{
9217-
/* Here, both arrays are exhausted, so no need to do any additional
9218-
* copying. Also here, the union came only from the input that it is
9219-
* to overwrite, so this whole operation is a no-op */
9220-
SvREFCNT_dec_NN(u);
9221-
return;
9222-
}
9169+
len_u = i_u;
9170+
if (count == 0) {
9171+
/* At most one of the subexpressions will be non-zero */
9172+
len_u += (len_a - i_a) + (len_b - i_b);
92239173
}
92249174

92259175
/* Set the result to the final length, which can change the pointer to
@@ -9231,6 +9181,22 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
92319181
array_u = invlist_array(u);
92329182
}
92339183

9184+
/* When 'count' is 0, the list that was exhausted (if one was shorter than
9185+
* the other) ended with everything above it not in its set. That means
9186+
* that the remaining part of the union is precisely the same as the
9187+
* non-exhausted list, so can just copy it unchanged. (If both lists were
9188+
* exhausted at the same time, then both copy counts below will be 0.)
9189+
*/
9190+
if (count == 0) {
9191+
IV copy_count; /* At most one will have a non-zero copy count */
9192+
if ((copy_count = len_a - i_a) > 0) {
9193+
Copy(array_a + i_a, array_u + i_u, copy_count, UV);
9194+
}
9195+
else if ((copy_count = len_b - i_b) > 0) {
9196+
Copy(array_b + i_b, array_u + i_u, copy_count, UV);
9197+
}
9198+
}
9199+
92349200
/* If the output is not to overwrite either of the inputs, just return the
92359201
* calculated union */
92369202
if (a != *output && b != *output) {
@@ -9303,9 +9269,6 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
93039269
*/
93049270
UV count = 0;
93059271

9306-
bool has_something_from_a = FALSE;
9307-
bool has_something_from_b = FALSE;
9308-
93099272
PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
93109273
assert(a != b);
93119274

@@ -9402,12 +9365,10 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
94029365
{
94039366
cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
94049367
cp= array_a[i_a++];
9405-
has_something_from_a = TRUE;
94069368
}
94079369
else {
94089370
cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
94099371
cp= array_b[i_b++];
9410-
has_something_from_b = TRUE;
94119372
}
94129373

94139374
/* Here, have chosen which of the two inputs to look at. Only output
@@ -9449,52 +9410,12 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
94499410
count++;
94509411
}
94519412

9452-
if (count < 2) {
9453-
9454-
/* Here, there is nothing left to put in the intersection. If the
9455-
* intersection came only from the input that it is to overwrite, this
9456-
* whole operation is a no-op */
9457-
if ( UNLIKELY(! has_something_from_b && *i == a)
9458-
|| UNLIKELY(! has_something_from_a && *i == b))
9459-
{
9460-
SvREFCNT_dec_NN(r);
9461-
return;
9462-
}
9463-
9464-
len_r = i_r;
9465-
}
9466-
else {
9467-
/* When 'count' is 2 or more, after one list is exhausted, what remains
9468-
* in the intersection is precisely the same as the non-exhausted list,
9469-
* so can just copy it unchanged. If only one of the inputs
9470-
* contributes to the intersection, and the output is to overwrite that
9471-
* particular input, then this whole operation was a no-op. */
9472-
9473-
IV copy_count = len_a - i_a;
9474-
if (copy_count > 0) {
9475-
if (UNLIKELY(! has_something_from_b && *i == a)) {
9476-
SvREFCNT_dec_NN(r);
9477-
return;
9478-
}
9479-
Copy(array_a + i_a, array_r + i_r, copy_count, UV);
9480-
len_r = i_r + copy_count;
9481-
}
9482-
else if ((copy_count = len_b - i_b) > 0) {
9483-
if (UNLIKELY(! has_something_from_a && *i == b)) {
9484-
SvREFCNT_dec_NN(r);
9485-
return;
9486-
}
9487-
Copy(array_b + i_b, array_r + i_r, copy_count, UV);
9488-
len_r = i_r + copy_count;
9489-
} else if ( UNLIKELY(! has_something_from_b && *i == a)
9490-
|| UNLIKELY(! has_something_from_a && *i == b))
9491-
{
9492-
/* Here, both arrays are exhausted, so no need to do any additional
9493-
* copying. Also here, the intersection came only from the input
9494-
* that it is to overwrite, so this whole operation is a no-op */
9495-
SvREFCNT_dec_NN(r);
9496-
return;
9497-
}
9413+
/* The final length is what we've output so far plus what else is in the
9414+
* intersection. At most one of the subexpressions below will be non-zero
9415+
* */
9416+
len_r = i_r;
9417+
if (count >= 2) {
9418+
len_r += (len_a - i_a) + (len_b - i_b);
94989419
}
94999420

95009421
/* Set the result to the final length, which can change the pointer to
@@ -9506,6 +9427,17 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
95069427
array_r = invlist_array(r);
95079428
}
95089429

9430+
/* Finish outputting any remaining */
9431+
if (count >= 2) { /* At most one will have a non-zero copy count */
9432+
IV copy_count;
9433+
if ((copy_count = len_a - i_a) > 0) {
9434+
Copy(array_a + i_a, array_r + i_r, copy_count, UV);
9435+
}
9436+
else if ((copy_count = len_b - i_b) > 0) {
9437+
Copy(array_b + i_b, array_r + i_r, copy_count, UV);
9438+
}
9439+
}
9440+
95099441
/* If the output is not to overwrite either of the inputs, just return the
95109442
* calculated intersection */
95119443
if (a != *i && b != *i) {

0 commit comments

Comments
 (0)