Skip to content

Commit ba52ce1

Browse files
committed
Deprecate above \xFF in bitwise string ops
This is already a fatal error for operations whose outcome depends on them, but in things like "abc" & "def\x{100}" the wide character doesn't actually need to participate in the AND, and so perl doesn't. As a result of the discussion in the thread beginning with http://nntp.perl.org/group/perl.perl5.porters/244884, it was decided to deprecate these ones too.
1 parent 78ba900 commit ba52ce1

File tree

5 files changed

+106
-7
lines changed

5 files changed

+106
-7
lines changed

doop.c

+5
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
10951095
* portion. That means that at least one of the operands has to be
10961096
* entirely non-UTF-8, and the length of that operand has to be before the
10971097
* first above-FF in the other */
1098+
if (left_utf8 || right_utf8) {
10981099
if (left_utf8) {
10991100
if (right_utf8 || rightlen > leftlen) {
11001101
Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[optype]);
@@ -1107,6 +1108,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
11071108
}
11081109
len = leftlen;
11091110
}
1111+
1112+
Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
1113+
deprecated_above_ff_msg, PL_op_desc[optype]);
1114+
}
11101115
else { /* Neither is UTF-8 */
11111116
len = MIN(leftlen, rightlen);
11121117
}

op.h

+4
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,10 @@ C<sib> is non-null. For a higher-level interface, see C<L</op_sibling_splice>>.
11121112
static const char * const fatal_above_ff_msg
11131113
= "Use of strings with code points over 0xFF as arguments to "
11141114
"%s operator is not allowed";
1115+
static const char * const deprecated_above_ff_msg
1116+
= "Use of strings with code points over 0xFF as arguments to "
1117+
"%s operator is deprecated. This will be a fatal error in "
1118+
"Perl 5.32";
11151119
#endif
11161120

11171121

pod/perldelta.pod

+7
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ reverted due to the extent of the trouble caused to CPAN modules.
8181
It is expected that smartmatch will be changed again in the future,
8282
but preceded by some kind of explicit deprecation.
8383

84+
=head1 Deprecations
85+
86+
=head2 Use of code points over 0xFF in string bitwise operators
87+
88+
Some uses of these already are illegal after a previous deprecation
89+
cycle. This deprecates the remaining uses. See L<perldeprecation>.
90+
8491
=head1 Performance Enhancements
8592

8693
=over 4

pod/perldeprecation.pod

+18
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,24 @@ C<vec> views its string argument as a sequence of bits. A string
5656
containing a code point over 0xFF is nonsensical. This usage is
5757
deprecated in Perl 5.28, and will be removed in Perl 5.32.
5858

59+
=head3 Use of code points over 0xFF in string bitwise operators
60+
61+
The string bitwise operators, C<&>, C<|>, C<^>, and C<~>, treat their
62+
operands as strings of bytes. As such, values above 0xFF are
63+
nonsensical. Some instances of these have been deprecated since Perl
64+
5.24, and were made fatal in 5.28, but it turns out that in cases where
65+
the wide characters did not affect the end result, no deprecation
66+
notice was raised, and so remain legal. Now, all occurrences either are
67+
fatal or raise a deprecation warning, so that the remaining legal
68+
occurrences will be fatal in 5.32.
69+
70+
An example of this is
71+
72+
"" & "\x{100}"
73+
74+
The wide character is not used in the C<&> operation because the left
75+
operand is shorter. This now warns anyway.
76+
5977
=head3 hostname() doesn't accept any arguments
6078

6179
The function C<hostname()> in the L<Sys::Hostname> module has always

t/op/bop.t

+72-7
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ BEGIN {
1818
# If you find tests are failing, please try adding names to tests to track
1919
# down where the failure is, and supply your new names as a patch.
2020
# (Just-in-time test naming)
21-
plan tests => 491;
21+
plan tests => 504;
2222

2323
# numerics
2424
ok ((0xdead & 0xbeef) == 0x9ead);
@@ -612,9 +612,74 @@ foreach my $op_info ([and => "&"], [or => "|"], [xor => "^"]) {
612612
"(~) is not allowed";
613613
}
614614
615-
is("abc" & "abc\x{100}", "abc", '"abc" & "abc\x{100}" works');
616-
is("abc" | "abc\x{100}", "abc\x{100}", '"abc" | "abc\x{100}" works');
617-
is("abc" ^ "abc\x{100}", "\0\0\0\x{100}", '"abc" ^ "abc\x{100}" works');
618-
is("abc\x{100}" & "abc", "abc", '"abc\x{100}" & "abc" works');
619-
is("abc\x{100}" | "abc", "abc\x{100}", '"abc\x{100}" | "abc" works');
620-
is("abc\x{100}" ^ "abc", "\0\0\0\x{100}", '"abc\x{100}" ^ "abc" works');
615+
{
616+
# Since these are temporary, and it was a pain to make them into loops,
617+
# the code is just rolled out.
618+
local $SIG{__WARN__} = sub { push @warnings, @_; };
619+
620+
undef @warnings;
621+
is("abc" & "abc\x{100}", "abc", '"abc" & "abc\x{100}" works');
622+
if (! is(@warnings, 1, "... but returned a single warning")) {
623+
diag join "\n", @warnings;
624+
}
625+
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
626+
)arguments to bitwise and \(&\) operator (?#
627+
)is deprecated/,
628+
"... which is the expected warning");
629+
undef @warnings;
630+
is("abc" | "abc\x{100}", "abc\x{100}", '"abc" | "abc\x{100}" works');
631+
if (! is(@warnings, 1, "... but returned a single warning")) {
632+
diag join "\n", @warnings;
633+
}
634+
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
635+
)arguments to bitwise or \(|\) operator (?#
636+
)is deprecated/,
637+
"... which is the expected warning");
638+
undef @warnings;
639+
is("abc" ^ "abc\x{100}", "\0\0\0\x{100}", '"abc" ^ "abc\x{100}" works');
640+
if (! is(@warnings, 1, "... but returned a single warning")) {
641+
diag join "\n", @warnings;
642+
}
643+
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
644+
)arguments to bitwise xor \(\^\) operator (?#
645+
)is deprecated/,
646+
"... which is the expected warning");
647+
undef @warnings;
648+
is("abc\x{100}" & "abc", "abc", '"abc\x{100}" & "abc" works');
649+
if (! is(@warnings, 1, "... but returned a single warning")) {
650+
diag join "\n", @warnings;
651+
}
652+
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
653+
)arguments to bitwise and \(&\) operator (?#
654+
)is deprecated/,
655+
"... which is the expected warning");
656+
undef @warnings;
657+
is("abc\x{100}" | "abc", "abc\x{100}", '"abc\x{100}" | "abc" works');
658+
if (! is(@warnings, 1, "... but returned a single warning")) {
659+
diag join "\n", @warnings;
660+
}
661+
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
662+
)arguments to bitwise or \(|\) operator (?#
663+
)is deprecated/,
664+
"... which is the expected warning");
665+
undef @warnings;
666+
is("abc\x{100}" ^ "abc", "\0\0\0\x{100}", '"abc\x{100}" ^ "abc" works');
667+
if (! is(@warnings, 1, "... but returned a single warning")) {
668+
diag join "\n", @warnings;
669+
}
670+
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
671+
)arguments to bitwise xor \(\^\) operator (?#
672+
)is deprecated/,
673+
"... which is the expected warning");
674+
no warnings 'deprecated';
675+
undef @warnings;
676+
my $foo = "abc" & "abc\x{100}";
677+
$foo = "abc" | "abc\x{100}";
678+
$foo = "abc" ^ "abc\x{100}";
679+
$foo = "abc\x{100}" & "abc";
680+
$foo = "abc\x{100}" | "abc";
681+
$foo = "abc\x{100}" ^ "abc";
682+
if (! is(@warnings, 0, "... And none of the last 6 main tests warns when 'deprecated' is off")) {
683+
diag join "\n", @warnings;
684+
}
685+
}

0 commit comments

Comments
 (0)