Skip to content

Commit c8b94fe

Browse files
jkeenankhwilliamson
authored andcommitted
Use of code points over 0xFF in string bitwise operators
Implement complete fatalization. Some instances of these were fatalized in 5.28. However, in cases where the wide characters did not affect the end result, no deprecation notice was raised. So they remained legal, though deprecated. Now, all occurrences are fatal (as of 5.32). Modify source code in doop.c. Adapt test file. Update perldiag and perldeprecation. For: RT 134140 (Commiter changed a verb to past tense in the pod)
1 parent 3a24361 commit c8b94fe

File tree

4 files changed

+33
-95
lines changed

4 files changed

+33
-95
lines changed

doop.c

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,30 +1084,11 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
10841084
* on zeros without having to do it. In the case of '&', the result is
10851085
* zero, and the dangling portion is simply discarded. For '|' and '^', the
10861086
* result is the same as the other operand, so the dangling part is just
1087-
* appended to the final result, unchanged. We currently accept above-FF
1088-
* code points in the dangling portion, as that's how it has long worked,
1089-
* and code depends on it staying that way. But it is now fatal for
1090-
* above-FF to appear in the portion that does get operated on. Hence, any
1091-
* above-FF must come only in the longer operand, and only in its dangling
1092-
* portion. That means that at least one of the operands has to be
1093-
* entirely non-UTF-8, and the length of that operand has to be before the
1094-
* first above-FF in the other */
1087+
* appended to the final result, unchanged. As of perl-5.32, we no longer
1088+
* accept above-FF code points in the dangling portion.
1089+
*/
10951090
if (left_utf8 || right_utf8) {
1096-
if (left_utf8) {
1097-
if (right_utf8 || rightlen > leftlen) {
1098-
Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]);
1099-
}
1100-
len = rightlen;
1101-
}
1102-
else if (right_utf8) {
1103-
if (leftlen > rightlen) {
1104-
Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]);
1105-
}
1106-
len = leftlen;
1107-
}
1108-
1109-
Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
1110-
DEPRECATED_ABOVE_FF_MSG, PL_op_desc[optype]);
1091+
Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]);
11111092
}
11121093
else { /* Neither is UTF-8 */
11131094
len = MIN(leftlen, rightlen);

pod/perldeprecation.pod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,14 @@ nonsensical. Some instances of these have been deprecated since Perl
6565
the wide characters did not affect the end result, no deprecation
6666
notice was raised, and so remain legal. Now, all occurrences either are
6767
fatal or raise a deprecation warning, so that the remaining legal
68-
occurrences will be fatal in 5.32.
68+
occurrences became fatal in 5.32.
6969

7070
An example of this is
7171

7272
"" & "\x{100}"
7373

7474
The wide character is not used in the C<&> operation because the left
75-
operand is shorter. This now warns anyway.
75+
operand is shorter. This now throws an exception.
7676

7777
=head3 hostname() doesn't accept any arguments
7878

pod/perldiag.pod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7338,7 +7338,7 @@ C<~>) on a string containing a code point over 0xFF. The string bitwise
73387338
operators treat their operands as strings of bytes, and values beyond
73397339
0xFF are nonsensical in this context.
73407340

7341-
This became fatal in Perl 5.28.
7341+
Certain instances became fatal in Perl 5.28; others in perl 5.32.
73427342

73437343
=item Use of strings with code points over 0xFF as arguments to vec is forbidden
73447344

t/op/bop.t

Lines changed: 26 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ BEGIN {
1818
# If you find tests are failing, please try adding names to tests to track
1919
# down where the failure is, and supply your new names as a patch.
2020
# (Just-in-time test naming)
21-
plan tests => 504;
21+
plan tests => 491;
2222

2323
# numerics
2424
ok ((0xdead & 0xbeef) == 0x9ead);
@@ -613,73 +613,30 @@ foreach my $op_info ([and => "&"], [or => "|"], [xor => "^"]) {
613613
}
614614
615615
{
616-
# Since these are temporary, and it was a pain to make them into loops,
617-
# the code is just rolled out.
618-
local $SIG{__WARN__} = sub { push @warnings, @_; };
619-
620-
undef @warnings;
621-
is("abc" & "abc\x{100}", "abc", '"abc" & "abc\x{100}" works');
622-
if (! is(@warnings, 1, "... but returned a single warning")) {
623-
diag join "\n", @warnings;
624-
}
625-
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
626-
)arguments to bitwise and \(&\) operator (?#
627-
)is deprecated/,
628-
"... which is the expected warning");
629-
undef @warnings;
630-
is("abc" | "abc\x{100}", "abc\x{100}", '"abc" | "abc\x{100}" works');
631-
if (! is(@warnings, 1, "... but returned a single warning")) {
632-
diag join "\n", @warnings;
633-
}
634-
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
635-
)arguments to bitwise or \(|\) operator (?#
636-
)is deprecated/,
637-
"... which is the expected warning");
638-
undef @warnings;
639-
is("abc" ^ "abc\x{100}", "\0\0\0\x{100}", '"abc" ^ "abc\x{100}" works');
640-
if (! is(@warnings, 1, "... but returned a single warning")) {
641-
diag join "\n", @warnings;
642-
}
643-
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
644-
)arguments to bitwise xor \(\^\) operator (?#
645-
)is deprecated/,
646-
"... which is the expected warning");
647-
undef @warnings;
648-
is("abc\x{100}" & "abc", "abc", '"abc\x{100}" & "abc" works');
649-
if (! is(@warnings, 1, "... but returned a single warning")) {
650-
diag join "\n", @warnings;
651-
}
652-
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
653-
)arguments to bitwise and \(&\) operator (?#
654-
)is deprecated/,
655-
"... which is the expected warning");
656-
undef @warnings;
657-
is("abc\x{100}" | "abc", "abc\x{100}", '"abc\x{100}" | "abc" works');
658-
if (! is(@warnings, 1, "... but returned a single warning")) {
659-
diag join "\n", @warnings;
660-
}
661-
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
662-
)arguments to bitwise or \(|\) operator (?#
663-
)is deprecated/,
664-
"... which is the expected warning");
665-
undef @warnings;
666-
is("abc\x{100}" ^ "abc", "\0\0\0\x{100}", '"abc\x{100}" ^ "abc" works');
667-
if (! is(@warnings, 1, "... but returned a single warning")) {
668-
diag join "\n", @warnings;
669-
}
670-
like ($warnings[0], qr /^Use of strings with code points over 0xFF as (?#
671-
)arguments to bitwise xor \(\^\) operator (?#
672-
)is deprecated/,
673-
"... which is the expected warning");
674-
no warnings 'deprecated';
675-
undef @warnings;
676-
my $foo = "abc" & "abc\x{100}";
677-
$foo = "abc" | "abc\x{100}";
678-
$foo = "abc" ^ "abc\x{100}";
679-
$foo = "abc\x{100}" & "abc";
680-
$foo = "abc\x{100}" | "abc";
681-
$foo = "abc\x{100}" ^ "abc";
682-
if (! is(@warnings, 0, "... And none of the last 6 main tests warns when 'deprecated' is off")) {
683-
diag join "\n", @warnings;
616+
# RT 134140 fatalizations
617+
my %op_pairs = (
618+
and => { low => 'and', high => '&', regex => qr/&/ },
619+
or => { low => 'or', high => '|', regex => qr/\|/ },
620+
xor => { low => 'xor', high => '^', regex => qr/\^/ },
621+
);
622+
my @combos = (
623+
{ string => '"abc" & "abc\x{100}"', op_pair => $op_pairs{and} },
624+
{ string => '"abc" | "abc\x{100}"', op_pair => $op_pairs{or} },
625+
{ string => '"abc" ^ "abc\x{100}"', op_pair => $op_pairs{xor} },
626+
{ string => '"abc\x{100}" & "abc"', op_pair => $op_pairs{and} },
627+
{ string => '"abc\x{100}" | "abc"', op_pair => $op_pairs{or} },
628+
{ string => '"abc\x{100}" ^ "abc"', op_pair => $op_pairs{xor} },
629+
630+
);
631+
632+
# Use of strings with code points over 0xFF as arguments to %s operator is not allowed
633+
for my $h (@combos) {
634+
my $s1 = "Use of strings with code points over 0xFF as arguments to bitwise";
635+
my $s2 = "operator is not allowed";
636+
my $expected = qr/$s1 $h->{op_pair}->{low} \($h->{op_pair}->{regex}\) $s2/;
637+
my $description = "$s1 $h->{op_pair}->{low} ($h->{op_pair}->{high}) operator is not allowed";
638+
local $@;
639+
eval $h->{string};
640+
like $@, $expected, $description;
684641
}
685642
}

0 commit comments

Comments
 (0)