Skip to content

Commit c012444

Browse files
esertergs
authored and committed
Another regexp failure with utf8-flagged string and byte-flagged pattern (reminder)
Date: 17 Nov 2007 16:29:29 +0100 Message-ID: <[email protected]>
1 parent bd2db5d commit c012444

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

regexec.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,15 +1007,16 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
10071007

10081008
#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
10091009
uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
1010+
UV uvc_unfolded = 0; \
10101011
switch (trie_type) { \
10111012
case trie_utf8_fold: \
10121013
if ( foldlen>0 ) { \
1013-
uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1014+
uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
10141015
foldlen -= len; \
10151016
uscan += len; \
10161017
len=0; \
10171018
} else { \
1018-
uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1019+
uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
10191020
uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
10201021
foldlen -= UNISKIP( uvc ); \
10211022
uscan = foldbuf + UNISKIP( uvc ); \
@@ -1054,6 +1055,9 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
10541055
charid = (U16)SvIV(*svpp); \
10551056
} \
10561057
} \
1058+
if (!charid && trie_type == trie_utf8_fold && !UTF) { \
1059+
charid = trie->charmap[uvc_unfolded]; \
1060+
} \
10571061
} STMT_END
10581062

10591063
#define REXEC_FBC_EXACTISH_CHECK(CoNd) \

t/op/pat.t

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ sub run_tests;
1313

1414
$| = 1;
1515

16-
my $EXPECTED_TESTS = 3865; # Update this when adding/deleting tests.
16+
my $EXPECTED_TESTS = 3961; # Update this when adding/deleting tests.
1717

1818
BEGIN {
1919
chdir 't' if -d 't';
@@ -3896,6 +3896,15 @@ sub run_tests {
38963896
iseq $1, "\xd6", "Upgrade error";
38973897
}
38983898

3899+
{
3900+
# more TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding
3901+
for my $chr (160 .. 255) {
3902+
my $chr_byte = chr($chr);
3903+
my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8);
3904+
my $rx = qr{$chr_byte|X}i;
3905+
ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr");
3906+
}
3907+
}
38993908

39003909
{
39013910
# Regardless of utf8ness any character matches itself when

0 commit comments

Comments
 (0)