@@ -2198,6 +2198,15 @@ sub trace { return main::trace(@_); }
2198
2198
# 'handler'
2199
2199
main::set_access('each_line_handler', \%each_line_handler, 'c');
2200
2200
2201
+ my %retain_trailing_comments;
2202
+ # This is used to not discard the comments that end data lines. This
2203
+ # would be used only for files with non-typical syntax, and most code here
2204
+ # assumes that comments have been stripped, so special handlers would have
2205
+ # to be written. It is assumed that the code will use these in
2206
+ # single-quoted contexts, and so any "'" marks in the comment will be
2207
+ # prefixed by a backslash.
2208
+ main::set_access('retain_trailing_comments', \%retain_trailing_comments, 'c');
2209
+
2201
2210
my %properties; # Optional ordered list of the properties that occur in each
2202
2211
# meaningful line of the input file. If present, an appropriate
2203
2212
# each_line_handler() is automatically generated and pushed onto the stack
@@ -2355,6 +2364,7 @@ sub trace { return main::trace(@_); }
2355
2364
2356
2365
# Set defaults
2357
2366
$handler{$addr} = \&main::process_generic_property_file;
2367
+ $retain_trailing_comments{$addr} = 0;
2358
2368
$non_skip{$addr} = 0;
2359
2369
$skip{$addr} = undef;
2360
2370
$has_missings_defaults{$addr} = $NO_DEFAULTS;
@@ -3020,9 +3030,21 @@ END
3020
3030
next;
3021
3031
}
3022
3032
3023
- # Remove comments and trailing space, and skip this line if the
3024
- # result is empty
3025
- s/#.*//;
3033
+ # Unless to keep, remove comments. If to keep, ignore
3034
+ # comment-only lines
3035
+ if ($retain_trailing_comments{$addr}) {
3036
+ next if / ^ \s* \# /x;
3037
+
3038
+ # But escape any single quotes (done in both the comment and
3039
+ # non-comment portion; this could be a bug someday, but not
3040
+ # likely)
3041
+ s/'/\\'/g;
3042
+ }
3043
+ else {
3044
+ s/#.*//;
3045
+ }
3046
+
3047
+ # Remove trailing space, and skip this line if the result is empty
3026
3048
s/\s+$//;
3027
3049
next if /^$/;
3028
3050
@@ -19188,18 +19210,21 @@ my @input_file_objects = (
19188
19210
),
19189
19211
Input_file->new("$AUXILIARY/GCBTest.txt", v4.1.0,
19190
19212
Handler => \&process_GCB_test,
19213
+ retain_trailing_comments => 1,
19191
19214
),
19192
19215
Input_file->new("$AUXILIARY/GraphemeBreakTest.html", v4.1.0,
19193
19216
Skip => $Validation_Documentation,
19194
19217
),
19195
19218
Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0,
19196
19219
Handler => \&process_SB_test,
19220
+ retain_trailing_comments => 1,
19197
19221
),
19198
19222
Input_file->new("$AUXILIARY/SentenceBreakTest.html", v4.1.0,
19199
19223
Skip => $Validation_Documentation,
19200
19224
),
19201
19225
Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0,
19202
19226
Handler => \&process_WB_test,
19227
+ retain_trailing_comments => 1,
19203
19228
),
19204
19229
Input_file->new("$AUXILIARY/WordBreakTest.html", v4.1.0,
19205
19230
Skip => $Validation_Documentation,
@@ -19250,6 +19275,7 @@ my @input_file_objects = (
19250
19275
),
19251
19276
Input_file->new("$AUXILIARY/LBTest.txt", v5.1.0,
19252
19277
Handler => \&process_LB_test,
19278
+ retain_trailing_comments => 1,
19253
19279
),
19254
19280
Input_file->new("$AUXILIARY/LineBreakTest.html", v5.1.0,
19255
19281
Skip => $Validation_Documentation,
@@ -19842,6 +19868,15 @@ sub _test_break($$) {
19842
19868
my $break_type = shift;
19843
19869
19844
19870
my $line = (caller 1)[2]; # Line number
19871
+ my $comment = "";
19872
+
19873
+ if ($template =~ / ( .*? ) \s* \# (.*) /x) {
19874
+ $template = $1;
19875
+ $comment = $2;
19876
+
19877
+ # Replace leading spaces with a single one.
19878
+ $comment =~ s/ ^ \s* / # /x;
19879
+ }
19845
19880
19846
19881
# The line contains characters above the ASCII range, but in Latin1. It
19847
19882
# may or may not be in utf8, and if it is, it may or may not know it. So,
@@ -19985,7 +20020,10 @@ sub _test_break($$) {
19985
20020
19986
20021
# Fancy display of test results
19987
20022
$matched = ($matched) ? "matched" : "failed to match";
19988
- print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale\n";
20023
+ print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale$comment\n";
20024
+
20025
+ # Only print the comment on the first use of this line
20026
+ $comment = "";
19989
20027
19990
20028
# Repeat with the first \B{} in the pattern. This makes sure the
19991
20029
# code in regexec.c:find_byclass() for \B gets executed
0 commit comments