Skip to content

malformed utf8 message: small cleanups #21616

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doop.c
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap)
else {
from = utf8_to_uvchr_buf(s, send, &s_len);
if (from == 0 && *s != '\0') {
_force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
_force_out_malformed_utf8_message(s, send, 0, MALFORMED_UTF8_DIE);
}
}

Expand Down Expand Up @@ -492,7 +492,7 @@ S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap)
else {
from = utf8_to_uvchr_buf(s, send, &s_len);
if (from == 0 && *s != '\0') {
_force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
_force_out_malformed_utf8_message(s, send, 0, MALFORMED_UTF8_DIE);
}
}

Expand Down
14 changes: 7 additions & 7 deletions handy.h
Original file line number Diff line number Diff line change
Expand Up @@ -1436,7 +1436,7 @@ or casts
/* Likewise, this is effectively a static assert to be used to guarantee the
 * parameter is a pointer.
 *
 * How it works: the operand sizeof(*(x)) only compiles when 'x' can be
 * dereferenced, so passing something that cannot be dereferenced is rejected
 * at compile time.
 *
 * NOT suitable for void*, because *(x) would then have the incomplete type
 * void, to which sizeof cannot be applied in ISO C.
 *
 * NOTE(review): the value of the whole expression depends on how __ASSERT_
 * expands (not visible here); presumably it leaves '(x)' as the result --
 * confirm against its definition.
 */
#define ASSERT_IS_PTR(x) (__ASSERT_(sizeof(*(x))) (x))

Expand Down Expand Up @@ -2276,15 +2276,15 @@ END_EXTERN_C

/* Classify the character at 'p' (with 'e' as the end bound) for class
 * 'classnum'.  The branches are tried in this order:
 *   1) _utf8_safe_assert(p, e) is false: call
 *      _force_out_malformed_utf8_message() and yield 0;
 *   2) UTF8_IS_INVARIANT(*p): yield generic_isCC_(*p, classnum);
 *   3) UTF8_IS_DOWNGRADEABLE_START(*p): if more than one byte is available
 *      and the next byte satisfies UTF8_IS_CONTINUATION, yield
 *      generic_isCC_() on EIGHT_BIT_UTF8_TO_NATIVE(p[0], p[1]); otherwise
 *      call _force_out_malformed_utf8_message() and yield 0;
 *   4) anything else: yield the caller-supplied 'above_latin1' expression.
 * The malformed-input branches pass the "die" argument, spelled 1 in the
 * older lines and MALFORMED_UTF8_DIE in the newer lines shown here.
 * 'p' and 'e' are evaluated more than once, so they must be free of side
 * effects. */
#define generic_utf8_safe_(classnum, p, e, above_latin1) \
((! _utf8_safe_assert(p, e)) \
? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, 1), 0)\
? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)\
: (UTF8_IS_INVARIANT(*(p))) \
? generic_isCC_(*(p), classnum) \
: (UTF8_IS_DOWNGRADEABLE_START(*(p)) \
? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \
? generic_isCC_(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1 )), \
classnum) \
: (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)) \
: above_latin1))
/* Like the above, but calls 'above_latin1(p)' to get the utf8 value.
* 'above_latin1' can be a macro */
Expand All @@ -2294,7 +2294,7 @@ END_EXTERN_C
generic_utf8_safe_(classnum, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
? (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
: above_latin1(p)))
/* Like the above, but passes classnum to _isFOO_utf8(), instead of having an
* 'above_latin1' parameter */
Expand Down Expand Up @@ -2384,7 +2384,7 @@ END_EXTERN_C
generic_utf8_safe_no_upper_latin1_(CC_XDIGIT_, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
? (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
: is_XDIGIT_high(p)))

/* Expands to toFOLD_utf8_safe() with the same four arguments, unchanged. */
#define toFOLD_utf8(p,e,s,l) toFOLD_utf8_safe(p,e,s,l)
Expand Down Expand Up @@ -2433,7 +2433,7 @@ END_EXTERN_C
? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \
? macro(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1))) \
: (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)) \
: above_latin1))

#define generic_LC_invlist_utf8_safe_(macro, classnum, p, e) \
Expand All @@ -2447,7 +2447,7 @@ END_EXTERN_C
generic_LC_utf8_safe_(classnum, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
? (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
: above_latin1(p)))

#define isALPHANUMERIC_LC_utf8_safe(p, e) \
Expand Down
6 changes: 3 additions & 3 deletions pp_pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#define PERL_IN_PP_PACK_C
#include "perl.h"

/* Types used by pack/unpack */
/* Types used by pack/unpack */
typedef enum {
e_no_len, /* no length */
e_number, /* number, [] */
Expand All @@ -48,7 +48,7 @@ typedef struct tempsym {
U32 flags; /* /=4, comma=2, pack=1 */
/* and group modifiers */
SSize_t length; /* length/repeat count */
howlen_t howlen; /* how length is given */
howlen_t howlen; /* how length is given */
int level; /* () nesting level */
STRLEN strbeg; /* offset of group start */
struct tempsym *previous; /* previous group */
Expand Down Expand Up @@ -3167,7 +3167,7 @@ PP_wrapped(pp_pack, 0, 1)
_force_out_malformed_utf8_message(error_pos,
(U8 *) result + result_len,
0, /* no flags */
1 /* Die */
MALFORMED_UTF8_DIE
);
NOT_REACHED; /* NOTREACHED */
}
Expand Down
4 changes: 2 additions & 2 deletions regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -8041,7 +8041,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
break;
}
} while(n);

if (!n) /* this means there is nothing that matched */
sayNO;
}
Expand Down Expand Up @@ -10921,7 +10921,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
if (c_len == (STRLEN)-1) {
_force_out_malformed_utf8_message(p, p_end,
utf8n_flags,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
if ( c > 255
Expand Down
10 changes: 5 additions & 5 deletions toke.c
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) s + SvCUR(line),
0,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
else if (ONLY_ASCII && UNLIKELY(! is_ascii_string_loc(
Expand Down Expand Up @@ -1589,7 +1589,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) PL_parser->bufend,
0,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
}
Expand Down Expand Up @@ -1679,7 +1679,7 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
_force_out_malformed_utf8_message((U8 *) s,
(U8 *) bufend,
0,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
return unichar;
Expand Down Expand Up @@ -3058,7 +3058,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) PL_parser->bufend,
0,
0 /* 0 means don't die */ );
MALFORMED_UTF8_WARN);
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
immediately after '%s' */
*error_msg = Perl_form(aTHX_
Expand Down Expand Up @@ -9656,7 +9656,7 @@ Perl_yylex(pTHX)
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) PL_bufend,
0,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
else if (ONLY_ASCII && UNLIKELY(! is_ascii_string_loc(
Expand Down
4 changes: 2 additions & 2 deletions utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -3289,7 +3289,7 @@ S_is_utf8_common(pTHX_ const U8 *const p, const U8 * const e,
PERL_ARGS_ASSERT_IS_UTF8_COMMON;

if (cp == 0 && (p >= e || *p != '\0')) {
_force_out_malformed_utf8_message(p, e, 0, 1);
_force_out_malformed_utf8_message(p, e, 0, MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}

Expand Down Expand Up @@ -3834,7 +3834,7 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
STRLEN len_result; \
result = utf8n_to_uvchr(p, e - p, &len_result, UTF8_CHECK_ONLY); \
if (len_result == (STRLEN) -1) { \
_force_out_malformed_utf8_message(p, e, 0, 1 /* Die */ ); \
_force_out_malformed_utf8_message(p, e, 0, MALFORMED_UTF8_DIE ); \
}

#define CASE_CHANGE_BODY_END(locale_flags, change_macro) \
Expand Down
3 changes: 3 additions & 0 deletions utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,9 @@ point's representation.
* retained solely for backwards compatibility */
/* Compares the value returned by isUTF8_CHAR(p, p + n) with 'n'.  'n' is
 * parenthesized everywhere it is used so that an expression argument (for
 * example a conditional or a bitwise-and) is compared as a whole instead of
 * being split apart by operator precedence.  Both 'p' and 'n' are evaluated
 * more than once, so avoid arguments with side effects. */
#define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == (n))

/* Named values for the final ("die") argument of
 * _force_out_malformed_utf8_message(): callers pass MALFORMED_UTF8_DIE where
 * they previously passed 1/TRUE ("die"), and MALFORMED_UTF8_WARN where they
 * previously passed 0 ("don't die"). */
#define MALFORMED_UTF8_DIE TRUE
#define MALFORMED_UTF8_WARN FALSE

#endif /* PERL_UTF8_H_ */

/*
Expand Down