From 22e39bae4f2b3d4e23a8e0e80541298a30d08802 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 4 Dec 2024 07:00:32 -0700 Subject: [PATCH] utf8_to_uv_msgs: Move decls and inits closer to first use C99 allows us to declare anywhere; so move these to where it's more logical. It also makes sure some variables are initialized before the goto that jumps to the end of the program, and which currently doesn't rely on these values, but could be changed to do so someday without the coder realizing it. This prevents a problem in case that happens. --- utf8.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/utf8.c b/utf8.c index c06c546ae7e0f..f1abd238cae34 100644 --- a/utf8.c +++ b/utf8.c @@ -1590,13 +1590,6 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, { PERL_ARGS_ASSERT_UTF8_TO_UV_MSGS_HELPER_; - const U8 * s = s0; - - U32 possible_problems; /* A bit is set here for each potential problem - found as we go along */ - UV uv; - SSize_t expectlen; /* How long should this sequence be? */ - /* Here, is one of: * a) malformed; * b) a problematic code point (surrogate, non-unicode, or nonchar); or @@ -1635,9 +1628,6 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, || UTF8_IS_NONCHAR(s0, e)); */ - s = s0; - possible_problems = 0; - expectlen = 0; if (errors) { *errors = 0; } @@ -1672,7 +1662,10 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * allowed one, we could allow in something that shouldn't have been. */ + SSize_t expectlen = 0; /* How long should this sequence be? 
*/ SSize_t curlen = 0; /* How many bytes have we processed so far */ + UV uv = 0; /* The accumulated code point, so far */ + const U8 * s = s0; /* Our current position examining the sequence */ /* Gives how many bytes are available, which may turn out to be less than * the expected length */ @@ -1683,15 +1676,18 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * than a single character */ const U8 * send = e; + /* A bit is set here for each potential problem found as we go along */ + U32 possible_problems = 0; + + /* The above variables have to be initialized before the 'goto' */ + if (UNLIKELY(avail_len <= 0)) { possible_problems |= UTF8_GOT_EMPTY; goto ready_to_handle_errors; } - /* We now know we can examine the first byte of the input */ - expectlen = UTF8SKIP(s0); - - /* A continuation character can't start a valid sequence */ + /* We now know we can examine the first byte of the input. A continuation + * character can't start a valid sequence */ if (UNLIKELY(UTF8_IS_CONTINUATION(*s0))) { possible_problems |= UTF8_GOT_CONTINUATION; curlen = 1; @@ -1707,6 +1703,8 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * to check for sure because it excludes start bytes like \xC0 that always * lead to overlongs.) */ + expectlen = UTF8SKIP(s0); /* How long should this sequence be? */ + /* Convert to I8 on EBCDIC (no-op on ASCII), then remove the leading bits * that indicate the number of bytes in the character's whole UTF-8 * sequence, leaving just the bits that are part of the value. */