From da982b4f001ee1b4bf07a2a0b583fa4d1d17d0a3 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Oct 2022 20:39:02 -0600 Subject: [PATCH 01/12] Add wrap_wsetlocale() to embed.fnc This makes the calls to it cleaner. --- embed.fnc | 2 ++ embed.h | 1 + locale.c | 8 +++++--- proto.h | 2 ++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/embed.fnc b/embed.fnc index 201b2f37b77d..0e66ce458253 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3425,6 +3425,8 @@ S |void |print_collxfrm_input_and_return \ S |char* |win32_setlocale|int category|NULLOK const char* locale pTC |wchar_t *|Win_utf8_string_to_wstring|NULLOK const char * utf8_string pTC |char * |Win_wstring_to_utf8_string|NULLOK const wchar_t * wstring +S |char *|wrap_wsetlocale |const int category \ + |NULLOK const char *locale # endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) S |const char*|my_langinfo_i|const nl_item item \ diff --git a/embed.h b/embed.h index 552148ba7226..9828d4083be2 100644 --- a/embed.h +++ b/embed.h @@ -1782,6 +1782,7 @@ # endif # if defined(WIN32) #define win32_setlocale(a,b) S_win32_setlocale(aTHX_ a,b) +#define wrap_wsetlocale(a,b) S_wrap_wsetlocale(aTHX_ a,b) # endif # endif # endif diff --git a/locale.c b/locale.c index ef5bfa083f73..b64394c48ed7 100644 --- a/locale.c +++ b/locale.c @@ -2500,6 +2500,8 @@ Perl_Win_wstring_to_utf8_string(const wchar_t * wstring) STATIC char * S_wrap_wsetlocale(pTHX_ int category, const char *locale) { + PERL_ARGS_ASSERT_WRAP_WSETLOCALE; + wchar_t *wlocale = NULL; wchar_t *wresult; char *result; @@ -2587,7 +2589,7 @@ S_win32_setlocale(pTHX_ int category, const char* locale) } #ifdef USE_WSETLOCALE - result = S_wrap_wsetlocale(aTHX_ category, locale); + result = wrap_wsetlocale(category, locale); #else result = setlocale(category, locale); #endif @@ -2608,7 +2610,7 @@ S_win32_setlocale(pTHX_ int category, const char* locale) result = PerlEnv_getenv(category_names[i]); if (result && strNE(result, "")) { #ifdef USE_WSETLOCALE - S_wrap_wsetlocale(aTHX_ categories[i], result); + wrap_wsetlocale(categories[i], result); #else setlocale(categories[i], result); #endif @@ -5050,7 +5052,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * use wrap_wsetlocale(). */ const char *system_default_locale = stdize_locale(LC_ALL, - S_wrap_wsetlocale(aTHX_ LC_ALL, ""), + wrap_wsetlocale(LC_ALL, ""), &PL_stdize_locale_buf, &PL_stdize_locale_bufsize, __LINE__); diff --git a/proto.h b/proto.h index 06b6710ee548..7af4b8952bea 100644 --- a/proto.h +++ b/proto.h @@ -5767,6 +5767,8 @@ PERL_CALLCONV char * Perl_Win_wstring_to_utf8_string(const wchar_t * wstring); #define PERL_ARGS_ASSERT_WIN_WSTRING_TO_UTF8_STRING STATIC char* S_win32_setlocale(pTHX_ int category, const char* locale); #define PERL_ARGS_ASSERT_WIN32_SETLOCALE +STATIC char * S_wrap_wsetlocale(pTHX_ const int category, const char *locale); +#define PERL_ARGS_ASSERT_WRAP_WSETLOCALE # endif # endif #endif From 9eb675372f59b3cef94994e9aceb0a1607950acc Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 5 Oct 2022 06:35:19 -0600 Subject: [PATCH 02/12] locale.c: Move find_locale_from_environment() in file This is in preparation for this function to be used under more circumstances. --- embed.fnc | 4 +- embed.h | 4 +- locale.c | 159 +++++++++++++++++++++++++++--------------------------- proto.h | 6 ++- 4 files changed, 89 insertions(+), 84 deletions(-) diff --git a/embed.fnc b/embed.fnc index 0e66ce458253..0e663f02b2f5 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3381,7 +3381,6 @@ S |const char *|setlocale_from_aggregate_LC_ALL \ S |const char*|update_PL_curlocales_i|const unsigned int index \ |NN const char * new_locale \ |recalc_lc_all_t recalc_LC_ALL -S |const char *|find_locale_from_environment|const unsigned int index # endif # else # if defined(USE_LOCALE_THREADS) \ @@ -3402,6 +3401,9 @@ S |void |less_dicey_void_setlocale_i \ |const line_t line # endif # endif +# if defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE) +S |const char *|find_locale_from_environment|const unsigned int index +# endif # if defined(USE_POSIX_2008_LOCALE) && defined(USE_QUERYLOCALE) S |const char *|calculate_LC_ALL|const locale_t cur_obj # else diff --git a/embed.h b/embed.h index 9828d4083be2..2043e7139265 100644 --- a/embed.h +++ b/embed.h @@ -1598,7 +1598,6 @@ # if defined(PERL_IN_LOCALE_C) # if defined(USE_LOCALE) # if defined(USE_POSIX_2008_LOCALE) -#define find_locale_from_environment(a) S_find_locale_from_environment(aTHX_ a) #define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c) # endif # endif @@ -1777,6 +1776,9 @@ #define setlocale_from_aggregate_LC_ALL(a,b) S_setlocale_from_aggregate_LC_ALL(aTHX_ a,b) #define use_curlocale_scratch() S_use_curlocale_scratch(aTHX) # endif +# if defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE) +#define find_locale_from_environment(a) S_find_locale_from_environment(aTHX_ a) +# endif # if defined(USE_POSIX_2008_LOCALE) && defined(USE_QUERYLOCALE) #define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a) # endif diff --git a/locale.c b/locale.c index b64394c48ed7..345254a4bfa0 100644 --- a/locale.c +++ b/locale.c @@ -1051,86 +1051,6 @@ S_setlocale_from_aggregate_LC_ALL(pTHX_ const char * locale, const line_t line) return retval; } -# ifndef USE_QUERYLOCALE - -STATIC const char * -S_find_locale_from_environment(pTHX_ const unsigned int index) -{ - /* On systems without querylocale(), it is problematic getting the results - * of the POSIX 2008 equivalent of setlocale(category, "") (which gets the - * locale from the environment). - * - * To ensure that we know exactly what those values are, we do the setting - * ourselves, using the documented algorithm (assuming the documentation is - * correct) rather than use "" as the locale. This will lead to results - * that differ from native behavior if the native behavior differs from the - * standard documented value, but khw believes it is better to know what's - * going on, even if different from native, than to just guess. - * - * Another option would be, in a critical section, to save the global - * locale's current value, and do a straight setlocale(LC_ALL, ""). That - * would return our desired values, destroying the global locale's, which - * we would then restore. But that could cause races with any other thread - * that is using the global locale and isn't using the mutex. And, the - * only reason someone would have done that is because they are calling a - * library function, like in gtk, that calls setlocale(), and which can't - * be changed to use the mutex. That wouldn't be a problem if this were to - * be done before any threads had switched, say during perl construction - * time. But this code would still be needed for the general case. */ - - const char * default_name; - unsigned int i; - const char * locale_names[LC_ALL_INDEX_]; - - /* We rely on PerlEnv_getenv() returning a mortalized copy */ - const char * const lc_all = PerlEnv_getenv("LC_ALL"); - - /* Use any "LC_ALL" environment variable, as it overrides everything - * else. */ - if (lc_all && strNE(lc_all, "")) { - return lc_all; - } - - /* Otherwise, we need to dig deeper. Unless overridden, the default is - * the LANG environment variable; "C" if it doesn't exist. */ - default_name = PerlEnv_getenv("LANG"); - if (! default_name || strEQ(default_name, "")) { - default_name = "C"; - } - - /* If setting an individual category, use its corresponding value found in - * the environment, if any; otherwise use the default we already - * calculated. */ - if (index != LC_ALL_INDEX_) { - const char * const new_value = PerlEnv_getenv(category_names[index]); - - return (new_value && strNE(new_value, "")) - ? new_value - : default_name; - } - - /* Here, we are getting LC_ALL. Any categories that don't have a - * corresponding environment variable set should be set to 'default_name' - * - * Simply find the values for all categories, and call the function to - * compute LC_ALL. */ - for (i = 0; i < LC_ALL_INDEX_; i++) { - const char * const env_override = PerlEnv_getenv(category_names[i]); - - locale_names[i] = (env_override && strNE(env_override, "")) - ? env_override - : default_name; - - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "find_locale_from_environment i=%d, name=%s, locale=%s\n", - i, category_names[i], locale_names[i])); - } - - return calculate_LC_ALL(locale_names); -} - -# endif - STATIC const char * S_emulate_setlocale_i(pTHX_ @@ -1557,6 +1477,85 @@ S_stdize_locale(pTHX_ const int category, return retval; } +#if defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE) + +STATIC const char * +S_find_locale_from_environment(pTHX_ const unsigned int index) +{ + /* On systems without querylocale(), it is problematic getting the results + * of the POSIX 2008 equivalent of setlocale(category, "") (which gets the + * locale from the environment). + * + * To ensure that we know exactly what those values are, we do the setting + * ourselves, using the documented algorithm (assuming the documentation is + * correct) rather than use "" as the locale. This will lead to results + * that differ from native behavior if the native behavior differs from the + * standard documented value, but khw believes it is better to know what's + * going on, even if different from native, than to just guess. + * + * Another option would be, in a critical section, to save the global + * locale's current value, and do a straight setlocale(LC_ALL, ""). That + * would return our desired values, destroying the global locale's, which + * we would then restore. But that could cause races with any other thread + * that is using the global locale and isn't using the mutex. And, the + * only reason someone would have done that is because they are calling a + * library function, like in gtk, that calls setlocale(), and which can't + * be changed to use the mutex. That wouldn't be a problem if this were to + * be done before any threads had switched, say during perl construction + * time. But this code would still be needed for the general case. */ + + const char * default_name; + unsigned int i; + const char * locale_names[LC_ALL_INDEX_]; + + /* We rely on PerlEnv_getenv() returning a mortalized copy */ + const char * const lc_all = PerlEnv_getenv("LC_ALL"); + + /* Use any "LC_ALL" environment variable, as it overrides everything + * else. */ + if (lc_all && strNE(lc_all, "")) { + return lc_all; + } + + /* Otherwise, we need to dig deeper. Unless overridden, the default is + * the LANG environment variable; "C" if it doesn't exist. */ + default_name = PerlEnv_getenv("LANG"); + if (! default_name || strEQ(default_name, "")) { + default_name = "C"; + } + + /* If setting an individual category, use its corresponding value found in + * the environment, if any; otherwise use the default we already + * calculated. */ + if (index != LC_ALL_INDEX_) { + const char * const new_value = PerlEnv_getenv(category_names[index]); + + return (new_value && strNE(new_value, "")) + ? new_value + : default_name; + } + + /* Here, we are getting LC_ALL. Any categories that don't have a + * corresponding environment variable set should be set to 'default_name' + * + * Simply find the values for all categories, and call the function to + * compute LC_ALL. */ + for (i = 0; i < LC_ALL_INDEX_; i++) { + const char * const env_override = PerlEnv_getenv(category_names[i]); + + locale_names[i] = (env_override && strNE(env_override, "")) + ? env_override + : default_name; + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "find_locale_from_environment i=%d, name=%s, locale=%s\n", + i, category_names[i], locale_names[i])); + } + + return calculate_LC_ALL(locale_names); +} + +#endif #if defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) STATIC diff --git a/proto.h b/proto.h index 7af4b8952bea..eee1e17be6b6 100644 --- a/proto.h +++ b/proto.h @@ -5060,8 +5060,6 @@ STATIC void S_validate_suid(pTHX_ PerlIO *rsfp); # if defined(PERL_IN_LOCALE_C) # if defined(USE_LOCALE) # if defined(USE_POSIX_2008_LOCALE) -STATIC const char * S_find_locale_from_environment(pTHX_ const unsigned int index); -#define PERL_ARGS_ASSERT_FIND_LOCALE_FROM_ENVIRONMENT STATIC const char* S_update_PL_curlocales_i(pTHX_ const unsigned int index, const char * new_locale, recalc_lc_all_t recalc_LC_ALL); #define PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I \ assert(new_locale) @@ -5756,6 +5754,10 @@ STATIC const char * S_setlocale_from_aggregate_LC_ALL(pTHX_ const char * locale, STATIC locale_t S_use_curlocale_scratch(pTHX); #define PERL_ARGS_ASSERT_USE_CURLOCALE_SCRATCH # endif +# if defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE) +STATIC const char * S_find_locale_from_environment(pTHX_ const unsigned int index); +#define PERL_ARGS_ASSERT_FIND_LOCALE_FROM_ENVIRONMENT +# endif # if defined(USE_POSIX_2008_LOCALE) && defined(USE_QUERYLOCALE) STATIC const char * S_calculate_LC_ALL(pTHX_ const locale_t cur_obj); #define PERL_ARGS_ASSERT_CALCULATE_LC_ALL From 18a494aabd0b46f65b9a7d6c9da70a05fd4acd6c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 5 Oct 2022 09:46:25 -0600 Subject: [PATCH 03/12] locale.c: Refactor S_find_locale_from_environment() This changes this function a bit to make the next commit easier, which will extend the function to being usable from Windows. This also moves declarations closer to first use, as now allowed in C99. --- locale.c | 63 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/locale.c b/locale.c index 345254a4bfa0..372f7fb951ff 100644 --- a/locale.c +++ b/locale.c @@ -1504,10 +1504,6 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) * be done before any threads had switched, say during perl construction * time. But this code would still be needed for the general case. */ - const char * default_name; - unsigned int i; - const char * locale_names[LC_ALL_INDEX_]; - /* We rely on PerlEnv_getenv() returning a mortalized copy */ const char * const lc_all = PerlEnv_getenv("LC_ALL"); @@ -1517,35 +1513,52 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) return lc_all; } - /* Otherwise, we need to dig deeper. Unless overridden, the default is - * the LANG environment variable; "C" if it doesn't exist. */ - default_name = PerlEnv_getenv("LANG"); - if (! default_name || strEQ(default_name, "")) { - default_name = "C"; - } - /* If setting an individual category, use its corresponding value found in - * the environment, if any; otherwise use the default we already - * calculated. */ + * the environment, if any */ if (index != LC_ALL_INDEX_) { const char * const new_value = PerlEnv_getenv(category_names[index]); - return (new_value && strNE(new_value, "")) - ? new_value - : default_name; + if (new_value && strNE(new_value, "")) { + return new_value; + } + + /* If no corresponding environment variable, see if LANG exists. If + * so, use it. */ + const char * default_name = PerlEnv_getenv("LANG"); + if (default_name && strNE(default_name, "")) { + return default_name; + } + + /* If no LANG, use "C" */ + return "C"; } - /* Here, we are getting LC_ALL. Any categories that don't have a - * corresponding environment variable set should be set to 'default_name' - * - * Simply find the values for all categories, and call the function to - * compute LC_ALL. */ - for (i = 0; i < LC_ALL_INDEX_; i++) { + /* Here is LC_ALL, and no LC_ALL environment variable. LANG is used (or + * "C" if no LANG), but overridden for individual categories that have + * corresponding environment variables */ + const char * default_name = PerlEnv_getenv("LANG"); + + /* Convert "" to NULL to save conditionals in the loop below */ + if (default_name != NULL && strEQ(default_name, "")) { + default_name = NULL; + } + + /* Loop through all the individual categories, setting each to any + * corresponding environment variable; or to the default if none exists for + * the category */ + const char * locale_names[LC_ALL_INDEX_]; + for (unsigned i = 0; i < LC_ALL_INDEX_; i++) { const char * const env_override = PerlEnv_getenv(category_names[i]); - locale_names[i] = (env_override && strNE(env_override, "")) - ? env_override - : default_name; + if (env_override && strNE(env_override, "")) { + locale_names[i] = env_override; + } + else if (default_name) { + locale_names[i] = default_name; + } + else { + locale_names[i] = "C"; + } DEBUG_Lv(PerlIO_printf(Perl_debug_log, "find_locale_from_environment i=%d, name=%s, locale=%s\n", From 95a46288c3bf396c627cf8c6a8551ac249f0efc3 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 6 Oct 2022 07:39:22 -0600 Subject: [PATCH 04/12] locale.c: Meld two functions into one There is code in locale.c to emulate POSIX 'setlocale(foo, "")'. And there is separate code to emulate this on Windows. This commit collapses them, ensuring the same algorithm is used on both systems. --- embed.fnc | 5 +- embed.h | 8 +-- locale.c | 165 ++++++++++++++++++++++++------------------------------ proto.h | 10 ++-- 4 files changed, 85 insertions(+), 103 deletions(-) diff --git a/embed.fnc b/embed.fnc index 0e663f02b2f5..4edb6a507c5d 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3401,14 +3401,15 @@ S |void |less_dicey_void_setlocale_i \ |const line_t line # endif # endif -# if defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE) +# if defined(WIN32) || ( defined(USE_POSIX_2008_LOCALE) \ + && ! defined(USE_QUERYLOCALE)) S |const char *|find_locale_from_environment|const unsigned int index # endif # if defined(USE_POSIX_2008_LOCALE) && defined(USE_QUERYLOCALE) S |const char *|calculate_LC_ALL|const locale_t cur_obj # else : regen/embed.pl can't currently cope with 'elif' -# if defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) +# if defined(WIN32) || defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) S |const char *|calculate_LC_ALL|NN const char ** individ_locales # endif # endif diff --git a/embed.h b/embed.h index 2043e7139265..70be607047e2 100644 --- a/embed.h +++ b/embed.h @@ -1522,7 +1522,7 @@ # if !(defined(USE_POSIX_2008_LOCALE) && defined(USE_QUERYLOCALE)) # if defined(PERL_IN_LOCALE_C) # if defined(USE_LOCALE) -# if defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) +# if defined(WIN32) || defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) #define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a) # endif # endif @@ -1776,9 +1776,6 @@ #define setlocale_from_aggregate_LC_ALL(a,b) S_setlocale_from_aggregate_LC_ALL(aTHX_ a,b) #define use_curlocale_scratch() S_use_curlocale_scratch(aTHX) # endif -# if defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE) -#define find_locale_from_environment(a) S_find_locale_from_environment(aTHX_ a) -# endif # if defined(USE_POSIX_2008_LOCALE) && defined(USE_QUERYLOCALE) #define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a) # endif @@ -1786,6 +1783,9 @@ #define win32_setlocale(a,b) S_win32_setlocale(aTHX_ a,b) #define wrap_wsetlocale(a,b) S_wrap_wsetlocale(aTHX_ a,b) # endif +# if defined(WIN32) || ( defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) +#define find_locale_from_environment(a) S_find_locale_from_environment(aTHX_ a) +# endif # endif # endif # if defined(PERL_IN_MALLOC_C) diff --git a/locale.c b/locale.c index 372f7fb951ff..371c1ea4616c 100644 --- a/locale.c +++ b/locale.c @@ -1477,14 +1477,20 @@ S_stdize_locale(pTHX_ const int category, return retval; } -#if defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE) +#if defined(WIN32) || ( defined(USE_POSIX_2008_LOCALE) \ + && ! defined(USE_QUERYLOCALE)) STATIC const char * S_find_locale_from_environment(pTHX_ const unsigned int index) { - /* On systems without querylocale(), it is problematic getting the results - * of the POSIX 2008 equivalent of setlocale(category, "") (which gets the - * locale from the environment). + /* On Windows systems, the concept of the POSIX ordering of environment + * variables is missing. To increase portability of programs across + * platforms, the POSIX ordering is emulated on Windows. + * + * And on POSIX 2008 systems without querylocale(), it is problematic + * getting the results of the POSIX 2008 equivalent of + * setlocale(category, "") + * (which gets the locale from the environment). * * To ensure that we know exactly what those values are, we do the setting * ourselves, using the documented algorithm (assuming the documentation is @@ -1493,16 +1499,27 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) * standard documented value, but khw believes it is better to know what's * going on, even if different from native, than to just guess. * - * Another option would be, in a critical section, to save the global - * locale's current value, and do a straight setlocale(LC_ALL, ""). That - * would return our desired values, destroying the global locale's, which - * we would then restore. But that could cause races with any other thread - * that is using the global locale and isn't using the mutex. And, the - * only reason someone would have done that is because they are calling a - * library function, like in gtk, that calls setlocale(), and which can't - * be changed to use the mutex. That wouldn't be a problem if this were to - * be done before any threads had switched, say during perl construction - * time. But this code would still be needed for the general case. */ + * Another option for the POSIX 2008 case would be, in a critical section, + * to save the global locale's current value, and do a straight + * setlocale(LC_ALL, ""). That would return our desired values, destroying + * the global locale's, which we would then restore. But that could cause + * races with any other thread that is using the global locale and isn't + * using the mutex. And, the only reason someone would have done that is + * because they are calling a library function, like in gtk, that calls + * setlocale(), and which can't be changed to use the mutex. That wouldn't + * be a problem if this were to be done before any threads had switched, + * say during perl construction time. But this code would still be needed + * for the general case. + * + * The Windows and POSIX 2008 differ in that the ultimate fallback is "C" + * in POSIX, and is the system default locale in Windows. To get that + * system default value, we actually have to call setlocale() on Windows. + * Since this function doesn't actually change the locale, that means the + * locale must be saved and restored around the change. A critical section + * is used for this, but since Windows has long had per-thread locales, + * it's likely that will be a no-op. The function description could be + * changed to not guarantee that it is a read-only operation. + * */ /* We rely on PerlEnv_getenv() returning a mortalized copy */ const char * const lc_all = PerlEnv_getenv("LC_ALL"); @@ -1529,13 +1546,24 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) return default_name; } - /* If no LANG, use "C" */ + /* If no LANG, use "C" on POSIX 2008, the system default on Windows */ +# ifndef WIN32 return "C"; +# else + SETLOCALE_LOCK; + const char * orginal = setlocale(categories[index], NULL); + const char * ret = wrap_wsetlocale(categories[index], ""); + setlocale(categories[index], original); + SETLOCALE_UNLOCK; + return ret; +# endif + } - /* Here is LC_ALL, and no LC_ALL environment variable. LANG is used (or - * "C" if no LANG), but overridden for individual categories that have - * corresponding environment variables */ + /* Here is LC_ALL, and no LC_ALL environment variable. LANG is used as a + * default, but overridden for individual categories that have + * corresponding environment variables. If no LANG exists, the default is + * "C" on POSIX 2008, or the system default for the category on Windows. */ const char * default_name = PerlEnv_getenv("LANG"); /* Convert "" to NULL to save conditionals in the loop below */ @@ -1557,7 +1585,17 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) locale_names[i] = default_name; } else { + +# ifndef WIN32 locale_names[i] = "C"; +# else + SETLOCALE_LOCK; + const char * orginal = setlocale(categories[index], NULL); + locale_names[i] = wrap_wsetlocale(categories[index], ""); + setlocale(categories[index], original); + SETLOCALE_UNLOCK; +# endif + } DEBUG_Lv(PerlIO_printf(Perl_debug_log, @@ -1569,7 +1607,7 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) } #endif -#if defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) +#if defined(WIN32) || defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) STATIC const char * @@ -1587,8 +1625,11 @@ S_calculate_LC_ALL(pTHX_ const char ** individ_locales) * data, which is either a locale_t object, for systems with querylocale(), * or an array we keep updated to the proper values, otherwise. * - * This returns a mortalized string containing the locale name(s) of - * LC_ALL. + * For Windows, we also may need to construct an LC_ALL when setting the + * locale to the system default. + * + * This function returns a mortalized string containing the locale name(s) + * of LC_ALL. * * If all individual categories are the same locale, we can just set LC_ALL * to that locale. But if not, we have to create an aggregation of all the @@ -1596,12 +1637,15 @@ S_calculate_LC_ALL(pTHX_ const char ** individ_locales) * for these non-uniform locales for LC_ALL. Some use a '/' or other * delimiter of the locales with a predetermined order of categories; a * Configure probe would be needed to tell us how to decipher those. glibc - * uses a series of name=value pairs, like + * and Windows use a series of name=value pairs, like * LC_NUMERIC=C;LC_TIME=en_US.UTF-8;... - * The syntax we use for our aggregation doesn't much matter, as we take - * care not to use the native setlocale() function on whatever style is - * chosen. But, it would be possible for someone to call Perl_setlocale() - * using a native style we don't understand. So far no one has complained. + * This function returns that syntax, which is suitable for input to the + * Windows setlocale(). It could also be suitable for glibc, but because + * the non-Windows code is common to systems that use a different syntax, + * we don't depend on it for glibc. Instead we take care not to use the + * native setlocale() function on whatever non-Windows style is chosen. + * But, it would be possible for someone to call Perl_setlocale() using a + * native style we don't understand. So far no one has complained. * * For systems that have categories we don't know about, the algorithm * below won't know about those missing categories, leading to potential @@ -2560,81 +2604,18 @@ S_win32_setlocale(pTHX_ int category, const char* locale) * use the particular category's variable if set; otherwise to use the LANG * variable. */ - bool override_LC_ALL = FALSE; - char * result; - unsigned int i; if (locale && strEQ(locale, "")) { - -# ifdef LC_ALL - - locale = PerlEnv_getenv("LC_ALL"); - if (! locale) { - if (category == LC_ALL) { - override_LC_ALL = TRUE; - } - else { - -# endif - - for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { - if (category == categories[i]) { - locale = PerlEnv_getenv(category_names[i]); - goto found_locale; - } - } - - locale = PerlEnv_getenv("LANG"); - if (! locale) { - locale = ""; - } - - found_locale: ; - -# ifdef LC_ALL - - } - } - -# endif - + locale = find_locale_from_environment(get_category_index(category, "")); } #ifdef USE_WSETLOCALE - result = wrap_wsetlocale(category, locale); + char * result = wrap_wsetlocale(category, locale); #else - result = setlocale(category, locale); + char * result = setlocale(category, locale); #endif DEBUG_L(PerlIO_printf(Perl_debug_log, "%s\n", setlocale_debug_string_r(category, locale, result))); - - if (! override_LC_ALL) { - return result; - } - - /* Here the input category was LC_ALL, and we have set it to what is in the - * LANG variable or the system default if there is no LANG. But these have - * lower priority than the other LC_foo variables, so override it for each - * one that is set. (If they are set to "", it means to use the same thing - * we just set LC_ALL to, so can skip) */ - - for (i = 0; i < LC_ALL_INDEX_; i++) { - result = PerlEnv_getenv(category_names[i]); - if (result && strNE(result, "")) { -#ifdef USE_WSETLOCALE - wrap_wsetlocale(categories[i], result); -#else - setlocale(categories[i], result); -#endif - DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s\n", - setlocale_debug_string_i(i, result, "not captured"))); - } - } - - result = setlocale(LC_ALL, NULL); - DEBUG_L(PerlIO_printf(Perl_debug_log, "%s\n", - setlocale_debug_string_c(LC_ALL, NULL, result))); - return result; } diff --git a/proto.h b/proto.h index eee1e17be6b6..92975b2df462 100644 --- a/proto.h +++ b/proto.h @@ -4718,7 +4718,7 @@ PERL_CALLCONV Signal_t Perl_sighandler(int sig) #if !(defined(USE_POSIX_2008_LOCALE) && defined(USE_QUERYLOCALE)) # if defined(PERL_IN_LOCALE_C) # if defined(USE_LOCALE) -# if defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) +# if defined(WIN32) || defined(USE_POSIX_2008_LOCALE) || ! defined(LC_ALL) STATIC const char * S_calculate_LC_ALL(pTHX_ const char ** individ_locales); #define PERL_ARGS_ASSERT_CALCULATE_LC_ALL \ assert(individ_locales) @@ -5754,10 +5754,6 @@ STATIC const char * S_setlocale_from_aggregate_LC_ALL(pTHX_ const char * locale, STATIC locale_t S_use_curlocale_scratch(pTHX); #define PERL_ARGS_ASSERT_USE_CURLOCALE_SCRATCH # endif -# if defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE) -STATIC const char * S_find_locale_from_environment(pTHX_ const unsigned int index); -#define PERL_ARGS_ASSERT_FIND_LOCALE_FROM_ENVIRONMENT -# endif # if defined(USE_POSIX_2008_LOCALE) && defined(USE_QUERYLOCALE) STATIC const char * S_calculate_LC_ALL(pTHX_ const locale_t cur_obj); #define PERL_ARGS_ASSERT_CALCULATE_LC_ALL @@ -5772,6 +5768,10 @@ STATIC char* S_win32_setlocale(pTHX_ int category, const char* locale); STATIC char * S_wrap_wsetlocale(pTHX_ const int category, const char *locale); #define PERL_ARGS_ASSERT_WRAP_WSETLOCALE # endif +# if defined(WIN32) || ( defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) +STATIC const char * S_find_locale_from_environment(pTHX_ const unsigned int index); +#define PERL_ARGS_ASSERT_FIND_LOCALE_FROM_ENVIRONMENT +# endif # endif #endif #if defined(PERL_IN_LOCALE_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_MATHOMS_C) From 84f750efb693e080b1d8ed526cd3792c854c8c17 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 6 Oct 2022 07:46:43 -0600 Subject: [PATCH 05/12] locale.c: Allow function to have side effects The previous commit changed find_locale_from_environment() to work on Windows, and took care to not make the function have side effects. But in the only use of this function so far (and likely forever), those side effects are fine. Changing to allow them simplifies things. --- locale.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/locale.c b/locale.c index 371c1ea4616c..ac6c11bd8802 100644 --- a/locale.c +++ b/locale.c @@ -1483,7 +1483,9 @@ S_stdize_locale(pTHX_ const int category, STATIC const char * S_find_locale_from_environment(pTHX_ const unsigned int index) { - /* On Windows systems, the concept of the POSIX ordering of environment + /* NB: This function may actually change the locale on Windows. + * + * On Windows systems, the concept of the POSIX ordering of environment * variables is missing. To increase portability of programs across * platforms, the POSIX ordering is emulated on Windows. * @@ -1514,12 +1516,7 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) * The Windows and POSIX 2008 differ in that the ultimate fallback is "C" * in POSIX, and is the system default locale in Windows. To get that * system default value, we actually have to call setlocale() on Windows. - * Since this function doesn't actually change the locale, that means the - * locale must be saved and restored around the change. A critical section - * is used for this, but since Windows has long had per-thread locales, - * it's likely that will be a no-op. The function description could be - * changed to not guarantee that it is a read-only operation. - * */ + */ /* We rely on PerlEnv_getenv() returning a mortalized copy */ const char * const lc_all = PerlEnv_getenv("LC_ALL"); @@ -1550,12 +1547,7 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) # ifndef WIN32 return "C"; # else - SETLOCALE_LOCK; - const char * orginal = setlocale(categories[index], NULL); - const char * ret = wrap_wsetlocale(categories[index], ""); - setlocale(categories[index], original); - SETLOCALE_UNLOCK; - return ret; + return wrap_wsetlocale(categories[index], ""); # endif } @@ -1589,11 +1581,7 @@ S_find_locale_from_environment(pTHX_ const unsigned int index) # ifndef WIN32 locale_names[i] = "C"; # else - SETLOCALE_LOCK; - const char * orginal = setlocale(categories[index], NULL); locale_names[i] = wrap_wsetlocale(categories[index], ""); - setlocale(categories[index], original); - SETLOCALE_UNLOCK; # endif } @@ -2606,6 +2594,8 @@ S_win32_setlocale(pTHX_ int category, const char* locale) if (locale && strEQ(locale, "")) { + /* Note this function may change the locale, but that's ok because we + * are about to change it anyway */ locale = find_locale_from_environment(get_category_index(category, "")); } From 1934c8db43705cbc3247a860a16d34b53d7af5ba Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Oct 2022 20:32:38 -0600 Subject: [PATCH 06/12] locale.c: Windows special case NULL input first This gets the trivial case out of the way, and can use plain setlocale, as the locale string is non-existent, so doesn't need to handle different character sets. --- locale.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/locale.c b/locale.c index ac6c11bd8802..3c6d42eaacb9 100644 --- a/locale.c +++ b/locale.c @@ -2593,7 +2593,11 @@ S_win32_setlocale(pTHX_ int category, const char* locale) * variable. */ - if (locale && strEQ(locale, "")) { + if (locale == NULL) { + return wrap_wsetlocale(category, NULL); + } + + if (strEQ(locale, "")) { /* Note this function may change the locale, but that's ok because we * are about to change it anyway */ locale = find_locale_from_environment(get_category_index(category, "")); From b0bbaf60b9d691c75175066278c0767ad94a0d7e Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Oct 2022 20:43:30 -0600 Subject: [PATCH 07/12] locale.c: Remove unused cpp alternatives The wide setlocale function in Windows has been in the field since 5.32, long enough, that we won't be forced to discontinue its use. So can remove the never-used overrides, cleaning it up slightly --- locale.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/locale.c b/locale.c index 3c6d42eaacb9..8c3e20b881cb 100644 --- a/locale.c +++ b/locale.c @@ -2538,10 +2538,6 @@ Perl_Win_wstring_to_utf8_string(const wchar_t * wstring) return utf8_string; } -#define USE_WSETLOCALE - -#ifdef USE_WSETLOCALE - STATIC char * S_wrap_wsetlocale(pTHX_ int category, const char *locale) { PERL_ARGS_ASSERT_WRAP_WSETLOCALE; @@ -2573,8 +2569,6 @@ S_wrap_wsetlocale(pTHX_ int category, const char *locale) { return result; } -#endif - STATIC char * S_win32_setlocale(pTHX_ int category, const char* locale) { @@ -2603,11 +2597,7 @@ S_win32_setlocale(pTHX_ int category, const char* locale) locale = find_locale_from_environment(get_category_index(category, "")); } -#ifdef USE_WSETLOCALE char * result = wrap_wsetlocale(category, locale); -#else - char * result = setlocale(category, locale); -#endif DEBUG_L(PerlIO_printf(Perl_debug_log, "%s\n", setlocale_debug_string_r(category, locale, result))); return result; From 32d7125da2317108fb7a98ba18c44a4243ea1c55 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Oct 2022 04:49:47 -0600 Subject: [PATCH 08/12] locale.c: Make static 2 Win-only functions These are non-API, used in this file, and because of #ifdefs, not accessible outside it, so there is no current need to make them publicly available. If we were ever to need them to be accessible more widely, they would not belong in this file. --- embed.fnc | 4 ++-- embed.h | 10 ++-------- locale.c | 4 ++-- proto.h | 4 ++-- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/embed.fnc b/embed.fnc index 4edb6a507c5d..923ded2ca240 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3426,8 +3426,8 @@ S |void |print_collxfrm_input_and_return \ # endif # ifdef WIN32 S |char* |win32_setlocale|int category|NULLOK const char* locale -pTC |wchar_t *|Win_utf8_string_to_wstring|NULLOK const char * utf8_string -pTC |char * |Win_wstring_to_utf8_string|NULLOK const wchar_t * wstring +ST |wchar_t *|Win_utf8_string_to_wstring|NULLOK const char * utf8_string +ST |char * |Win_wstring_to_utf8_string|NULLOK const wchar_t * wstring S |char *|wrap_wsetlocale |const int category \ |NULLOK const char *locale # endif diff --git a/embed.h b/embed.h index 70be607047e2..525ce3532584 100644 --- a/embed.h +++ b/embed.h @@ -826,14 +826,6 @@ #define dump_mstats(a) Perl_dump_mstats(aTHX_ a) #define get_mstats(a,b,c) Perl_get_mstats(aTHX_ a,b,c) #endif -#if defined(PERL_IN_LOCALE_C) -# if defined(USE_LOCALE) -# if defined(WIN32) -#define Win_utf8_string_to_wstring Perl_Win_utf8_string_to_wstring -#define Win_wstring_to_utf8_string Perl_Win_wstring_to_utf8_string -# endif -# endif -#endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) #define check_regnode_after(a,b) Perl_check_regnode_after(aTHX_ a,b) #define regnext(a) Perl_regnext(aTHX_ a) @@ -1780,6 +1772,8 @@ #define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a) # endif # if defined(WIN32) +#define Win_utf8_string_to_wstring S_Win_utf8_string_to_wstring +#define Win_wstring_to_utf8_string S_Win_wstring_to_utf8_string #define win32_setlocale(a,b) S_win32_setlocale(aTHX_ a,b) #define wrap_wsetlocale(a,b) S_wrap_wsetlocale(aTHX_ a,b) # endif diff --git a/locale.c b/locale.c index 8c3e20b881cb..9fefcc183f58 100644 --- a/locale.c +++ b/locale.c @@ -2495,7 +2495,7 @@ S_new_collate(pTHX_ const char *newcoll) #ifdef WIN32 wchar_t * -Perl_Win_utf8_string_to_wstring(const char * utf8_string) +S_Win_utf8_string_to_wstring(const char * utf8_string) { wchar_t *wstring; @@ -2518,7 +2518,7 @@ Perl_Win_utf8_string_to_wstring(const char * utf8_string) } char * -Perl_Win_wstring_to_utf8_string(const wchar_t * wstring) +S_Win_wstring_to_utf8_string(const wchar_t * wstring) { char *utf8_string; diff --git a/proto.h b/proto.h index 92975b2df462..f5ca738992e4 100644 --- a/proto.h +++ b/proto.h @@ -5759,9 +5759,9 @@ STATIC const char * S_calculate_LC_ALL(pTHX_ const locale_t cur_obj); #define PERL_ARGS_ASSERT_CALCULATE_LC_ALL # endif # if defined(WIN32) -PERL_CALLCONV wchar_t * Perl_Win_utf8_string_to_wstring(const char * utf8_string); +STATIC wchar_t * S_Win_utf8_string_to_wstring(const char * utf8_string); #define PERL_ARGS_ASSERT_WIN_UTF8_STRING_TO_WSTRING -PERL_CALLCONV char * Perl_Win_wstring_to_utf8_string(const wchar_t * wstring); +STATIC char * S_Win_wstring_to_utf8_string(const wchar_t * wstring); #define PERL_ARGS_ASSERT_WIN_WSTRING_TO_UTF8_STRING STATIC char* S_win32_setlocale(pTHX_ int category, const char* locale); #define PERL_ARGS_ASSERT_WIN32_SETLOCALE From 554ee972b3c1c409a3afb29df8dd7dd3278423da Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Oct 2022 05:04:32 -0600 Subject: [PATCH 09/12] locale.c: Generalize static functions This changes these functions to take the code page as input, instead of being just UTF-8. Macros are created to call them with UTF-8. I'm doing this because there is no loss of efficiency, and it is somewhat jarring, given Perl terminology, to call a function with 'Byte' in the name with a parameter with 'utf8' in the name. --- embed.fnc | 6 ++++-- embed.h | 4 ++-- locale.c | 24 ++++++++++++++---------- proto.h | 8 ++++---- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/embed.fnc b/embed.fnc index 923ded2ca240..5a42e111a889 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3426,8 +3426,10 @@ S |void |print_collxfrm_input_and_return \ # endif # ifdef WIN32 S |char* |win32_setlocale|int category|NULLOK const char* locale -ST |wchar_t *|Win_utf8_string_to_wstring|NULLOK const char * utf8_string -ST |char * |Win_wstring_to_utf8_string|NULLOK const wchar_t * wstring +ST |wchar_t *|Win_byte_string_to_wstring|const UINT code_page \ + |NULLOK const char * byte_string +ST |char * |Win_wstring_to_byte_string|const UINT code_page \ + |NULLOK const wchar_t * wstring S |char *|wrap_wsetlocale |const int category \ |NULLOK const char *locale # endif diff --git a/embed.h b/embed.h index 525ce3532584..b5531e0f0d47 100644 --- a/embed.h +++ b/embed.h @@ -1772,8 +1772,8 @@ #define calculate_LC_ALL(a) S_calculate_LC_ALL(aTHX_ a) # endif # if defined(WIN32) -#define Win_utf8_string_to_wstring S_Win_utf8_string_to_wstring -#define Win_wstring_to_utf8_string S_Win_wstring_to_utf8_string +#define Win_byte_string_to_wstring S_Win_byte_string_to_wstring +#define Win_wstring_to_byte_string S_Win_wstring_to_byte_string #define win32_setlocale(a,b) S_win32_setlocale(aTHX_ a,b) #define wrap_wsetlocale(a,b) S_wrap_wsetlocale(aTHX_ a,b) # endif diff --git a/locale.c b/locale.c index 9fefcc183f58..b2f307706349 100644 --- a/locale.c +++ b/locale.c @@ -2495,11 +2495,11 @@ S_new_collate(pTHX_ const char *newcoll) #ifdef WIN32 wchar_t * -S_Win_utf8_string_to_wstring(const char * utf8_string) +S_Win_byte_string_to_wstring(const UINT code_page, const char * byte_string) { wchar_t *wstring; - int req_size = MultiByteToWideChar(CP_UTF8, 0, utf8_string, -1, NULL, 0); + int req_size = MultiByteToWideChar(code_page, 0, byte_string, -1, NULL, 0); if (! req_size) { errno = EINVAL; return NULL; @@ -2507,7 +2507,7 @@ S_Win_utf8_string_to_wstring(const char * utf8_string) Newx(wstring, req_size, wchar_t); - if (! MultiByteToWideChar(CP_UTF8, 0, utf8_string, -1, wstring, req_size)) + if (! MultiByteToWideChar(code_page, 0, byte_string, -1, wstring, req_size)) { Safefree(wstring); errno = EINVAL; @@ -2517,27 +2517,31 @@ S_Win_utf8_string_to_wstring(const char * utf8_string) return wstring; } +#define Win_utf8_string_to_wstring(s) Win_byte_string_to_wstring(CP_UTF8, (s)) + char * -S_Win_wstring_to_utf8_string(const wchar_t * wstring) +S_Win_wstring_to_byte_string(const UINT code_page, const wchar_t * wstring) { - char *utf8_string; int req_size = - WideCharToMultiByte(CP_UTF8, 0, wstring, -1, NULL, 0, NULL, NULL); + WideCharToMultiByte(code_page, 0, wstring, -1, NULL, 0, NULL, NULL); - Newx(utf8_string, req_size, char); + char *byte_string; + Newx(byte_string, req_size, char); - if (! WideCharToMultiByte(CP_UTF8, 0, wstring, -1, utf8_string, + if (! WideCharToMultiByte(code_page, 0, wstring, -1, byte_string, req_size, NULL, NULL)) { - Safefree(utf8_string); + Safefree(byte_string); errno = EINVAL; return NULL; } - return utf8_string; + return byte_string; } +#define Win_wstring_to_utf8_string(ws) Win_wstring_to_byte_string(CP_UTF8, (ws)) + STATIC char * S_wrap_wsetlocale(pTHX_ int category, const char *locale) { PERL_ARGS_ASSERT_WRAP_WSETLOCALE; diff --git a/proto.h b/proto.h index f5ca738992e4..ae21a902191c 100644 --- a/proto.h +++ b/proto.h @@ -5759,10 +5759,10 @@ STATIC const char * S_calculate_LC_ALL(pTHX_ const locale_t cur_obj); #define PERL_ARGS_ASSERT_CALCULATE_LC_ALL # endif # if defined(WIN32) -STATIC wchar_t * S_Win_utf8_string_to_wstring(const char * utf8_string); -#define PERL_ARGS_ASSERT_WIN_UTF8_STRING_TO_WSTRING -STATIC char * S_Win_wstring_to_utf8_string(const wchar_t * wstring); -#define PERL_ARGS_ASSERT_WIN_WSTRING_TO_UTF8_STRING +STATIC wchar_t * S_Win_byte_string_to_wstring(const UINT code_page, const char * byte_string); +#define PERL_ARGS_ASSERT_WIN_BYTE_STRING_TO_WSTRING +STATIC char * S_Win_wstring_to_byte_string(const UINT code_page, const wchar_t * wstring); +#define PERL_ARGS_ASSERT_WIN_WSTRING_TO_BYTE_STRING STATIC char* S_win32_setlocale(pTHX_ int category, const char* locale); #define PERL_ARGS_ASSERT_WIN32_SETLOCALE STATIC char * S_wrap_wsetlocale(pTHX_ const int category, const char *locale); From 002f13c8f11c2d55dd76a1647290b26df33b3cb9 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Oct 2022 10:58:16 -0600 Subject: [PATCH 10/12] Add some const to wrap_wsetlocale And move declarations closer to first use as allowed in C99 --- embed.fnc | 3 ++- locale.c | 17 ++++++++--------- proto.h | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/embed.fnc b/embed.fnc index 5a42e111a889..fa3bfb34c66d 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3430,7 +3430,8 @@ ST |wchar_t *|Win_byte_string_to_wstring|const UINT code_page \ |NULLOK const char * byte_string ST |char * |Win_wstring_to_byte_string|const UINT code_page \ |NULLOK const wchar_t * wstring -S |char *|wrap_wsetlocale |const int category \ +S |const char *|wrap_wsetlocale \ + |const int category \ |NULLOK const char *locale # endif # if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L) diff --git a/locale.c b/locale.c index b2f307706349..c5ae1f5daa6a 100644 --- a/locale.c +++ b/locale.c @@ -2542,13 +2542,12 @@ S_Win_wstring_to_byte_string(const UINT code_page, const wchar_t * wstring) #define Win_wstring_to_utf8_string(ws) Win_wstring_to_byte_string(CP_UTF8, (ws)) -STATIC char * -S_wrap_wsetlocale(pTHX_ int category, const char *locale) { +STATIC const char * +S_wrap_wsetlocale(pTHX_ const int category, const char *locale) +{ PERL_ARGS_ASSERT_WRAP_WSETLOCALE; - wchar_t *wlocale = NULL; - wchar_t *wresult; - char *result; + const wchar_t * wlocale = NULL; if (locale) { wlocale = Win_utf8_string_to_wstring(locale); @@ -2560,14 +2559,14 @@ S_wrap_wsetlocale(pTHX_ int category, const char *locale) { wlocale = NULL; } - wresult = _wsetlocale(category, wlocale); + const wchar_t * wresult = _wsetlocale(category, wlocale); Safefree(wlocale); if (! wresult) { return NULL; } - result = Win_wstring_to_utf8_string(wresult); + const char * result = Win_wstring_to_utf8_string(wresult); SAVEFREEPV(result); /* is there something better we can do here? */ return result; @@ -2601,10 +2600,10 @@ S_win32_setlocale(pTHX_ int category, const char* locale) locale = find_locale_from_environment(get_category_index(category, "")); } - char * result = wrap_wsetlocale(category, locale); + const char * result = wrap_wsetlocale(category, locale); DEBUG_L(PerlIO_printf(Perl_debug_log, "%s\n", setlocale_debug_string_r(category, locale, result))); - return result; + return (char *) result; } #endif diff --git a/proto.h b/proto.h index ae21a902191c..30e935549ab0 100644 --- a/proto.h +++ b/proto.h @@ -5765,7 +5765,7 @@ STATIC char * S_Win_wstring_to_byte_string(const UINT code_page, const wchar_t * #define PERL_ARGS_ASSERT_WIN_WSTRING_TO_BYTE_STRING STATIC char* S_win32_setlocale(pTHX_ int category, const char* locale); #define PERL_ARGS_ASSERT_WIN32_SETLOCALE -STATIC char * S_wrap_wsetlocale(pTHX_ const int category, const char *locale); +STATIC const char * S_wrap_wsetlocale(pTHX_ const int category, const char *locale); #define PERL_ARGS_ASSERT_WRAP_WSETLOCALE # endif # if defined(WIN32) || ( defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) From 55bed416d812a9cb63d8cda9335d449bb56b33b7 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 6 Oct 2022 09:12:59 -0600 Subject: [PATCH 11/12] locale.c: Make win32_setlocale return const * Add a bit of safety, and makes it correspond to the other setlocale returns we use. --- embed.fnc | 2 +- locale.c | 4 ++-- proto.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/embed.fnc b/embed.fnc index fa3bfb34c66d..7045868c7d92 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3425,7 +3425,7 @@ S |void |print_collxfrm_input_and_return \ # endif # endif # ifdef WIN32 -S |char* |win32_setlocale|int category|NULLOK const char* locale +S |const char*|win32_setlocale|int category|NULLOK const char* locale ST |wchar_t *|Win_byte_string_to_wstring|const UINT code_page \ |NULLOK const char * byte_string ST |char * |Win_wstring_to_byte_string|const UINT code_page \ diff --git a/locale.c b/locale.c index c5ae1f5daa6a..55936d627708 100644 --- a/locale.c +++ b/locale.c @@ -2572,7 +2572,7 @@ S_wrap_wsetlocale(pTHX_ const int category, const char *locale) return result; } -STATIC char * +STATIC const char * S_win32_setlocale(pTHX_ int category, const char* locale) { /* This, for Windows, emulates POSIX setlocale() behavior. There is no @@ -2603,7 +2603,7 @@ S_win32_setlocale(pTHX_ int category, const char* locale) const char * result = wrap_wsetlocale(category, locale); DEBUG_L(PerlIO_printf(Perl_debug_log, "%s\n", setlocale_debug_string_r(category, locale, result))); - return (char *) result; + return result; } #endif diff --git a/proto.h b/proto.h index 30e935549ab0..9e51fcf6c2dc 100644 --- a/proto.h +++ b/proto.h @@ -5763,7 +5763,7 @@ STATIC wchar_t * S_Win_byte_string_to_wstring(const UINT code_page, const char * #define PERL_ARGS_ASSERT_WIN_BYTE_STRING_TO_WSTRING STATIC char * S_Win_wstring_to_byte_string(const UINT code_page, const wchar_t * wstring); #define PERL_ARGS_ASSERT_WIN_WSTRING_TO_BYTE_STRING -STATIC char* S_win32_setlocale(pTHX_ int category, const char* locale); +STATIC const char* S_win32_setlocale(pTHX_ int category, const char* locale); #define PERL_ARGS_ASSERT_WIN32_SETLOCALE STATIC const char * S_wrap_wsetlocale(pTHX_ const int category, const char *locale); #define PERL_ARGS_ASSERT_WRAP_WSETLOCALE From fee390e4a1f84043e5b168557cd2ea606367e1c7 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Oct 2022 20:39:06 -0600 Subject: [PATCH 12/12] locale.c: Add comments/white space; slight tidying C99 allows declarations to be closer to their first use. This also removes a redundant conditional that would set a variable to what it already was initialized to. --- locale.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/locale.c b/locale.c index 55936d627708..c0d2185d3faf 100644 --- a/locale.c +++ b/locale.c @@ -2497,7 +2497,7 @@ S_new_collate(pTHX_ const char *newcoll) wchar_t * S_Win_byte_string_to_wstring(const UINT code_page, const char * byte_string) { - wchar_t *wstring; + /* Caller must arrange to free the returned string */ int req_size = MultiByteToWideChar(code_page, 0, byte_string, -1, NULL, 0); if (! req_size) { @@ -2505,6 +2505,7 @@ S_Win_byte_string_to_wstring(const UINT code_page, const char * byte_string) return NULL; } + wchar_t *wstring; Newx(wstring, req_size, wchar_t); if (! MultiByteToWideChar(code_page, 0, byte_string, -1, wstring, req_size)) @@ -2522,6 +2523,7 @@ S_Win_byte_string_to_wstring(const UINT code_page, const char * byte_string) char * S_Win_wstring_to_byte_string(const UINT code_page, const wchar_t * wstring) { + /* Caller must arrange to free the returned string */ int req_size = WideCharToMultiByte(code_page, 0, wstring, -1, NULL, 0, NULL, NULL); @@ -2547,6 +2549,10 @@ S_wrap_wsetlocale(pTHX_ const int category, const char *locale) { PERL_ARGS_ASSERT_WRAP_WSETLOCALE; + /* Calls _wsetlocale(), converting the parameters/return to/from + * Perl-expected forms as if plain setlocale() were being called instead. + */ + const wchar_t * wlocale = NULL; if (locale) { @@ -2555,16 +2561,13 @@ S_wrap_wsetlocale(pTHX_ const int category, const char *locale) return NULL; } } - else { - wlocale = NULL; - } const wchar_t * wresult = _wsetlocale(category, wlocale); Safefree(wlocale); if (! wresult) { - return NULL; - } + return NULL; + } const char * result = Win_wstring_to_utf8_string(wresult); SAVEFREEPV(result); /* is there something better we can do here? */