Skip to content

Commit 4e7e02d

Browse files
committed
ICU-20575 fix broken default locale mapping for C.UTF-8
- Correct a regression introduced in 1afef30 / ICU-20187.
- PR#418 incorrectly dropped the mapping from "C" to "en_US_POSIX" without handling cases where a codepage was set (such as "C.UTF-8").
- The `strcmp("C", …)` in uprv_getPOSIXIDForCategory() was intended to detect specific platform behavior; it is not a general mapping.
- Add an additional test in uprv_getDefaultLocaleID() for locales that end up as "C" or "POSIX" after the codepage suffix is removed.
- Also fix a regression where "aa@bb" would become "aa__BB__BB".
1 parent 711e7e0 commit 4e7e02d

File tree

1 file changed

+17
-6
lines changed

1 file changed

+17
-6
lines changed

icu4c/source/common/putil.cpp

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1560,6 +1560,10 @@ static const char *uprv_getPOSIXIDForCategory(int category)
15601560
{
15611561
/* Nothing worked. Give it a nice POSIX default value. */
15621562
posixID = "en_US_POSIX";
1563+
// Note: this test will not catch 'C.UTF-8',
1564+
// that will be handled in uprv_getDefaultLocaleID().
1565+
// Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
1566+
// callers.
15631567
}
15641568
return posixID;
15651569
}
@@ -1631,8 +1635,8 @@ The leftmost codepage (.xxx) wins.
16311635
}
16321636

16331637
// Copy the ID into owned memory.
1634-
// Over-allocate in case we replace "@" with "__".
1635-
char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 1 + 1));
1638+
// Over-allocate in case we replace "@" with "__" or "C" with "en_US_POSIX"
1639+
char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 11 + 1));
16361640
if (correctedPOSIXLocale == nullptr) {
16371641
return nullptr;
16381642
}
@@ -1641,11 +1645,18 @@ The leftmost codepage (.xxx) wins.
16411645
char *limit;
16421646
if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
16431647
*limit = 0;
1644-
if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1645-
*limit = 0;
1646-
}
1648+
}
1649+
if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1650+
*limit = 0;
16471651
}
16481652

1653+
if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1654+
|| (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1655+
// Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1656+
// (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1657+
uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1658+
}
1659+
16491660
/* Note that we scan the *uncorrected* ID. */
16501661
const char *p;
16511662
if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
@@ -1668,7 +1679,7 @@ The leftmost codepage (.xxx) wins.
16681679
if ((q = uprv_strchr(p, '.')) != nullptr) {
16691680
/* How big will the resulting string be? */
16701681
int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1671-
uprv_strncat(correctedPOSIXLocale, p, q-p);
1682+
uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
16721683
correctedPOSIXLocale[len] = 0;
16731684
}
16741685
else {

0 commit comments

Comments
 (0)