Skip to content

Commit cedc9b7

Browse files
bpo-20087: Update locale alias mapping with glibc 2.27 supported locales. (GH-6708)
1 parent b2043bb commit cedc9b7

File tree

3 files changed

+55
-5
lines changed

3 files changed

+55
-5
lines changed

Lib/locale.py

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,16 @@ def getpreferredencoding(do_setlocale = True):
846846
#
847847
# SS 2014-10-01:
848848
# Updated alias mapping with glibc 2.19 supported locales.
849+
#
850+
# SS 2018-05-05:
851+
# Updated alias mapping with glibc 2.27 supported locales.
852+
#
853+
# These are the differences compared to the old mapping (Python 3.6.5
854+
# and older):
855+
#
856+
# updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
857+
# updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
858+
# updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
849859

850860
locale_alias = {
851861
'a3': 'az_AZ.KOI8-C',
@@ -856,10 +866,13 @@ def getpreferredencoding(do_setlocale = True):
856866
'aa_et': 'aa_ET.UTF-8',
857867
'af': 'af_ZA.ISO8859-1',
858868
'af_za': 'af_ZA.ISO8859-1',
869+
'agr_pe': 'agr_PE.UTF-8',
870+
'ak_gh': 'ak_GH.UTF-8',
859871
'am': 'am_ET.UTF-8',
860872
'am_et': 'am_ET.UTF-8',
861873
'american': 'en_US.ISO8859-1',
862874
'an_es': 'an_ES.ISO8859-15',
875+
'anp_in': 'anp_IN.UTF-8',
863876
'ar': 'ar_AA.ISO8859-6',
864877
'ar_aa': 'ar_AA.ISO8859-6',
865878
'ar_ae': 'ar_AE.ISO8859-6',
@@ -877,6 +890,7 @@ def getpreferredencoding(do_setlocale = True):
877890
'ar_qa': 'ar_QA.ISO8859-6',
878891
'ar_sa': 'ar_SA.ISO8859-6',
879892
'ar_sd': 'ar_SD.ISO8859-6',
893+
'ar_ss': 'ar_SS.UTF-8',
880894
'ar_sy': 'ar_SY.ISO8859-6',
881895
'ar_tn': 'ar_TN.ISO8859-6',
882896
'ar_ye': 'ar_YE.ISO8859-6',
@@ -888,6 +902,7 @@ def getpreferredencoding(do_setlocale = True):
888902
'az': 'az_AZ.ISO8859-9E',
889903
'az_az': 'az_AZ.ISO8859-9E',
890904
'az_az.iso88599e': 'az_AZ.ISO8859-9E',
905+
'az_ir': 'az_IR.UTF-8',
891906
'be': 'be_BY.CP1251',
892907
'be@latin': 'be_BY.UTF-8@latin',
893908
'be_bg.utf8': 'bg_BG.UTF-8',
@@ -898,13 +913,17 @@ def getpreferredencoding(do_setlocale = True):
898913
'ber_ma': 'ber_MA.UTF-8',
899914
'bg': 'bg_BG.CP1251',
900915
'bg_bg': 'bg_BG.CP1251',
916+
'bhb_in.utf8': 'bhb_IN.UTF-8',
901917
'bho_in': 'bho_IN.UTF-8',
918+
'bho_np': 'bho_NP.UTF-8',
919+
'bi_vu': 'bi_VU.UTF-8',
902920
'bn_bd': 'bn_BD.UTF-8',
903921
'bn_in': 'bn_IN.UTF-8',
904922
'bo_cn': 'bo_CN.UTF-8',
905923
'bo_in': 'bo_IN.UTF-8',
906924
'bokmal': 'nb_NO.ISO8859-1',
907925
'bokm\xe5l': 'nb_NO.ISO8859-1',
926+
'bokm\xef\xbf\xbd': 'nb_NO.ISO8859-1',
908927
'br': 'br_FR.ISO8859-1',
909928
'br_fr': 'br_FR.ISO8859-1',
910929
'brx_in': 'brx_IN.UTF-8',
@@ -923,13 +942,17 @@ def getpreferredencoding(do_setlocale = True):
923942
'ca': 'ca_ES.ISO8859-1',
924943
'ca_ad': 'ca_AD.ISO8859-1',
925944
'ca_es': 'ca_ES.ISO8859-1',
926-
'ca_es@valencia': 'ca_ES.ISO8859-15@valencia',
945+
'ca_es@valencia': 'ca_ES.UTF-8@valencia',
927946
'ca_fr': 'ca_FR.ISO8859-1',
928947
'ca_it': 'ca_IT.ISO8859-1',
929948
'catalan': 'ca_ES.ISO8859-1',
949+
'ce_ru': 'ce_RU.UTF-8',
930950
'cextend': 'en_US.ISO8859-1',
931951
'chinese-s': 'zh_CN.eucCN',
932952
'chinese-t': 'zh_TW.eucTW',
953+
'chr_us': 'chr_US.UTF-8',
954+
'ckb_iq': 'ckb_IQ.UTF-8',
955+
'cmn_tw': 'cmn_TW.UTF-8',
933956
'crh_ua': 'crh_UA.UTF-8',
934957
'croatian': 'hr_HR.ISO8859-2',
935958
'cs': 'cs_CZ.ISO8859-2',
@@ -951,6 +974,7 @@ def getpreferredencoding(do_setlocale = True):
951974
'de_be': 'de_BE.ISO8859-1',
952975
'de_ch': 'de_CH.ISO8859-1',
953976
'de_de': 'de_DE.ISO8859-1',
977+
'de_it': 'de_IT.ISO8859-1',
954978
'de_li.utf8': 'de_LI.UTF-8',
955979
'de_lu': 'de_LU.ISO8859-1',
956980
'deutsch': 'de_DE.ISO8859-1',
@@ -977,10 +1001,12 @@ def getpreferredencoding(do_setlocale = True):
9771001
'en_gb': 'en_GB.ISO8859-1',
9781002
'en_hk': 'en_HK.ISO8859-1',
9791003
'en_ie': 'en_IE.ISO8859-1',
1004+
'en_il': 'en_IL.UTF-8',
9801005
'en_in': 'en_IN.ISO8859-1',
9811006
'en_ng': 'en_NG.UTF-8',
9821007
'en_nz': 'en_NZ.ISO8859-1',
9831008
'en_ph': 'en_PH.ISO8859-1',
1009+
'en_sc.utf8': 'en_SC.UTF-8',
9841010
'en_sg': 'en_SG.ISO8859-1',
9851011
'en_uk': 'en_GB.ISO8859-1',
9861012
'en_us': 'en_US.ISO8859-1',
@@ -991,6 +1017,7 @@ def getpreferredencoding(do_setlocale = True):
9911017
'en_zw.utf8': 'en_ZS.UTF-8',
9921018
'eng_gb': 'en_GB.ISO8859-1',
9931019
'english': 'en_EN.ISO8859-1',
1020+
'english.iso88591': 'en_US.ISO8859-1',
9941021
'english_uk': 'en_GB.ISO8859-1',
9951022
'english_united-states': 'en_US.ISO8859-1',
9961023
'english_united-states.437': 'C',
@@ -1045,6 +1072,7 @@ def getpreferredencoding(do_setlocale = True):
10451072
'fr_fr': 'fr_FR.ISO8859-1',
10461073
'fr_lu': 'fr_LU.ISO8859-1',
10471074
'fran\xe7ais': 'fr_FR.ISO8859-1',
1075+
'fran\xef\xbf\xbdis': 'fr_FR.ISO8859-1',
10481076
'fre_fr': 'fr_FR.ISO8859-1',
10491077
'french': 'fr_FR.ISO8859-1',
10501078
'french.iso88591': 'fr_CH.ISO8859-1',
@@ -1071,12 +1099,14 @@ def getpreferredencoding(do_setlocale = True):
10711099
'gv': 'gv_GB.ISO8859-1',
10721100
'gv_gb': 'gv_GB.ISO8859-1',
10731101
'ha_ng': 'ha_NG.UTF-8',
1102+
'hak_tw': 'hak_TW.UTF-8',
10741103
'he': 'he_IL.ISO8859-8',
10751104
'he_il': 'he_IL.ISO8859-8',
10761105
'hebrew': 'he_IL.ISO8859-8',
10771106
'hi': 'hi_IN.ISCII-DEV',
10781107
'hi_in': 'hi_IN.ISCII-DEV',
10791108
'hi_in.isciidev': 'hi_IN.ISCII-DEV',
1109+
'hif_fj': 'hif_FJ.UTF-8',
10801110
'hne': 'hne_IN.UTF-8',
10811111
'hne_in': 'hne_IN.UTF-8',
10821112
'hr': 'hr_HR.ISO8859-2',
@@ -1131,7 +1161,8 @@ def getpreferredencoding(do_setlocale = True):
11311161
'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY',
11321162
'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS',
11331163
'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY',
1134-
'kk_kz': 'kk_KZ.RK1048',
1164+
'kab_dz': 'kab_DZ.UTF-8',
1165+
'kk_kz': 'kk_KZ.ptcp154',
11351166
'kl': 'kl_GL.ISO8859-1',
11361167
'kl_gl': 'kl_GL.ISO8859-1',
11371168
'km_kh': 'km_KH.UTF-8',
@@ -1157,6 +1188,7 @@ def getpreferredencoding(do_setlocale = True):
11571188
'li_nl': 'li_NL.UTF-8',
11581189
'lij_it': 'lij_IT.UTF-8',
11591190
'lithuanian': 'lt_LT.ISO8859-13',
1191+
'ln_cd': 'ln_CD.UTF-8',
11601192
'lo': 'lo_LA.MULELAO-1',
11611193
'lo_la': 'lo_LA.MULELAO-1',
11621194
'lo_la.cp1133': 'lo_LA.IBM-CP1133',
@@ -1166,13 +1198,18 @@ def getpreferredencoding(do_setlocale = True):
11661198
'lt_lt': 'lt_LT.ISO8859-13',
11671199
'lv': 'lv_LV.ISO8859-13',
11681200
'lv_lv': 'lv_LV.ISO8859-13',
1201+
'lzh_tw': 'lzh_TW.UTF-8',
11691202
'mag_in': 'mag_IN.UTF-8',
11701203
'mai': 'mai_IN.UTF-8',
11711204
'mai_in': 'mai_IN.UTF-8',
1205+
'mai_np': 'mai_NP.UTF-8',
1206+
'mfe_mu': 'mfe_MU.UTF-8',
11721207
'mg_mg': 'mg_MG.ISO8859-15',
11731208
'mhr_ru': 'mhr_RU.UTF-8',
11741209
'mi': 'mi_NZ.ISO8859-1',
11751210
'mi_nz': 'mi_NZ.ISO8859-1',
1211+
'miq_ni': 'miq_NI.UTF-8',
1212+
'mjw_in': 'mjw_IN.UTF-8',
11761213
'mk': 'mk_MK.ISO8859-5',
11771214
'mk_mk': 'mk_MK.ISO8859-5',
11781215
'ml': 'ml_IN.UTF-8',
@@ -1186,7 +1223,7 @@ def getpreferredencoding(do_setlocale = True):
11861223
'mt': 'mt_MT.ISO8859-3',
11871224
'mt_mt': 'mt_MT.ISO8859-3',
11881225
'my_mm': 'my_MM.UTF-8',
1189-
'nan_tw@latin': 'nan_TW.UTF-8@latin',
1226+
'nan_tw': 'nan_TW.UTF-8',
11901227
'nb': 'nb_NO.ISO8859-1',
11911228
'nb_no': 'nb_NO.ISO8859-1',
11921229
'nds_de': 'nds_DE.UTF-8',
@@ -1225,6 +1262,8 @@ def getpreferredencoding(do_setlocale = True):
12251262
'pa_in': 'pa_IN.UTF-8',
12261263
'pa_pk': 'pa_PK.UTF-8',
12271264
'pap_an': 'pap_AN.UTF-8',
1265+
'pap_aw': 'pap_AW.UTF-8',
1266+
'pap_cw': 'pap_CW.UTF-8',
12281267
'pd': 'pd_US.ISO8859-1',
12291268
'pd_de': 'pd_DE.ISO8859-1',
12301269
'pd_us': 'pd_US.ISO8859-1',
@@ -1243,14 +1282,16 @@ def getpreferredencoding(do_setlocale = True):
12431282
'pt': 'pt_PT.ISO8859-1',
12441283
'pt_br': 'pt_BR.ISO8859-1',
12451284
'pt_pt': 'pt_PT.ISO8859-1',
1285+
'quz_pe': 'quz_PE.UTF-8',
1286+
'raj_in': 'raj_IN.UTF-8',
12461287
'ro': 'ro_RO.ISO8859-2',
12471288
'ro_ro': 'ro_RO.ISO8859-2',
12481289
'romanian': 'ro_RO.ISO8859-2',
12491290
'ru': 'ru_RU.UTF-8',
12501291
'ru_ru': 'ru_RU.UTF-8',
12511292
'ru_ua': 'ru_UA.KOI8-U',
12521293
'rumanian': 'ro_RO.ISO8859-2',
1253-
'russian': 'ru_RU.ISO8859-5',
1294+
'russian': 'ru_RU.KOI8-R',
12541295
'rw': 'rw_RW.ISO8859-1',
12551296
'rw_rw': 'rw_RW.ISO8859-1',
12561297
'sa_in': 'sa_IN.UTF-8',
@@ -1262,12 +1303,14 @@ def getpreferredencoding(do_setlocale = True):
12621303
'sd_pk': 'sd_PK.UTF-8',
12631304
'se_no': 'se_NO.UTF-8',
12641305
'serbocroatian': 'sr_RS.UTF-8@latin',
1306+
'sgs_lt': 'sgs_LT.UTF-8',
12651307
'sh': 'sr_RS.UTF-8@latin',
12661308
'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2',
12671309
'sh_hr': 'sh_HR.ISO8859-2',
12681310
'sh_hr.iso88592': 'hr_HR.ISO8859-2',
12691311
'sh_sp': 'sr_CS.ISO8859-2',
12701312
'sh_yu': 'sr_RS.UTF-8@latin',
1313+
'shn_mm': 'shn_MM.UTF-8',
12711314
'shs_ca': 'shs_CA.UTF-8',
12721315
'si': 'si_LK.UTF-8',
12731316
'si_lk': 'si_LK.UTF-8',
@@ -1281,6 +1324,7 @@ def getpreferredencoding(do_setlocale = True):
12811324
'slovak': 'sk_SK.ISO8859-2',
12821325
'slovene': 'sl_SI.ISO8859-2',
12831326
'slovenian': 'sl_SI.ISO8859-2',
1327+
'sm_ws': 'sm_WS.UTF-8',
12841328
'so_dj': 'so_DJ.ISO8859-1',
12851329
'so_et': 'so_ET.UTF-8',
12861330
'so_ke': 'so_KE.ISO8859-1',
@@ -1327,6 +1371,7 @@ def getpreferredencoding(do_setlocale = True):
13271371
'ta_in.tscii': 'ta_IN.TSCII-0',
13281372
'ta_in.tscii0': 'ta_IN.TSCII-0',
13291373
'ta_lk': 'ta_LK.UTF-8',
1374+
'tcy_in.utf8': 'tcy_IN.UTF-8',
13301375
'te': 'te_IN.UTF-8',
13311376
'te_in': 'te_IN.UTF-8',
13321377
'tg': 'tg_TJ.KOI8-C',
@@ -1336,6 +1381,7 @@ def getpreferredencoding(do_setlocale = True):
13361381
'th_th.tactis': 'th_TH.TIS620',
13371382
'th_th.tis620': 'th_TH.TIS620',
13381383
'thai': 'th_TH.ISO8859-11',
1384+
'the_np': 'the_NP.UTF-8',
13391385
'ti_er': 'ti_ER.UTF-8',
13401386
'ti_et': 'ti_ET.UTF-8',
13411387
'tig_er': 'tig_ER.UTF-8',
@@ -1344,6 +1390,8 @@ def getpreferredencoding(do_setlocale = True):
13441390
'tl_ph': 'tl_PH.ISO8859-1',
13451391
'tn': 'tn_ZA.ISO8859-15',
13461392
'tn_za': 'tn_ZA.ISO8859-15',
1393+
'to_to': 'to_TO.UTF-8',
1394+
'tpi_pg': 'tpi_PG.UTF-8',
13471395
'tr': 'tr_TR.ISO8859-9',
13481396
'tr_cy': 'tr_CY.ISO8859-9',
13491397
'tr_tr': 'tr_TR.ISO8859-9',
@@ -1386,6 +1434,7 @@ def getpreferredencoding(do_setlocale = True):
13861434
'yi_us': 'yi_US.CP1255',
13871435
'yo_ng': 'yo_NG.UTF-8',
13881436
'yue_hk': 'yue_HK.UTF-8',
1437+
'yuw_pg': 'yuw_PG.UTF-8',
13891438
'zh': 'zh_CN.eucCN',
13901439
'zh_cn': 'zh_CN.gb2312',
13911440
'zh_cn.big5': 'zh_TW.big5',

Lib/test/test_locale.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ def test_latin_modifier(self):
441441

442442
def test_valencia_modifier(self):
443443
self.check('ca_ES.UTF-8@valencia', 'ca_ES.UTF-8@valencia')
444-
self.check('ca_ES@valencia', 'ca_ES.ISO8859-15@valencia')
444+
self.check('ca_ES@valencia', 'ca_ES.UTF-8@valencia')
445445
self.check('ca@valencia', 'ca_ES.ISO8859-1@valencia')
446446

447447
def test_devanagari_modifier(self):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Updated alias mapping with glibc 2.27 supported locales.

0 commit comments

Comments
 (0)