Skip to content

Commit 3a7d70f

Browse files
committed
Fix GH-10648: add check function pointer into mbfl_encoding
Previously, mbstring used the same logic for encoding validation as for encoding conversion. However, there are cases where validation and conversion need different logic. For example, if a string ends before the input required by the encoding is complete, or if it contains input that is invalid in the encoding but can still be converted, the conversion should succeed while the validation should fail. To achieve this, a function pointer mb_check_fn has been added to struct mbfl_encoding to implement the logic used for validation. This commit also adds validation logic for UTF-7, UTF7-IMAP, and JIS.
1 parent 8c87a5c commit 3a7d70f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+599
-75
lines changed

ext/mbstring/libmbfl/filters/mbfilter_7bit.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ const mbfl_encoding mbfl_encoding_7bit = {
6464
&vtbl_7bit_wchar,
6565
&vtbl_wchar_7bit,
6666
mb_7bit_to_wchar,
67-
mb_wchar_to_7bit
67+
mb_wchar_to_7bit,
68+
NULL
6869
};
6970

7071
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)

ext/mbstring/libmbfl/filters/mbfilter_base64.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ const mbfl_encoding mbfl_encoding_base64 = {
4444
NULL,
4545
NULL,
4646
mb_base64_to_wchar,
47-
mb_wchar_to_base64
47+
mb_wchar_to_base64,
48+
NULL
4849
};
4950

5051
const struct mbfl_convert_vtbl vtbl_8bit_b64 = {

ext/mbstring/libmbfl/filters/mbfilter_big5.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ const mbfl_encoding mbfl_encoding_big5 = {
6969
&vtbl_big5_wchar,
7070
&vtbl_wchar_big5,
7171
mb_big5_to_wchar,
72-
mb_wchar_to_big5
72+
mb_wchar_to_big5,
73+
NULL
7374
};
7475

7576
const mbfl_encoding mbfl_encoding_cp950 = {
@@ -82,7 +83,8 @@ const mbfl_encoding mbfl_encoding_cp950 = {
8283
&vtbl_cp950_wchar,
8384
&vtbl_wchar_cp950,
8485
mb_cp950_to_wchar,
85-
mb_wchar_to_cp950
86+
mb_wchar_to_cp950,
87+
NULL
8688
};
8789

8890
const struct mbfl_convert_vtbl vtbl_big5_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ const mbfl_encoding mbfl_encoding_cp50220 = {
6161
&vtbl_cp50220_wchar,
6262
&vtbl_wchar_cp50220,
6363
mb_cp5022x_to_wchar,
64-
mb_wchar_to_cp50220
64+
mb_wchar_to_cp50220,
65+
NULL
6566
};
6667

6768
const mbfl_encoding mbfl_encoding_cp50221 = {
@@ -74,7 +75,8 @@ const mbfl_encoding mbfl_encoding_cp50221 = {
7475
&vtbl_cp50221_wchar,
7576
&vtbl_wchar_cp50221,
7677
mb_cp5022x_to_wchar,
77-
mb_wchar_to_cp50221
78+
mb_wchar_to_cp50221,
79+
NULL
7880
};
7981

8082
const mbfl_encoding mbfl_encoding_cp50222 = {
@@ -87,7 +89,8 @@ const mbfl_encoding mbfl_encoding_cp50222 = {
8789
&vtbl_cp50222_wchar,
8890
&vtbl_wchar_cp50222,
8991
mb_cp5022x_to_wchar,
90-
mb_wchar_to_cp50222
92+
mb_wchar_to_cp50222,
93+
NULL
9194
};
9295

9396
const struct mbfl_convert_vtbl vtbl_cp50220_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_cp51932.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ const mbfl_encoding mbfl_encoding_cp51932 = {
6969
&vtbl_cp51932_wchar,
7070
&vtbl_wchar_cp51932,
7171
mb_cp51932_to_wchar,
72-
mb_wchar_to_cp51932
72+
mb_wchar_to_cp51932,
73+
NULL
7374
};
7475

7576
const struct mbfl_convert_vtbl vtbl_cp51932_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_cp932.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ const mbfl_encoding mbfl_encoding_cp932 = {
100100
&vtbl_cp932_wchar,
101101
&vtbl_wchar_cp932,
102102
mb_cp932_to_wchar,
103-
mb_wchar_to_cp932
103+
mb_wchar_to_cp932,
104+
NULL
104105
};
105106

106107
const struct mbfl_convert_vtbl vtbl_cp932_wchar = {
@@ -133,7 +134,8 @@ const mbfl_encoding mbfl_encoding_sjiswin = {
133134
&vtbl_sjiswin_wchar,
134135
&vtbl_wchar_sjiswin,
135136
mb_cp932_to_wchar,
136-
mb_wchar_to_sjiswin
137+
mb_wchar_to_sjiswin,
138+
NULL
137139
};
138140

139141
const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_cp936.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ const mbfl_encoding mbfl_encoding_cp936 = {
6868
&vtbl_cp936_wchar,
6969
&vtbl_wchar_cp936,
7070
mb_cp936_to_wchar,
71-
mb_wchar_to_cp936
71+
mb_wchar_to_cp936,
72+
NULL
7273
};
7374

7475
const struct mbfl_convert_vtbl vtbl_cp936_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ const mbfl_encoding mbfl_encoding_euc_cn = {
6767
&vtbl_euccn_wchar,
6868
&vtbl_wchar_euccn,
6969
mb_euccn_to_wchar,
70-
mb_wchar_to_euccn
70+
mb_wchar_to_euccn,
71+
NULL
7172
};
7273

7374
const struct mbfl_convert_vtbl vtbl_euccn_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ const mbfl_encoding mbfl_encoding_euc_jp = {
6868
&vtbl_eucjp_wchar,
6969
&vtbl_wchar_eucjp,
7070
mb_eucjp_to_wchar,
71-
mb_wchar_to_eucjp
71+
mb_wchar_to_eucjp,
72+
NULL
7273
};
7374

7475
const struct mbfl_convert_vtbl vtbl_eucjp_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ const mbfl_encoding mbfl_encoding_eucjp_win = {
6969
&vtbl_eucjpwin_wchar,
7070
&vtbl_wchar_eucjpwin,
7171
mb_eucjpwin_to_wchar,
72-
mb_wchar_to_eucjpwin
72+
mb_wchar_to_eucjpwin,
73+
NULL
7374
};
7475

7576
const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ const mbfl_encoding mbfl_encoding_euc_kr = {
6666
&vtbl_euckr_wchar,
6767
&vtbl_wchar_euckr,
6868
mb_euckr_to_wchar,
69-
mb_wchar_to_euckr
69+
mb_wchar_to_euckr,
70+
NULL
7071
};
7172

7273
const struct mbfl_convert_vtbl vtbl_euckr_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ const mbfl_encoding mbfl_encoding_euc_tw = {
6868
&vtbl_euctw_wchar,
6969
&vtbl_wchar_euctw,
7070
mb_euctw_to_wchar,
71-
mb_wchar_to_euctw
71+
mb_wchar_to_euctw,
72+
NULL
7273
};
7374

7475
const struct mbfl_convert_vtbl vtbl_euctw_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_gb18030.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ const mbfl_encoding mbfl_encoding_gb18030 = {
4949
&vtbl_gb18030_wchar,
5050
&vtbl_wchar_gb18030,
5151
mb_gb18030_to_wchar,
52-
mb_wchar_to_gb18030
52+
mb_wchar_to_gb18030,
53+
NULL
5354
};
5455

5556
const struct mbfl_convert_vtbl vtbl_gb18030_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_htmlent.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ const mbfl_encoding mbfl_encoding_html_ent = {
6666
&vtbl_html_wchar,
6767
&vtbl_wchar_html,
6868
mb_htmlent_to_wchar,
69-
mb_wchar_to_htmlent
69+
mb_wchar_to_htmlent,
70+
NULL
7071
};
7172

7273
const struct mbfl_convert_vtbl vtbl_wchar_html = {

ext/mbstring/libmbfl/filters/mbfilter_hz.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ const mbfl_encoding mbfl_encoding_hz = {
4747
&vtbl_hz_wchar,
4848
&vtbl_wchar_hz,
4949
mb_hz_to_wchar,
50-
mb_wchar_to_hz
50+
mb_wchar_to_hz,
51+
NULL
5152
};
5253

5354
const struct mbfl_convert_vtbl vtbl_hz_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ const mbfl_encoding mbfl_encoding_2022jpms = {
5151
&vtbl_2022jpms_wchar,
5252
&vtbl_wchar_2022jpms,
5353
mb_iso2022jpms_to_wchar,
54-
mb_wchar_to_iso2022jpms
54+
mb_wchar_to_iso2022jpms,
55+
NULL
5556
};
5657

5758
const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ const mbfl_encoding mbfl_encoding_2022kr = {
5454
&vtbl_2022kr_wchar,
5555
&vtbl_wchar_2022kr,
5656
mb_iso2022kr_to_wchar,
57-
mb_wchar_to_iso2022kr
57+
mb_wchar_to_iso2022kr,
58+
NULL
5859
};
5960

6061
const struct mbfl_convert_vtbl vtbl_wchar_2022kr = {

ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ const mbfl_encoding mbfl_encoding_2022jp_kddi = {
7171
&vtbl_2022jp_kddi_wchar,
7272
&vtbl_wchar_2022jp_kddi,
7373
mb_iso2022jp_kddi_to_wchar,
74-
mb_wchar_to_iso2022jp_kddi
74+
mb_wchar_to_iso2022jp_kddi,
75+
NULL
7576
};
7677

7778
const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = {

ext/mbstring/libmbfl/filters/mbfilter_jis.c

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter);
3737
static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
3838
static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
3939
static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
40+
static bool mb_check_iso2022jp(unsigned char *in, size_t in_len);
4041

4142
const mbfl_encoding mbfl_encoding_jis = {
4243
mbfl_no_encoding_jis,
@@ -49,6 +50,7 @@ const mbfl_encoding mbfl_encoding_jis = {
4950
&vtbl_wchar_jis,
5051
mb_iso2022jp_to_wchar,
5152
mb_wchar_to_jis,
53+
mb_check_iso2022jp
5254
};
5355

5456
const mbfl_encoding mbfl_encoding_2022jp = {
@@ -61,7 +63,8 @@ const mbfl_encoding mbfl_encoding_2022jp = {
6163
&vtbl_2022jp_wchar,
6264
&vtbl_wchar_2022jp,
6365
mb_iso2022jp_to_wchar,
64-
mb_wchar_to_iso2022jp
66+
mb_wchar_to_iso2022jp,
67+
mb_check_iso2022jp
6568
};
6669

6770
const struct mbfl_convert_vtbl vtbl_jis_wchar = {
@@ -780,3 +783,95 @@ static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool
780783

781784
MB_CONVERT_BUF_STORE(buf, out, limit);
782785
}
786+
787+
/*
 * Validate a byte string as ISO-2022-JP / JIS without converting it.
 *
 * Scans the in_len bytes starting at `in`, tracking the current character-set
 * state (starting in ASCII) as escape sequences and Kana In/Out markers are seen.
 *
 * Returns false as soon as any of the following is found:
 *   - an ESC with fewer than two bytes following it, or an escape sequence
 *     that is not one of the forms recognized below;
 *   - in a double-byte state, a lead byte in 0x21..0x7E that is not followed by
 *     a trail byte in 0x21..0x7E, or a pair with no non-zero entry in the
 *     corresponding jisx0208_ucs_table / jisx0212_ucs_table;
 *   - any byte >= 0x80.
 * Returns true only if the whole input was accepted AND the final state is ASCII.
 */
static bool mb_check_iso2022jp(unsigned char *in, size_t in_len)
788+
{
789+
unsigned char *p = in, *e = p + in_len;
790+
unsigned int state = ASCII;
791+
792+
while (p < e) {
793+
unsigned char c = *p++;
794+
if (c == 0x1B) {
795+
/* ESC seen; this is an escape sequence */
796+
/* Every accepted sequence has at least two bytes after ESC; this check also keeps the c2 and c3 reads below in bounds */
if ((e - p) < 2) {
797+
return false;
798+
}
799+
unsigned char c2 = *p++;
800+
if (c2 == '$') {
801+
unsigned char c3 = *p++;
802+
if (c3 == '@' || c3 == 'B') {
803+
state = JISX_0208;
804+
} else if (c3 == '(') {
805+
/* Four-byte form ESC $ ( x; the fourth byte is not covered by the length check above */
if (p == e) {
806+
return false;
807+
}
808+
unsigned char c4 = *p++;
809+
if (c4 == '@' || c4 == 'B') {
810+
state = JISX_0208;
811+
} else if (c4 == 'D') {
812+
state = JISX_0212;
813+
} else {
814+
return false;
815+
}
816+
} else {
817+
return false;
818+
}
819+
} else if (c2 == '(') {
820+
unsigned char c3 = *p++;
821+
if (c3 == 'B' || c3 == 'H') {
822+
state = ASCII;
823+
} else if (c3 == 'J') {
824+
state = JISX_0201_LATIN;
825+
} else if (c3 == 'I') {
826+
state = JISX_0201_KANA;
827+
} else {
828+
return false;
829+
}
830+
} else {
831+
return false;
832+
}
833+
} else if (c == 0xE) {
834+
/* "Kana In" marker; this is just for JIS-7/8, but we also accept it for ISO-2022-JP */
835+
state = JISX_0201_KANA;
836+
} else if (c == 0xF) {
837+
/* "Kana Out" marker */
838+
state = ASCII;
839+
/* NOTE(review): `state >= JISX_0208` relies on the state constants (defined outside this view) being ordered so that only the double-byte states compare >= JISX_0208 -- confirm */
} else if (state >= JISX_0208 && (c > 0x20 && c < 0x7F)) {
840+
/* A lead byte with no trail byte is a truncated character */
if (p == e) {
841+
return false;
842+
}
843+
unsigned char c2 = *p++;
844+
if (c2 > 0x20 && c2 < 0x7F) {
845+
/* Both bytes are in 0x21..0x7E (94 values each); flatten the pair into a single table index */
unsigned int s = (c - 0x21)*94 + c2 - 0x21;
846+
/* w stays 0 when s is past the end of the table, so out-of-range pairs are rejected like unmapped ones */
uint32_t w = 0;
847+
if (state == JISX_0208) {
848+
if (s < jisx0208_ucs_table_size) {
849+
w = jisx0208_ucs_table[s];
850+
}
851+
if (w > 0) {
852+
continue;
853+
}
854+
} else {
855+
/* Any other state that reached this branch is looked up in the JIS X 0212 table */
if (s < jisx0212_ucs_table_size) {
856+
w = jisx0212_ucs_table[s];
857+
}
858+
if (w > 0) {
859+
continue;
860+
}
861+
}
862+
return false;
863+
} else {
864+
return false;
865+
}
866+
/* Any remaining 7-bit byte is accepted as-is, whatever the current state */
} else if (c < 0x80) {
867+
continue;
868+
} else if (c >= 0xA1 && c <= 0xDF) {
869+
/* GR-invoked Kana; Conversion is accepted, check is not. */
870+
return false;
871+
} else {
872+
return false;
873+
}
874+
}
875+
876+
/* The input is only valid if it finishes back in the ASCII state */
return state == ASCII;
877+
}

ext/mbstring/libmbfl/filters/mbfilter_qprint.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ const mbfl_encoding mbfl_encoding_qprint = {
4646
NULL,
4747
NULL,
4848
mb_qprint_to_wchar,
49-
mb_wchar_to_qprint
49+
mb_wchar_to_qprint,
50+
NULL
5051
};
5152

5253
const struct mbfl_convert_vtbl vtbl_8bit_qprint = {

ext/mbstring/libmbfl/filters/mbfilter_singlebyte.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ static int mbfl_conv_reverselookup_table(int c, mbfl_convert_filter *filter, int
8686
&vtbl_##id##_wchar, \
8787
&vtbl_wchar_##id, \
8888
mb_##id##_to_wchar, \
89-
mb_wchar_to_##id \
89+
mb_wchar_to_##id, \
90+
NULL \
9091
}
9192

9293
/* For single-byte encodings which use a conversion table */

ext/mbstring/libmbfl/filters/mbfilter_sjis.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ const mbfl_encoding mbfl_encoding_sjis = {
7171
&vtbl_sjis_wchar,
7272
&vtbl_wchar_sjis,
7373
mb_sjis_to_wchar,
74-
mb_wchar_to_sjis
74+
mb_wchar_to_sjis,
75+
NULL
7576
};
7677

7778
const struct mbfl_convert_vtbl vtbl_sjis_wchar = {

0 commit comments

Comments
 (0)