Skip to content

Commit 421f0b2

Browse files
committed
Refactoring of UTF-8 with mobile vendor extensions (DoCoMo, KDDI, SoftBank)
1 parent 7b4e820 commit 421f0b2

File tree

1 file changed

+49
-56
lines changed

1 file changed

+49
-56
lines changed

ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c

Lines changed: 49 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -34,11 +34,11 @@
3434

3535
extern void mbfl_filt_ident_utf8(unsigned char c, mbfl_identify_filter *filter);
3636
extern void mbfl_filt_conv_utf8_wchar_flush(mbfl_convert_filter *filter);
37+
extern void mbfl_filt_put_invalid_char(int c, mbfl_convert_filter *filter);
3738

3839
extern const unsigned char mblen_table_utf8[];
3940

4041
static const char *mbfl_encoding_utf8_docomo_aliases[] = {"UTF-8-DOCOMO", "UTF8-DOCOMO", NULL};
41-
static const char *mbfl_encoding_utf8_kddi_a_aliases[] = {NULL};
4242
static const char *mbfl_encoding_utf8_kddi_b_aliases[] = {"UTF-8-Mobile#KDDI", "UTF-8-KDDI", "UTF8-KDDI", NULL};
4343
static const char *mbfl_encoding_utf8_sb_aliases[] = {"UTF-8-SOFTBANK", "UTF8-SOFTBANK", NULL};
4444

@@ -57,7 +57,7 @@ const mbfl_encoding mbfl_encoding_utf8_kddi_a = {
5757
mbfl_no_encoding_utf8_kddi_a,
5858
"UTF-8-Mobile#KDDI-A",
5959
"UTF-8",
60-
mbfl_encoding_utf8_kddi_a_aliases,
60+
NULL,
6161
mblen_table_utf8,
6262
0,
6363
&vtbl_utf8_kddi_a_wchar,
@@ -182,52 +182,48 @@ const struct mbfl_convert_vtbl vtbl_wchar_utf8_sb = {
182182
mbfl_filt_conv_common_flush
183183
};
184184

185-
void mbfl_filt_put_invalid_char(int c, mbfl_convert_filter *filter);
186-
187-
/*
188-
* UTF-8 => wchar
189-
*/
190185
void mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
191186
{
192187
int s, s1 = 0, c1 = 0, snd = 0;
193188

194189
retry:
195-
switch (filter->status & 0xff) {
190+
switch (filter->status & 0xFF) {
196191
case 0x00:
197192
if (c < 0x80) {
198193
(*filter->output_function)(c, filter->data);
199-
} else if (c >= 0xc2 && c <= 0xdf) { /* 2byte code first char: 0xc2-0xdf */
194+
} else if (c >= 0xc2 && c <= 0xdf) { /* 2-byte char, first byte: 0xC2-0xDF */
200195
filter->status = 0x10;
201196
filter->cache = c & 0x1f;
202-
} else if (c >= 0xe0 && c <= 0xef) { /* 3byte code first char: 0xe0-0xef */
197+
} else if (c >= 0xe0 && c <= 0xef) { /* 3-byte char, first byte: 0xE0-0xEF */
203198
filter->status = 0x20;
204199
filter->cache = c & 0xf;
205-
} else if (c >= 0xf0 && c <= 0xf4) { /* 3byte code first char: 0xf0-0xf4 */
200+
} else if (c >= 0xf0 && c <= 0xf4) { /* 4-byte char, first byte: 0xF0-0xF4 */
206201
filter->status = 0x30;
207202
filter->cache = c & 0x7;
208203
} else {
209204
mbfl_filt_put_invalid_char(c, filter);
210205
}
211206
break;
212-
case 0x10: /* 2byte code 2nd char: 0x80-0xbf */
213-
case 0x21: /* 3byte code 3rd char: 0x80-0xbf */
214-
case 0x32: /* 4byte code 4th char: 0x80-0xbf */
207+
208+
case 0x10: /* 2-byte char, 2nd byte: 0x80-0xBF */
209+
case 0x21: /* 3-byte char, 3rd byte: 0x80-0xBF */
210+
case 0x32: /* 4-byte char, 4th byte: 0x80-0xBF */
215211
filter->status = 0;
216212
if (c >= 0x80 && c <= 0xbf) {
217-
s = (filter->cache<<6) | (c & 0x3f);
213+
s = (filter->cache << 6) | (c & 0x3f);
218214
filter->cache = 0;
219215

220-
if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo &&
221-
mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4) > 0) {
216+
if (filter->from == &mbfl_encoding_utf8_docomo &&
217+
mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4)) {
222218
s = mbfilter_sjis_emoji_docomo2unicode(s1, &snd);
223-
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_a &&
224-
mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 7) > 0) {
219+
} else if (filter->from == &mbfl_encoding_utf8_kddi_a &&
220+
mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 7)) {
225221
s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd);
226-
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b &&
227-
mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8) > 0) {
222+
} else if (filter->from == &mbfl_encoding_utf8_kddi_b &&
223+
mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8)) {
228224
s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd);
229-
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb &&
230-
mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6) > 0) {
225+
} else if (filter->from == &mbfl_encoding_utf8_sb &&
226+
mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6)) {
231227
s = mbfilter_sjis_emoji_sb2unicode(s1, &snd);
232228
}
233229

@@ -240,75 +236,72 @@ void mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
240236
goto retry;
241237
}
242238
break;
243-
case 0x20: /* 3byte code 2nd char: 0:0xa0-0xbf,D:0x80-9F,1-C,E-F:0x80-0x9f */
244-
s = (filter->cache<<6) | (c & 0x3f);
245-
c1 = filter->cache & 0xf;
246-
247-
if ((c >= 0x80 && c <= 0xbf) &&
248-
((c1 == 0x0 && c >= 0xa0) ||
249-
(c1 == 0xd && c < 0xa0) ||
250-
(c1 > 0x0 && c1 != 0xd))) {
239+
240+
case 0x20: /* 3-byte char, 2nd byte: 0:0xA0-0xBF, D:0x80-0x9F, 1-C,E-F:0x80-0xBF */
241+
s = (filter->cache << 6) | (c & 0x3f);
242+
c1 = filter->cache & 0xF;
243+
244+
if ((c >= 0x80 && c <= 0xBF) &&
245+
((c1 == 0 && c >= 0xA0) || (c1 == 0xD && c < 0xA0) || (c1 > 0 && c1 != 0xD))) {
251246
filter->cache = s;
252247
filter->status++;
253248
} else {
254249
mbfl_filt_put_invalid_char(filter->cache, filter);
255250
goto retry;
256251
}
257252
break;
258-
case 0x30: /* 4byte code 2nd char: 0:0x90-0xbf,1-3:0x80-0xbf,4:0x80-0x8f */
259-
s = (filter->cache<<6) | (c & 0x3f);
253+
254+
case 0x30: /* 4-byte char, 2nd byte: 0:0x90-0xBF,1-3:0x80-0xBF,4:0x80-0x8F */
255+
s = (filter->cache << 6) | (c & 0x3f);
260256
c1 = filter->cache & 0x7;
261257

262-
if ((c >= 0x80 && c <= 0xbf) &&
263-
((c1 == 0x0 && c >= 0x90) ||
264-
(c1 == 0x4 && c < 0x90) ||
265-
(c1 > 0x0 && c1 != 0x4))) {
258+
if ((c >= 0x80 && c <= 0xBF) &&
259+
((c1 == 0 && c >= 0x90) || (c1 == 0x4 && c < 0x90) || (c1 > 0 && c1 != 0x4))) {
266260
filter->cache = s;
267261
filter->status++;
268262
} else {
269263
mbfl_filt_put_invalid_char(filter->cache, filter);
270264
goto retry;
271265
}
272266
break;
273-
case 0x31: /* 4byte code 3rd char: 0x80-0xbf */
274-
if (c >= 0x80 && c <= 0xbf) {
275-
filter->cache = (filter->cache<<6) | (c & 0x3f);
267+
268+
case 0x31: /* 4-byte char, 3rd byte: 0x80-0xBF */
269+
if (c >= 0x80 && c <= 0xBF) {
270+
filter->cache = (filter->cache << 6) | (c & 0x3f);
276271
filter->status++;
277272
} else {
278273
mbfl_filt_put_invalid_char(filter->cache, filter);
279274
goto retry;
280275
}
281276
break;
277+
282278
default:
283279
filter->status = 0;
284280
break;
285281
}
286282
}
287283

288-
/*
289-
* wchar => UTF-8
290-
*/
291284
void mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter)
292285
{
293286
if (c >= 0 && c < 0x110000) {
294287
int s1, c1;
295288

296-
if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo &&
297-
mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 &&
298-
mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4) > 0) ||
299-
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a &&
300-
mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 &&
301-
mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) ||
302-
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b &&
303-
mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 &&
304-
mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) ||
305-
(filter->to->no_encoding == mbfl_no_encoding_utf8_sb &&
306-
mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 &&
307-
mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6) > 0)) {
289+
if ((filter->to == &mbfl_encoding_utf8_docomo &&
290+
mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) &&
291+
mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4)) ||
292+
(filter->to == &mbfl_encoding_utf8_kddi_a &&
293+
mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) &&
294+
mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7)) ||
295+
(filter->to == &mbfl_encoding_utf8_kddi_b &&
296+
mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) &&
297+
mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8)) ||
298+
(filter->to == &mbfl_encoding_utf8_sb &&
299+
mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) &&
300+
mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6))) {
308301
c = c1;
309302
}
310303

311-
if (filter->status == 1 && filter->cache > 0) {
304+
if (filter->status) {
312305
return;
313306
}
314307

0 commit comments

Comments
 (0)