|
7 | 7 | \*******************************************************************/
|
8 | 8 |
|
9 | 9 | #include <cstring>
|
| 10 | +#include <locale> |
| 11 | +#include <codecvt> |
| 12 | +#include <iomanip> |
| 13 | +#include <sstream> |
10 | 14 |
|
11 | 15 | #include "unicode.h"
|
12 | 16 |
|
@@ -258,3 +262,79 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide)
|
258 | 262 |
|
259 | 263 | return argv_narrow;
|
260 | 264 | }
|
| 265 | + |
/*******************************************************************\

Function: utf8_to_utf16_big_endian

  Inputs: String in UTF-8 format

 Outputs: String in UTF-16BE format: one UTF-16 code unit per
          element, with the two bytes of each unit swapped on
          little-endian hosts so that the 16-bit value is laid out
          big-endian in memory

 Purpose: Convert UTF-8 text to UTF-16BE. The conversion is done by
          std::wstring_convert, which throws std::range_error on
          malformed input because no error string is configured.
          Note this requires g++-5 libstdc++ / libc++ / MSVC2010+

\*******************************************************************/

std::wstring utf8_to_utf16_big_endian(const std::string& in)
{
  std::wstring_convert<std::codecvt_utf8_utf16<wchar_t> > converter;
  std::wstring result=converter.from_bytes(in);

  // The facet hands back code units as host-order integers, so on a
  // little-endian host the bytes still have to be swapped to obtain
  // the big-endian layout this function promises.
  const unsigned short probe=1;
  const bool host_is_little_endian=
    *reinterpret_cast<const unsigned char *>(&probe)==1;

  if(host_is_little_endian)
  {
    for(auto &code_unit : result)
    {
      // only the low 16 bits carry data, even where wchar_t is wider
      const unsigned int value=
        static_cast<unsigned int>(code_unit) & 0xffffu;
      code_unit=static_cast<wchar_t>(((value & 0xffu)<<8) | (value>>8));
    }
  }

  return result;
}
| 283 | + |
/*******************************************************************\

Function: utf8_to_utf16_little_endian

  Inputs: String in UTF-8 format

 Outputs: String in UTF-16LE format

 Purpose: Convert UTF-8 text through std::wstring_convert, using a
          codecvt_utf8_utf16 facet instantiated with the
          little_endian mode flag.
          Note this requires g++-5 libstdc++ / libc++ / MSVC2010+

\*******************************************************************/

std::wstring utf8_to_utf16_little_endian(const std::string& in)
{
  // 0x10ffff is the facet's default upper bound on code points; it is
  // written out only because the mode argument follows it in the
  // template parameter list
  // see: http://en.cppreference.com/w/cpp/locale/codecvt_utf8_utf16
  // NOTE(review): that page describes little_endian as applying to
  // UTF-16 byte input only - confirm the flag has the intended effect
  using little_endian_facett=
    std::codecvt_utf8_utf16<wchar_t, 0x10ffff, std::little_endian>;

  std::wstring_convert<little_endian_facett> utf16_converter;
  return utf16_converter.from_bytes(in);
}
| 308 | + |
/*******************************************************************\

Function: utf16_little_endian_to_ascii

  Inputs: String in UTF-16LE format

 Outputs: String in US-ASCII format, with \uxxxx escapes for other
          characters

 Purpose: Printable characters in the 7-bit ASCII range are copied
          through unchanged. Every other code unit is written as an
          escape holding its value in lower-case hexadecimal,
          zero-padded to at least four digits. Code units are handled
          one at a time, so a surrogate pair produces two escapes.

\*******************************************************************/

std::string utf16_little_endian_to_ascii(const std::wstring& in)
{
  std::ostringstream result;
  const std::locale loc;

  for(const auto c : in)
  {
    // Only 0x00-0x7f is US-ASCII: anything above must be escaped even
    // when the current locale classifies it as printable. Comparing
    // through an unsigned type also keeps negative values out when
    // wchar_t is signed.
    const bool is_ascii=static_cast<unsigned long>(c)<=0x7f;

    if(is_ascii && isprint(c, loc))
      result << static_cast<char>(c);
    else
    {
      // setw applies to the next insertion only, so it is repeated
      // for every escape
      result << "\\u"
             << std::hex
             << std::setw(4)
             << std::setfill('0')
             << static_cast<unsigned int>(c);
    }
  }

  return result.str();
}
0 commit comments