Skip to content

Commit 6ef7327

Browse files
committed
Conversion utf8 to utf16 and pretty-printing of Java strings
Added two functions for UTF-8 to UTF-16 conversion, one for little-endian and one for big-endian output. Added a function utf16_little_endian_to_ascii to display Java strings nicely as an ASCII sequence.
1 parent 9e3ea18 commit 6ef7327

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

src/util/unicode.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ Author: Daniel Kroening, [email protected]
77
\*******************************************************************/
88

99
#include <cstring>
10+
#include <locale>
11+
#include <codecvt>
1012

1113
#include "unicode.h"
1214

@@ -258,3 +260,41 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide)
258260

259261
return argv_narrow;
260262
}
263+
264+
/// Converts a UTF-8 encoded byte string into a wide string of UTF-16 code
/// units, using std::codecvt_utf8_utf16<wchar_t> with its default settings.
///
/// NOTE(review): despite the name, no byte-order mode is passed to the facet
/// here; the order of the produced code units is whatever the standard
/// library yields by default -- confirm against the callers' expectations.
///
/// \param in: the UTF-8 input
/// \return the converted wide string; conversion failures are reported by
///   std::wstring_convert::from_bytes (no fallback string is configured)
std::wstring utf8_to_utf16_big_endian(const std::string& in)
{
  using utf8_utf16_codecvt=std::codecvt_utf8_utf16<wchar_t>;
  return std::wstring_convert<utf8_utf16_codecvt>().from_bytes(in);
}
269+
270+
/// Converts a UTF-8 encoded byte string into a wide string of UTF-16 code
/// units, using std::codecvt_utf8_utf16<wchar_t> instantiated with the
/// little_endian mode flag.
///
/// NOTE(review): whether the little_endian flag changes the wchar_t code
/// units produced by this particular facet depends on the standard library
/// implementation -- verify on the targeted toolchains.
///
/// \param in: the UTF-8 input
/// \return the converted wide string; conversion failures are reported by
///   std::wstring_convert::from_bytes (no fallback string is configured)
std::wstring utf8_to_utf16_little_endian(const std::string& in)
{
  // The mode can only be given together with the maximum code point, so the
  // facet's documented default upper bound (0x10ffff) is spelled out here.
  // see: http://en.cppreference.com/w/cpp/locale/codecvt_utf8_utf16
  constexpr unsigned long max_code_point=0x10ffff;
  constexpr std::codecvt_mode byte_order=std::little_endian;

  using little_endian_codecvt=
    std::codecvt_utf8_utf16<wchar_t, max_code_point, byte_order>;

  return std::wstring_convert<little_endian_codecvt>().from_bytes(in);
}
282+
283+
/// Appends a `\u` escape for \p code to \p dest: a backslash, the letter 'u'
/// and the value in lower-case hexadecimal, zero-padded to at least four
/// digits. Values wider than 16 bits are written with all of their digits
/// rather than being cut off after the fourth one.
static void append_unicode_escape(std::string &dest, unsigned int code)
{
  static const char hex_digits[]="0123456789abcdef";

  // Digits come out least-significant first; two hex digits per byte of
  // `unsigned int` is always enough room.
  char reversed[sizeof(unsigned int)*2];
  std::size_t count=0;
  do
  {
    reversed[count++]=hex_digits[code&0xfu];
    code>>=4;
  }
  while(code!=0);

  dest+="\\u";
  dest.append(count<4 ? 4-count : 0, '0');
  while(count>0)
    dest+=reversed[--count];
}

/// Renders a wide string as a pure-ASCII sequence: printable ASCII characters
/// (0x20 to 0x7e) are copied unchanged, every other element is written as a
/// `\uXXXX` escape with lower-case hexadecimal digits.
///
/// The classification is deliberately independent of the global locale so
/// that the result never contains bytes outside the ASCII range. Note that a
/// backslash in the input is itself printable and is copied verbatim.
///
/// \param in: the wide string to render
/// \return the ASCII representation of \p in
std::string utf16_little_endian_to_ascii(const std::wstring& in)
{
  std::string result;
  result.reserve(in.size());

  for(const wchar_t c : in)
  {
    if(c>=0x20 && c<=0x7e)
      result+=static_cast<char>(c);
    else
      append_unicode_escape(result, static_cast<unsigned int>(c));
  }

  return result;
}

src/util/unicode.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ std::wstring widen(const std::string &s);
2222
std::string utf32_to_utf8(const std::basic_string<unsigned int> &s);
2323
std::string utf16_to_utf8(const std::basic_string<unsigned short int> &s);
2424

25+
std::wstring utf8_to_utf16_big_endian(const std::string&);
26+
std::wstring utf8_to_utf16_little_endian(const std::string&);
27+
std::string utf16_little_endian_to_ascii(const std::wstring& in);
28+
2529
const char **narrow_argv(int argc, const wchar_t **argv_wide);
2630

2731
#endif // CPROVER_UTIL_UNICODE_H

0 commit comments

Comments
 (0)