Skip to content

Commit

Permalink
add utf16string_to_string
Browse files Browse the repository at this point in the history
  • Loading branch information
gfgtdf committed Oct 8, 2014
1 parent 692f9e2 commit b037cbb
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 19 deletions.
60 changes: 41 additions & 19 deletions src/serialization/unicode.cpp
Expand Up @@ -54,31 +54,32 @@ size_t byte_size_from_ucs4_codepoint(ucs4::char_t ch)
} // anonymous namespace

namespace implementation {

/**
 * Appends the byte encoding of a single code point to @a out.
 *
 * The number of bytes is obtained from byte_size_from_ucs4_codepoint().
 * A one-byte code point is appended unchanged. Otherwise the code point is
 * split into 6-bit groups, most significant group first; every emitted byte
 * has the 0x80 bit set, and the first byte additionally gets its top
 * @c count bits set to mark the sequence length.
 *
 * @param out String that receives the encoded bytes.
 * @param ch  Code point to encode.
 */
inline void push_ucs4char_to_string(std::string& out, ucs4::char_t ch)
{
	const size_t count = byte_size_from_ucs4_codepoint(ch);

	// Single-byte code points need no lead/continuation markers.
	if(count == 1) {
		out.push_back(static_cast<char>(ch));
		return;
	}

	// Walk the 6-bit groups from the most significant one down to group 0.
	int group = static_cast<int>(count) - 1;
	bool is_lead_byte = true;
	while(group >= 0) {
		unsigned char byte = (ch >> (6 * group)) & 0x3f;
		byte |= 0x80;
		if(is_lead_byte) {
			// Only the first byte carries the length prefix.
			byte |= 0xff << (8 - count);
			is_lead_byte = false;
		}
		out.push_back(byte);
		--group;
	}
}

std::string ucs4string_to_string(const ucs4::string &src)
{
std::string ret;

try {
for(ucs4::string::const_iterator i = src.begin(); i != src.end(); ++i) {
unsigned int count;
ucs4::char_t ch = *i;

// Determine the bytes required
count = byte_size_from_ucs4_codepoint(ch);

if(count == 1) {
ret.push_back(static_cast<char>(ch));
} else {
for(int j = static_cast<int>(count) - 1; j >= 0; --j) {
unsigned char c = (ch >> (6 * j)) & 0x3f;
c |= 0x80;
if(j == static_cast<int>(count) - 1) {
c |= 0xff << (8 - count);
}
ret.push_back(c);
}
}

push_ucs4char_to_string(ret, *i);
}

return ret;
Expand Down Expand Up @@ -140,6 +141,27 @@ ucs4::string utf16string_to_ucs4string(const utf16::string & src)
return res;
}

/**
 * Converts a UTF-16 string into a std::string.
 *
 * Each value produced by iterating @a src with utf16::iterator is appended
 * to the result via push_ucs4char_to_string().
 *
 * If utf8::invalid_utf8_exception is thrown during the conversion, an error
 * is logged and whatever was converted before the failure is returned.
 *
 * @param src The UTF-16 input.
 * @return The converted string (possibly truncated on invalid input).
 */
std::string utf16string_to_string(const utf16::string & src)
{
	std::string converted;

	try {
		utf16::iterator it(src);
		const utf16::iterator stop(utf16::iterator::end(src));

		for(; it != stop; ++it) {
			push_ucs4char_to_string(converted, *it);
		}
	}
	catch(const utf8::invalid_utf8_exception&) {
		// Keep the partial result; fall through to the common return below.
		ERR_GENERAL << "Invalid UTF-16 string" << std::endl;
	}

	return converted;
}

utf16::string ucs4string_to_utf16string(const ucs4::string &src)
{
utf16::string res;
Expand Down
6 changes: 6 additions & 0 deletions src/serialization/unicode.hpp
Expand Up @@ -108,6 +108,7 @@ namespace implementation {
// Conversion routines between std::string, ucs4::string and utf16::string.
ucs4::string string_to_ucs4string(const std::string &);
std::string ucs4char_to_string(const ucs4::char_t);
ucs4::string utf16string_to_ucs4string(const utf16::string &);
// Converts a UTF-16 string directly to a std::string; on invalid input an
// error is logged and the part converted so far is returned.
std::string utf16string_to_string(const utf16::string &);
utf16::string ucs4string_to_utf16string(const ucs4::string &);
} // end namespace implementation

Expand Down Expand Up @@ -148,4 +149,9 @@ ucs4::string unicode_cast<ucs4::string, utf16::string>(const utf16::string &in)
return implementation::utf16string_to_ucs4string(in);
}

/**
 * unicode_cast specialization for utf16::string -> std::string.
 * Forwards to implementation::utf16string_to_string().
 */
template <>
inline std::string unicode_cast<std::string, utf16::string>(const utf16::string &in)
{
	return implementation::utf16string_to_string(in);
}

#endif

0 comments on commit b037cbb

Please sign in to comment.