Skip to content

Commit

Permalink
CUtf8Utils: add "checkStrForUtf8()"
Browse files Browse the repository at this point in the history
New UTF-8 string check, based on the current Unicode standard (6.3), with three result values: plain ASCII, valid UTF-8, high ASCII.
  • Loading branch information
Karlson2k committed Dec 1, 2013
1 parent 766165a commit 88788c3
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
26 changes: 26 additions & 0 deletions xbmc/utils/Utf8Utils.cpp
Expand Up @@ -21,6 +21,32 @@
#include "Utf8Utils.h"


/**
 * Classify a string as plain US-ASCII, valid UTF-8 or neither.
 *
 * The string is walked one character at a time, using SizeOfUtf8Char() to
 * obtain the byte length of the character at the current offset.
 *
 * @param str string to check
 * @return "hiAscii" as soon as a character length of zero is reported
 *         (treated as a non-valid UTF-8 sequence), "utf8string" if every
 *         character is valid and at least one is longer than one byte,
 *         "plainAscii" otherwise (including for an empty string)
 */
CUtf8Utils::utf8CheckResult CUtf8Utils::checkStrForUtf8(const std::string& str)
{
  const char* const data = str.c_str();
  const size_t total = str.length();
  bool sawMultiByte = false;

  for (size_t offset = 0; offset < total; )
  {
    const size_t seqLen = SizeOfUtf8Char(data + offset);

    // A zero length means the bytes at this offset are not a valid UTF-8 sequence
    if (seqLen == 0)
      return hiAscii;

    if (seqLen > 1)
      sawMultiByte = true;

    offset += seqLen;
  }

  // Valid throughout: the result depends only on whether a multi-byte sequence was seen
  return sawMultiByte ? utf8string : plainAscii;
}



size_t CUtf8Utils::FindValidUtf8Char(const std::string& str, const size_t startPos /*= 0*/)
{
const char* strC = str.c_str();
Expand Down
15 changes: 15 additions & 0 deletions xbmc/utils/Utf8Utils.h
Expand Up @@ -26,6 +26,21 @@
class CUtf8Utils
{
public:
/**
 * Result of checkStrForUtf8(): how the checked string relates to UTF-8.
 */
enum utf8CheckResult
{
plainAscii = -1, // only single-byte US-ASCII characters (valid for UTF-8 too); also the result for an empty string
hiAscii = 0, // not valid UTF-8: the string contains high (non-US-ASCII) bytes
// (possibly a single-byte national encoding like WINDOWS-1251, a multi-byte encoding like UTF-32, or invalid UTF-8)
utf8string = 1 // valid UTF-8 with at least one multi-byte sequence (i.e. not US-ASCII only)
};

/**
* Check whether the given string consists of valid UTF-8 sequences
* @param str string to check
* @return "plainAscii" if the string is empty or contains only single-byte characters,
*         "utf8string" if the string is valid UTF-8 with at least one multi-byte sequence,
*         "hiAscii" if a non-valid UTF-8 sequence is found
*/
static utf8CheckResult checkStrForUtf8(const std::string& str);

// NOTE(review): the names suggest a forward and a reverse search for a valid UTF-8
// character beginning at startPos, returning a position in str - confirm against the definitions.
// The forward variant may be called without startPos (defaults to 0); the reverse variant requires it.
static size_t FindValidUtf8Char(const std::string& str, const size_t startPos = 0);
static size_t RFindValidUtf8Char(const std::string& str, const size_t startPos);

Expand Down

0 comments on commit 88788c3

Please sign in to comment.