Skip to content
This repository
Browse code

Merge pull request #3317 from Karlson2k/charsetconverter_rework_02

CharsetConverter rework - Part 2
  • Loading branch information...
commit c21f135590db2411beb672340be6f84e134be9a2 2 parents d0fc511 + 0a49c4d
jmarshallnz authored September 27, 2013
1  configure.in
@@ -920,6 +920,7 @@ fi
920 920
 AC_LANG_PUSH([C++])
921 921
 AC_CHECK_TYPES([std::u16string, std::u32string], [], [], [[#include <string>]])
922 922
 AC_CHECK_TYPES([char16_t, char32_t])
  923
+AC_CHECK_SIZEOF([wchar_t])
923 924
 AC_LANG_POP([C++])
924 925
 
925 926
 # Add top source directory for all builds so we can use config.h
118  xbmc/utils/CharsetConverter.cpp
@@ -32,30 +32,52 @@
32 32
 #include <iconv.h>
33 33
 
34 34
 #if defined(TARGET_DARWIN)
35  
-#ifdef __POWERPC__
36  
-  #define WCHAR_CHARSET "UTF-32BE"
37  
-#else
38  
-  #define WCHAR_CHARSET "UTF-32LE"
39  
-#endif
  35
+  #define WCHAR_IS_UTF32 1
  36
+  #undef WCHAR_IS_UTF16
  37
+  #ifdef __POWERPC__
  38
+    #define WCHAR_CHARSET "UTF-32BE"
  39
+  #else
  40
+    #define WCHAR_CHARSET "UTF-32LE"
  41
+  #endif
40 42
   #define UTF8_SOURCE "UTF-8-MAC"
41 43
 #elif defined(TARGET_WINDOWS)
  44
+  #undef WCHAR_IS_UTF32
  45
+  #define WCHAR_IS_UTF16 1
42 46
   #define WCHAR_CHARSET "UTF-16LE"
43 47
   #define UTF8_SOURCE "UTF-8"
44 48
   #pragma comment(lib, "libfribidi.lib")
45 49
   #pragma comment(lib, "libiconv.lib")
46 50
 #elif defined(TARGET_ANDROID)
  51
+  #define WCHAR_IS_UTF32 1
  52
+  #undef WCHAR_IS_UTF16
47 53
   #define UTF8_SOURCE "UTF-8"
48  
-#ifdef __BIG_ENDIAN__
49  
-  #define WCHAR_CHARSET "UTF-32BE"
50  
-#else
51  
-  #define WCHAR_CHARSET "UTF-32LE"
52  
-#endif
  54
+  #ifdef __BIG_ENDIAN__
  55
+    #define WCHAR_CHARSET "UTF-32BE"
  56
+  #else
  57
+    #define WCHAR_CHARSET "UTF-32LE"
  58
+  #endif
53 59
 #else
54 60
   #define WCHAR_CHARSET "WCHAR_T"
55 61
   #define UTF8_SOURCE "UTF-8"
  62
+  #ifdef HAVE_CONFIG_H
  63
+    #include "config.h"
  64
+  #endif // HAVE_CONFIG_H
  65
+  #undef WCHAR_IS_UTF32
  66
+  #undef WCHAR_IS_UTF16
  67
+  #ifdef SIZEOF_WCHAR_T
  68
+    #if SIZEOF_WCHAR_T == 4
  69
+      #define WCHAR_IS_UTF32 1
  70
+    #elif SIZEOF_WCHAR_T == 2
  71
+      #define WCHAR_IS_UTF16 1
  72
+    #endif
  73
+  #endif
56 74
 #endif
57 75
 
58 76
 
  77
+static iconv_t m_iconvUtf8ToUtf32                = (iconv_t)-1;
  78
+static iconv_t m_iconvUtf32ToUtf8                = (iconv_t)-1;
  79
+static iconv_t m_iconvUtf32ToW                   = (iconv_t)-1;
  80
+static iconv_t m_iconvWToUtf32                   = (iconv_t)-1;
59 81
 static iconv_t m_iconvSubtitleCharsetToW         = (iconv_t)-1;
60 82
 static iconv_t m_iconvUtf8ToStringCharset        = (iconv_t)-1;
61 83
 static iconv_t m_iconvStringCharsetToUtf8        = (iconv_t)-1;
@@ -426,6 +448,10 @@ void CCharsetConverter::reset(void)
426 448
 {
427 449
   CSingleLock lock(m_critSection);
428 450
 
  451
+  ICONV_SAFE_CLOSE(m_iconvUtf8ToUtf32);
  452
+  ICONV_SAFE_CLOSE(m_iconvUtf32ToUtf8);
  453
+  ICONV_SAFE_CLOSE(m_iconvUtf32ToW);
  454
+  ICONV_SAFE_CLOSE(m_iconvWToUtf32);
429 455
   ICONV_SAFE_CLOSE(m_iconvUtf8ToStringCharset);
430 456
   ICONV_SAFE_CLOSE(m_iconvStringCharsetToUtf8);
431 457
   ICONV_SAFE_CLOSE(m_iconvSubtitleCharsetToW);
@@ -450,6 +476,78 @@ void CCharsetConverter::reset(void)
450 476
   }
451 477
 }
452 478
 
  479
+bool CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/) // UTF-8 -> UTF-32 via convert(); returns convert()'s result unchanged
  480
+{
  481
+  CSingleLock lock(m_critSection); // taken before the file-static m_iconvUtf8ToUtf32 handle is handed to convert()
  482
+  return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", utf8StringSrc, utf32StringDst, failOnBadChar); // NOTE(review): the literal 1 is presumably an output-size multiplier -- confirm against convert()
  483
+}
  484
+
  485
+std::u32string CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, bool failOnBadChar /*= true*/) // by-value convenience overload of the bool-returning utf8ToUtf32()
  486
+{
  487
+  std::u32string converted;
  488
+  utf8ToUtf32(utf8StringSrc, converted, failOnBadChar); // success flag is discarded; NOTE(review): the result is empty on failure only if convert() clears its output -- confirm
  489
+  return converted;
  490
+}
  491
+
  492
+bool CCharsetConverter::utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip /*= false*/, bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/) // UTF-8 -> UTF-32, optionally running logicalToVisualBiDi() on the UTF-8 text first
  493
+{
  494
+  if (bVisualBiDiFlip)
  495
+  {
  496
+    std::string strFlipped;
  497
+    if (!logicalToVisualBiDi(utf8StringSrc, strFlipped, FRIBIDI_UTF8, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF)) // the flip is applied to the UTF-8 input, before charset conversion
  498
+      return false; // NOTE(review): utf32StringDst is not modified on this path although the header documents it as "empty on any error" -- confirm
  499
+    CSingleLock lock(m_critSection); // lock is acquired only after the BiDi step has finished
  500
+    return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", strFlipped, utf32StringDst, failOnBadChar); // converts the flipped copy, not the caller's string
  501
+  }
  502
+  CSingleLock lock(m_critSection);
  503
+  return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", utf8StringSrc, utf32StringDst, failOnBadChar); // no flip requested: convert the input unchanged
  504
+}
  505
+
  506
+bool CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) // UTF-32 -> UTF-8 via convert(); the default shown here mirrors the declaration in CharsetConverter.h
  507
+{
  508
+  CSingleLock lock(m_critSection); // taken before the file-static m_iconvUtf32ToUtf8 handle is handed to convert()
  509
+  return convert(m_iconvUtf32ToUtf8, m_Utf8CharMaxSize, "UTF-32", "UTF-8", utf32StringSrc, utf8StringDst, failOnBadChar); // target is the literal "UTF-8", not UTF8_SOURCE; NOTE(review): m_Utf8CharMaxSize is presumably the output-size multiplier -- confirm
  510
+}
  511
+
  512
+std::string CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, bool failOnBadChar /*= false*/) // by-value convenience overload of the bool-returning utf32ToUtf8()
  513
+{
  514
+  std::string converted;
  515
+  utf32ToUtf8(utf32StringSrc, converted, failOnBadChar); // success flag is discarded; NOTE(review): the result is empty on failure only if convert() clears its output -- confirm
  516
+  return converted;
  517
+}
  518
+
  519
+bool CCharsetConverter::utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar /*= false*/) // UTF-32 -> wchar_t string; the default shown here mirrors the declaration in CharsetConverter.h
  520
+{
  521
+#ifdef WCHAR_IS_UTF32 // defined (or left undefined) by the platform block near the top of this file
  522
+  wStringDst.assign((const wchar_t*)utf32StringSrc.c_str(), utf32StringSrc.length()); // code units are copied as-is: no iconv call, no lock, and failOnBadChar is not consulted
  523
+  return true; // this branch has no failure path
  524
+#else // !WCHAR_IS_UTF32
  525
+  CSingleLock lock(m_critSection); // taken before the file-static m_iconvUtf32ToW handle is handed to convert()
  526
+  return convert(m_iconvUtf32ToW, 1, "UTF-32", WCHAR_CHARSET, utf32StringSrc, wStringDst, failOnBadChar);
  527
+#endif // !WCHAR_IS_UTF32
  528
+}
  529
+
  530
+bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, std::u32string& visualStringDst, bool forceLTRReadingOrder /*= false*/) // logical -> visual reordering of a UTF-32 string, implemented as a round trip through UTF-8
  531
+{
  532
+  visualStringDst.clear(); // output is emptied up front, so it is empty if the first conversion below fails
  533
+  std::string utf8Str;
  534
+  if (!utf32ToUtf8(logicalStringSrc, utf8Str, false)) // failOnBadChar is explicitly false for the UTF-32 -> UTF-8 leg
  535
+    return false;
  536
+
  537
+  return utf8ToUtf32Visual(utf8Str, visualStringDst, true, forceLTRReadingOrder); // bVisualBiDiFlip = true; failOnBadChar is left at its declared default
  538
+}
  539
+
  540
+bool CCharsetConverter::wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= false*/) // wchar_t string -> UTF-32; the default shown here mirrors the declaration in CharsetConverter.h
  541
+{
  542
+#ifdef WCHAR_IS_UTF32 // defined (or left undefined) by the platform block near the top of this file
  543
+  utf32StringDst.assign((const char32_t*)wStringSrc.c_str(), wStringSrc.length()); // code units are copied as-is: no iconv call, no lock, and failOnBadChar is not consulted
  544
+  return true; // this branch has no failure path
  545
+#else // !WCHAR_IS_UTF32
  546
+  CSingleLock lock(m_critSection); // taken before the file-static m_iconvWToUtf32 handle is handed to convert()
  547
+  return convert(m_iconvWToUtf32, 1, WCHAR_CHARSET, "UTF-32", wStringSrc, utf32StringDst, failOnBadChar);
  548
+#endif // !WCHAR_IS_UTF32
  549
+}
  550
+
453 551
 // The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping
454 552
 // of the string is already made or the string is not displayed in the GUI
455 553
 bool CCharsetConverter::utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip /*= true*/, 
81  xbmc/utils/CharsetConverter.h
@@ -42,6 +42,87 @@ class CCharsetConverter : public ISettingCallback
42 42
 
43 43
   void clear();
44 44
 
  45
+  /**
  46
+   * Convert UTF-8 string to UTF-32 string.
  47
+   * No RTL logical-visual transformation is performed.
  48
+   * @param utf8StringSrc       is source UTF-8 string to convert
  49
+   * @param utf32StringDst      is output UTF-32 string, empty on any error
  50
+   * @param failOnBadChar       if set to true function will fail on invalid character,
  51
+   *                            otherwise invalid character will be skipped
  52
+   * @return true on successful conversion, false on any error
  53
+   */
  54
+  bool utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar = true);
  55
+  /**
  56
+   * Convert UTF-8 string to UTF-32 string.
  57
+   * No RTL logical-visual transformation is performed.
  58
+   * @param utf8StringSrc       is source UTF-8 string to convert
  59
+   * @param failOnBadChar       if set to true function will fail on invalid character,
  60
+   *                            otherwise invalid character will be skipped
  61
+   * @return converted string on successful conversion, empty string on any error
  62
+   */
  63
+  std::u32string utf8ToUtf32(const std::string& utf8StringSrc, bool failOnBadChar = true);
  64
+  /**
  65
+   * Convert UTF-8 string to UTF-32 string.
  66
+   * RTL logical-visual transformation is optionally performed.
  67
+   * Use it for readable text, GUI strings etc.
  68
+   * @param utf8StringSrc       is source UTF-8 string to convert
  69
+   * @param utf32StringDst      is output UTF-32 string, empty on any error
  70
+   * @param bVisualBiDiFlip     perform RTL logical-visual transformation if set to true, must be set
  71
+   *                            to false if logical-visual transformation is already done
  72
+   * @param forceLTRReadingOrder        force LTR reading order
  73
+   * @param failOnBadChar       if set to true function will fail on invalid character,
  74
+   *                            otherwise invalid character will be skipped
  75
+   * @return true on successful conversion, false on any error
  76
+   */
  77
+  bool utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip = false, bool forceLTRReadingOrder = false, bool failOnBadChar = false);
  78
+  /**
  79
+   * Convert UTF-32 string to UTF-8 string.
  80
+   * No RTL visual-logical transformation is performed.
  81
+   * @param utf32StringSrc      is source UTF-32 string to convert
  82
+   * @param utf8StringDst       is output UTF-8 string, empty on any error
  83
+   * @param failOnBadChar       if set to true function will fail on invalid character,
  84
+   *                            otherwise invalid character will be skipped
  85
+   * @return true on successful conversion, false on any error
  86
+   */
  87
+  bool utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar = false);
  88
+  /**
  89
+   * Convert UTF-32 string to UTF-8 string.
  90
+   * No RTL visual-logical transformation is performed.
  91
+   * @param utf32StringSrc      is source UTF-32 string to convert
  92
+   * @param failOnBadChar       if set to true function will fail on invalid character,
  93
+   *                            otherwise invalid character will be skipped
  94
+   * @return converted string on successful conversion, empty string on any error
  95
+   */
  96
+  std::string utf32ToUtf8(const std::u32string& utf32StringSrc, bool failOnBadChar = false);
  97
+  /**
  98
+   * Convert UTF-32 string to wchar_t string (wstring).
  99
+   * No RTL visual-logical transformation is performed.
  100
+   * @param utf32StringSrc      is source UTF-32 string to convert
  101
+   * @param wStringDst          is output wchar_t string, empty on any error
  102
+   * @param failOnBadChar       if set to true function will fail on invalid character,
  103
+   *                            otherwise invalid character will be skipped
  104
+   * @return true on successful conversion, false on any error
  105
+   */
  106
+  bool utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar = false);
  107
+  /**
  108
+   * Perform logical to visual flip.
  109
+   * @param logicalStringSrc    is source string with logical characters order
  110
+   * @param visualStringDst     is output string with visual characters order, empty on any error
  111
+   * @param forceLTRReadingOrder        force LTR reading order
  112
+   * @return true on success, false otherwise
  113
+   */
  114
+  bool utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, std::u32string& visualStringDst, bool forceLTRReadingOrder = false);
  115
+  /**
  116
+   * Strictly convert wchar_t string (wstring) to UTF-32 string.
  117
+   * No RTL visual-logical transformation is performed.
  118
+   * @param wStringSrc          is source wchar_t string to convert
  119
+   * @param utf32StringDst      is output UTF-32 string, empty on any error
  120
+   * @param failOnBadChar       if set to true function will fail on invalid character,
  121
+   *                            otherwise invalid character will be skipped
  122
+   * @return true on successful conversion, false on any error
  123
+   */
  124
+  bool wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar = false);
  125
+
45 126
   bool utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst,
46 127
                 bool bVisualBiDiFlip = true, bool forceLTRReadingOrder = false,
47 128
                 bool failOnBadChar = false, bool* bWasFlipped = NULL);

0 notes on commit c21f135

Please sign in to comment.
Something went wrong with that request. Please try again.