-
-
Notifications
You must be signed in to change notification settings - Fork 6.2k
/
StringUtils.h
303 lines (268 loc) · 12.7 KB
/
StringUtils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#pragma once
/*
* Copyright (C) 2005-2015 Team Kodi
* http://kodi.tv
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XBMC; see the file COPYING. If not, see
* <http://www.gnu.org/licenses/>.
*
*/
//-----------------------------------------------------------------------
//
// File: StringUtils.h
//
// Purpose: ATL split string utility
// Author: Paul J. Weiss
//
// Modified to support J O'Leary's std::string class by kraqh3d
//
//------------------------------------------------------------------------
#include <stdarg.h>
#include <stdint.h>
#include <string>
#include <vector>
#include <sstream>
#include <locale>
#include <fmt/format.h>
#if FMT_VERSION >= 40000
#include <fmt/printf.h>
#endif
#include "LangInfo.h"
#include "XBDateTime.h"
#include "utils/params_check_macros.h"
class StringUtils
{
public:
/*! \brief Build a formatted string from a format specification and arguments.
 The format string is first handed to fmt::format. If that call returns a
 string equal to the format string itself (i.e. nothing was substituted),
 the same format string and arguments are passed to fmt::sprintf instead,
 so printf-style specifications are handled as well.
 Beware: the original note on this function states that std::string objects
 must not be passed directly and that c_str() has to be used instead.
 NOTE(review): whether that restriction still holds depends on the fmt
 version in use - confirm before relying on either behaviour.
 \param fmt Format of the resulting string
 \param ... variable number of value type arguments
 \return Formatted string
 */
template<typename... Args>
static std::string Format(const std::string& fmt, Args&&... args)
{
  // coverity[fun_call_w_exception : FALSE]
  auto formatted = fmt::format(fmt, std::forward<Args>(args)...);
  // Something was substituted: the fmt::format result is final.
  if (formatted != fmt)
    return formatted;
  // Output equals the input format string: retry with printf semantics.
  return fmt::sprintf(fmt, std::forward<Args>(args)...);
}
/*! \brief Wide-character counterpart of Format(const std::string&, ...).
 Tries fmt::format first; when the result equals the format string itself
 the arguments are formatted with fmt::sprintf instead.
 \param fmt Format of the resulting wide string
 \param ... variable number of value type arguments
 \return Formatted wide string
 */
template<typename... Args>
static std::wstring Format(const std::wstring& fmt, Args&&... args)
{
  // coverity[fun_call_w_exception : FALSE]
  auto formatted = fmt::format(fmt, std::forward<Args>(args)...);
  // Something was substituted: the fmt::format result is final.
  if (formatted != fmt)
    return formatted;
  // Output equals the input format string: retry with printf semantics.
  return fmt::sprintf(fmt, std::forward<Args>(args)...);
}
// va_list based formatting, narrow and wide overloads.
// The format parameter is annotated with PRINTF_FORMAT_STRING (from
// utils/params_check_macros.h), so it is presumably a printf-style format
// string - confirm against the implementation, which is not in this header.
static std::string FormatV(PRINTF_FORMAT_STRING const char *fmt, va_list args);
static std::wstring FormatV(PRINTF_FORMAT_STRING const wchar_t *fmt, va_list args);
// Case conversion helpers with narrow and wide overloads.
// Each takes the string by non-const reference and returns void, i.e. the
// argument itself is modified.
// NOTE(review): whether the conversion is locale dependent, and what exactly
// ToCapitalize capitalizes (first letter only vs. every word), is not visible
// in this header - confirm in the implementation.
static void ToUpper(std::string &str);
static void ToUpper(std::wstring &str);
static void ToLower(std::string &str);
static void ToLower(std::wstring &str);
static void ToCapitalize(std::string &str);
static void ToCapitalize(std::wstring &str);
// Case-insensitive equality tests; overloads accept std::string and/or C strings.
// NOTE(review): behaviour for null C-string pointers is not visible here - confirm.
static bool EqualsNoCase(const std::string &str1, const std::string &str2);
static bool EqualsNoCase(const std::string &str1, const char *s2);
static bool EqualsNoCase(const char *s1, const char *s2);
// Case-insensitive three-way comparison returning an int. sortstringbyname
// (end of this file) treats a negative result as "first argument sorts first".
// NOTE(review): presumably 0 means equal and > 0 means greater - confirm.
static int CompareNoCase(const std::string &str1, const std::string &str2);
static int CompareNoCase(const char *s1, const char *s2);
// NOTE(review): the name suggests this extracts the digits contained in str
// and returns them as an int - confirm against the implementation.
static int ReturnDigits(const std::string &str);
// Substring helpers returning a new string; the input is taken by const reference.
// Mid's count defaults to std::string::npos.
// NOTE(review): presumably Left/Right return the first/last `count` characters
// and Mid returns `count` characters starting at index `first`; behaviour for
// out-of-range values is not visible here - confirm.
static std::string Left(const std::string &str, size_t count);
static std::string Mid(const std::string &str, size_t first, size_t count = std::string::npos);
static std::string Right(const std::string &str, size_t count);
// Trimming helpers. Each takes the string by non-const reference and returns
// a std::string reference.
// NOTE(review): presumably the string is trimmed in place and the returned
// reference is `str` itself (allowing chaining) - confirm.
// The single-argument overloads presumably strip whitespace; the overloads
// taking `chars` presumably strip any character contained in that
// null-terminated set - confirm.
static std::string& Trim(std::string &str);
static std::string& Trim(std::string &str, const char* const chars);
static std::string& TrimLeft(std::string &str);
static std::string& TrimLeft(std::string &str, const char* const chars);
static std::string& TrimRight(std::string &str);
static std::string& TrimRight(std::string &str, const char* const chars);
// NOTE(review): the name suggests runs of spaces/tabs are collapsed in place -
// confirm what a run is collapsed to.
static std::string& RemoveDuplicatedSpacesAndTabs(std::string& str);
// Replacement helpers operating on `str` through a non-const reference:
// single character, narrow substring and wide substring variants.
// NOTE(review): the int return value is presumably the number of replacements
// made; handling of an empty oldStr is not visible here - confirm.
static int Replace(std::string &str, char oldChar, char newChar);
static int Replace(std::string &str, const std::string &oldStr, const std::string &newStr);
static int Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr);
// Prefix tests: presumably true when the first argument begins with the
// second - confirm. The *NoCase variants presumably ignore case.
static bool StartsWith(const std::string &str1, const std::string &str2);
static bool StartsWith(const std::string &str1, const char *s2);
static bool StartsWith(const char *s1, const char *s2);
static bool StartsWithNoCase(const std::string &str1, const std::string &str2);
static bool StartsWithNoCase(const std::string &str1, const char *s2);
static bool StartsWithNoCase(const char *s1, const char *s2);
// Suffix tests: presumably true when the first argument ends with the
// second - confirm. Unlike the prefix tests there is no
// (const char*, const char*) overload.
static bool EndsWith(const std::string &str1, const std::string &str2);
static bool EndsWith(const std::string &str1, const char *s2);
static bool EndsWithNoCase(const std::string &str1, const std::string &str2);
static bool EndsWithNoCase(const std::string &str1, const char *s2);
/*! \brief Concatenates the elements of a container, separated by a delimiter.
 The delimiter is placed between consecutive elements only; there is no
 leading or trailing delimiter. An empty container yields an empty string.
 Elements are appended with std::string::operator+=, so any element type
 accepted by that operator can be used.
 \param strings Container (iterable with range-for) of the elements to join
 \param delimiter Text inserted between two consecutive elements
 \return The joined string
 */
template<typename CONTAINER>
static std::string Join(const CONTAINER &strings, const std::string& delimiter)
{
  std::string result;
  bool first = true;
  for (const auto& str : strings)
  {
    // Append in place: no temporary "str + delimiter" string per element and
    // no trailing delimiter that has to be erased afterwards.
    if (!first)
      result += delimiter;
    result += str;
    first = false;
  }
  return result;
}
/*! \brief Splits the given input string using the given delimiter into separate strings.
If the given input string is empty the result will be an empty array (not
an array containing an empty string).
\param input Input string to be split
\param delimiter Delimiter to be used to split the input string
\param iMaxStrings (optional) Maximum number of split strings; defaults to 0.
NOTE(review): 0 presumably means "no limit" - confirm in the implementation.
\return Vector holding the resulting strings
*/
static std::vector<std::string> Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings = 0);
// Single-character delimiter overload. Note that iMaxStrings is a size_t here
// while the string-delimiter overload above uses unsigned int.
static std::vector<std::string> Split(const std::string& input, const char delimiter, size_t iMaxStrings = 0);
// Overload taking several delimiter strings; it has no iMaxStrings parameter.
// NOTE(review): how the delimiters are combined is not visible in this header - confirm.
static std::vector<std::string> Split(const std::string& input, const std::vector<std::string> &delimiters);
/*! \brief Splits the given input strings using the given delimiters into further separate strings.
If the given input string vector is empty the result will be an empty array (not
an array containing an empty string).
Delimiter strings are applied in order, so once the (optional) maximum number of
items is produced no other delimiters are applied. This produces different results
to applying all delimiters at once e.g. "a/b#c/d" becomes "a", "b#c", "d" rather
than "a", "b", "c/d"
(NOTE(review): this example presumably assumes the delimiters "/" then "#" and
iMaxStrings = 3 - confirm).
\param input Input vector of strings each to be split
\param delimiters Delimiter strings to be used to split the input strings
\param iMaxStrings (optional) Maximum number of resulting split strings; defaults to 0.
NOTE(review): 0 presumably means "no limit" - confirm.
\return Vector holding the resulting strings
*/
static std::vector<std::string> SplitMulti(const std::vector<std::string> &input, const std::vector<std::string> &delimiters, unsigned int iMaxStrings = 0);
// NOTE(review): the name suggests this counts the occurrences of strFind in
// strInput - confirm against the implementation.
static int FindNumber(const std::string& strInput, const std::string &strFind);
// Three-way comparison of two wide C strings returning an int64_t.
// NOTE(review): presumably a "natural" ordering where embedded numbers compare
// by value (negative / zero / positive result) - confirm.
static int64_t AlphaNumericCompare(const wchar_t *left, const wchar_t *right);
// NOTE(review): presumably parses a textual time (e.g. "hh:mm:ss") into
// seconds; the accepted formats are not visible here - confirm.
static long TimeStringToSeconds(const std::string &timeString);
// Operates on strLine through a non-const reference.
// NOTE(review): presumably strips CR/LF line-ending characters - confirm whether
// only trailing ones are removed.
static void RemoveCRLF(std::string& strLine);
/*! \brief utf8 version of strlen - skips any non-starting bytes in the count, thus returning the number of utf8 characters
\param s c-string to find the length of.
\return the number of utf8 characters in the string.
*/
static size_t utf8_strlen(const char *s);
/*! \brief convert a time in seconds to a string based on the given time format
\param seconds time in seconds
\param format the format we want the time in; defaults to TIME_FORMAT_GUESS.
\return the formatted time
\sa TIME_FORMAT
*/
static std::string SecondsToTimeString(long seconds, TIME_FORMAT format = TIME_FORMAT_GUESS);
/*! \brief check whether a string is a natural number.
Matches [ \t]*[0-9]+[ \t]*
i.e. one or more decimal digits, optionally surrounded by spaces/tabs.
\param str the string to check
\return true if the string is a natural number, false otherwise.
*/
static bool IsNaturalNumber(const std::string& str);
/*! \brief check whether a string is an integer.
Matches [ \t]*[\-]*[0-9]+[ \t]*
NOTE(review): as written, the pattern allows any number of leading '-'
characters (e.g. "--5"); confirm whether the implementation really accepts that.
\param str the string to check
\return true if the string is an integer, false otherwise.
*/
static bool IsInteger(const std::string& str);
/* The following isasciiXX and asciiXXvalue functions are locale independent (US-ASCII only),
* as opposed to the standard ::isXX functions (::isalpha, ::isdigit...), which are locale dependent.
* They take their parameter as a plain char, so callers do not need the double cast
* ((int)(unsigned char)) that is required for the standard functions. */
/*! \brief Check whether a character is one of the US-ASCII digits '0'..'9'.
 \param chr character to test
 \return true for '0' through '9', false for everything else
 */
inline static bool isasciidigit(char chr) // locale independent
{
  if (chr < '0')
    return false;
  return chr <= '9';
}
/*! \brief Check whether a character is a US-ASCII hexadecimal digit.
 \param chr character to test
 \return true for '0'..'9', 'a'..'f' and 'A'..'F', false otherwise
 */
inline static bool isasciixdigit(char chr) // locale independent
{
  const bool isDecimal = '0' <= chr && chr <= '9';
  const bool isLowerHex = 'a' <= chr && chr <= 'f';
  const bool isUpperHex = 'A' <= chr && chr <= 'F';
  return isDecimal || isLowerHex || isUpperHex;
}
// Numeric value of a US-ASCII decimal / hexadecimal digit character.
// NOTE(review): the value returned for a character that is not a valid digit
// is not visible in this header - confirm in the implementation.
static int asciidigitvalue(char chr); // locale independent
static int asciixdigitvalue(char chr); // locale independent
/*! \brief Check whether a character is a US-ASCII uppercase letter.
 \param chr character to test
 \return true for 'A' through 'Z', false otherwise
 */
inline static bool isasciiuppercaseletter(char chr) // locale independent
{
  if (chr < 'A')
    return false;
  return chr <= 'Z';
}
/*! \brief Check whether a character is a US-ASCII lowercase letter.
 \param chr character to test
 \return true for 'a' through 'z', false otherwise
 */
inline static bool isasciilowercaseletter(char chr) // locale independent
{
  if (chr < 'a')
    return false;
  return chr <= 'z';
}
/*! \brief Check whether a character is a US-ASCII letter or digit.
 Combines isasciidigit, isasciilowercaseletter and isasciiuppercaseletter.
 \param chr character to test
 \return true if any of the three checks succeeds, false otherwise
 */
inline static bool isasciialphanum(char chr) // locale independent
{
  if (isasciidigit(chr))
    return true;
  return isasciilowercaseletter(chr) || isasciiuppercaseletter(chr);
}
// NOTE(review): presumably renders a byte count as human readable text; the
// exact format is not visible here - confirm (see also FormatFileSize below).
static std::string SizeToString(int64_t size);
// Shared constant string; its definition lives outside this header.
// NOTE(review): presumably an empty string, as the name says - confirm.
static const std::string Empty;
// NOTE(review): the parameter name indicates the searched word must already be
// lower case; the meaning of the size_t result (position? not-found value?) is
// not visible here - confirm.
static size_t FindWords(const char *str, const char *wordLowerCase);
// NOTE(review): presumably returns the index of the `closer` matching an
// `opener`, searching from startPos (default 0), with some sentinel when none
// is found - confirm.
static int FindEndBracket(const std::string &str, char opener, char closer, int startPos = 0);
// NOTE(review): presumably converts a date string into an int of the form
// YYYYMMDD; accepted input formats and the failure value are not visible - confirm.
static int DateStringToYYYYMMDD(const std::string &dateString);
// Operates on `word` through a non-const reference.
// NOTE(review): presumably maps letters to phone-keypad digits - confirm.
static void WordToDigits(std::string &word);
// Returns a UUID as a string. NOTE(review): UUID version/format not visible here.
static std::string CreateUUID();
static bool ValidateUUID(const std::string &uuid); // NB only validates syntax
// Similarity measure between two strings returned as a double.
// NOTE(review): range and direction (higher = more similar?) not visible - confirm.
static double CompareFuzzy(const std::string &left, const std::string &right);
// `matchscore` is a non-const reference, i.e. an output of the call.
// NOTE(review): presumably returns the index into `strings` of the best match
// for `str` and stores its score in matchscore - confirm.
static int FindBestMatch(const std::string &str, const std::vector<std::string> &strings, double &matchscore);
// NOTE(review): presumably true when `str` contains at least one of
// `keywords`; case sensitivity is not visible here - confirm.
static bool ContainsKeyword(const std::string &str, const std::vector<std::string> &keywords);
/*! \brief Convert the string of binary chars to the actual string.
Convert the string representation of binary chars to the actual string.
For example \1\2\3 is converted to a string with binary char \1, \2 and \3
\param in String to convert
\return Converted string
*/
static std::string BinaryStringToString(const std::string& in);
/*! \brief Format a number as text using locale separators.
 The value is written to a string stream in fixed notation with a precision
 of 1 (one decimal place for floating point types). Except in Windows debug
 builds, the stream is imbued with g_langInfo.GetOriginalLocale() first, so
 the separators follow that locale.
 Illustration of locale separators (from the original comment): 10000.57 in
 en-us is '10,000.57' but in italian is '10.000,57'.
 \param num Number to format
 \return Formatted string
 */
template<typename T>
static std::string FormatNumber(T num)
{
  std::ostringstream out;
// The imbue call is compiled out for Windows debug builds: with
// _ITERATOR_DEBUG_LEVEL=0 and a custom numpunct it causes a runtime error in debug mode,
// for more info https://connect.microsoft.com/VisualStudio/feedback/details/2655363
#if !(defined(_DEBUG) && defined(TARGET_WINDOWS))
  out.imbue(g_langInfo.GetOriginalLocale());
#endif
  // Same effect as streaming std::fixed.
  out.setf(std::ios_base::fixed, std::ios_base::floatfield);
  out.precision(1);
  out << num;
  return out.str();
}
/*! \brief Escapes the given string to be able to be used as a parameter.
Escapes backslashes and double-quotes with an additional backslash and
adds double-quotes around the whole string.
\param param String to escape/paramify
\return Escaped/Paramified string
*/
static std::string Paramify(const std::string &param);
/*! \brief Split a string by the specified delimiters.
Splits a string using one or more delimiting characters, ignoring empty tokens.
Differs from Split() in two ways:
1. The delimiters are treated as individual characters, rather than a single delimiting string.
2. Empty tokens are ignored.
\param input String to be tokenized
\param delimiters Set of delimiting characters
\return a vector of tokens
*/
static std::vector<std::string> Tokenize(const std::string& input, const std::string& delimiters);
// Variant that delivers the tokens through the `tokens` output parameter instead
// of returning them.
// NOTE(review): whether `tokens` is cleared first or appended to is not visible
// in this header - confirm.
static void Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters);
// Single delimiter character variants of the two overloads above.
static std::vector<std::string> Tokenize(const std::string& input, const char delimiter);
static void Tokenize(const std::string& input, std::vector<std::string>& tokens, const char delimiter);
// Converts a string to uint64_t; declared noexcept and takes the string by value.
// NOTE(review): presumably `fallback` is returned when `str` cannot be parsed -
// confirm against the implementation.
static uint64_t ToUint64(std::string str, uint64_t fallback) noexcept;
/*!
* Returns bytes in a human readable format using the smallest unit that will fit `bytes` in at
* most three digits. The number of decimals are adjusted with significance such that 'small'
* numbers will have more decimals than larger ones.
*
* For example: 1024 bytes will be formatted as "1.00kB", 10240 bytes as "10.0kB" and
* 102400 bytes as "100kB". See TestStringUtils for more examples.
*
* \param bytes size in bytes
* \return the formatted size
*/
static std::string FormatFileSize(uint64_t bytes);
};
/*! \brief Case-insensitive "less than" comparator for std::string.
 Reports strItem1 as ordered before strItem2 when
 StringUtils::CompareNoCase(strItem1, strItem2) is negative.
 The call operator is const so the comparator can also be invoked through a
 const object, as ordered containers and algorithms may do.
 */
struct sortstringbyname
{
  /*! \param strItem1 left-hand string
   \param strItem2 right-hand string
   \return true if strItem1 sorts before strItem2 ignoring case
   */
  bool operator()(const std::string& strItem1, const std::string& strItem2) const
  {
    return StringUtils::CompareNoCase(strItem1, strItem2) < 0;
  }
};