-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstring.c
403 lines (337 loc) · 8.94 KB
/
string.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
/* Copyright (c) 2006-2014 Jonas Fonseca <jonas.fonseca@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "tig/tig.h"
#include "tig/string.h"
/*
* Strings.
*/
/* Check whether str is a non-empty string made up of decimal digits only.
 *
 * Returns true when every character of str is a digit and str contains at
 * least one character; the empty string is not a number. */
bool
string_isnumber(const char *str)
{
	size_t pos = 0;

	/* <ctype.h> classifiers are only defined for unsigned char values and
	 * EOF, so convert before testing; a plain char may be negative. */
	while (isdigit((unsigned char) str[pos]))
		pos++;

	/* The scan must have consumed something and stopped at the NUL. */
	return pos > 0 && !str[pos];
}
/* Check whether str looks like a (possibly abbreviated) commit ID.
 *
 * Returns true when str consists solely of hexadecimal digits and its length
 * is at least 7 and less than SIZEOF_REV. */
bool
iscommit(const char *str)
{
	int pos = 0;

	/* <ctype.h> classifiers are only defined for unsigned char values and
	 * EOF, so convert before testing; a plain char may be negative. */
	while (isxdigit((unsigned char) str[pos]))
		pos++;

	/* Reject strings where the scan stopped before the terminating NUL. */
	return !str[pos] && 7 <= pos && pos < SIZEOF_REV;
}
/* Compare the tail of str against suffix.
 *
 * slen is the length of str to consider, or a negative value to have it
 * measured with strlen(). The suffix must be strictly shorter than that
 * length; otherwise -1 is returned without comparing. When the comparison is
 * made, the result is that of strcmp() on the tail, which runs up to the NUL
 * terminator of str. */
int
suffixcmp(const char *str, int slen, const char *suffix)
{
	size_t length;
	size_t suffix_length;

	if (slen >= 0)
		length = slen;
	else
		length = strlen(str);
	suffix_length = strlen(suffix);

	if (suffix_length >= length)
		return -1;

	return strcmp(str + length - suffix_length, suffix);
}
/* Copy at most srclen characters from src into dst and NUL-terminate.
 *
 * dstlen is the total size of dst in bytes; the copy is truncated to
 * dstlen - 1 characters so the terminator always fits. When dstlen is zero
 * there is no room for even the terminator and dst is left untouched. */
void
string_ncopy_do(char *dst, size_t dstlen, const char *src, size_t srclen)
{
	/* Without this guard dstlen - 1 would wrap around to SIZE_MAX and the
	 * clamp below would never trigger. */
	if (dstlen == 0)
		return;

	if (srclen > dstlen - 1)
		srclen = dstlen - 1;

	/* strncpy() stops at a NUL in src and zero-fills the remainder, so src
	 * is never read beyond its terminator. */
	strncpy(dst, src, srclen);
	dst[srclen] = 0;
}
/* Copy the revision found at the start of src into dst.
 *
 * The revision ends at the first whitespace character, at the end of src, or
 * after SIZEOF_REV characters, whichever comes first. dst is passed to
 * string_ncopy_do() with a size of SIZEOF_REV. When src is empty, dst is
 * left unchanged. */
void
string_copy_rev(char *dst, const char *src)
{
	size_t srclen;

	if (!*src)
		return;

	/* Stop at the terminating NUL as well as at whitespace so a short
	 * revision never causes reads beyond the end of src. The cast keeps
	 * isspace() within its defined domain when char is signed. */
	for (srclen = 0; srclen < SIZEOF_REV; srclen++)
		if (!src[srclen] || isspace((unsigned char) src[srclen]))
			break;

	string_ncopy_do(dst, SIZEOF_REV, src, srclen);
}
/* Copy the revision out of a line that begins with "commit ".
 *
 * The prefix is stepped over by STRING_SIZE("commit ") bytes (presumably the
 * length of the literal without its terminator -- confirm against the macro)
 * and the remainder is handed to string_copy_rev(). The prefix itself is not
 * verified here. */
void
string_copy_rev_from_commit_line(char *dst, const char *src)
{
	const char *rev = src + STRING_SIZE("commit ");

	string_copy_rev(dst, rev);
}
/* Work out how many bytes of src fit into max_size columns once tabs have
 * been expanded to the next multiple of tabsize.
 *
 * At most srclen bytes of src are examined. Scanning stops as soon as the
 * expanded size reaches or exceeds max_size; the byte that crossed the limit
 * is included in the result. tabsize must be non-zero.
 *
 * Returns the number of bytes of src that were consumed. */
size_t
string_expanded_length(const char *src, size_t srclen, size_t tabsize, size_t max_size)
{
	size_t columns = 0;
	size_t consumed = 0;

	while (consumed < srclen && columns < max_size) {
		size_t step = 1;

		/* A tab advances to the next tab stop. */
		if (src[consumed] == '\t')
			step = tabsize - (columns % tabsize);

		columns += step;
		consumed++;
	}

	return consumed;
}
/* Copy src into dst, replacing every tab with spaces up to the next multiple
 * of tabsize.
 *
 * dstlen is the total size of dst in bytes. Output is truncated so that at
 * most dstlen - 1 characters plus a NUL terminator are written; when dstlen
 * is zero nothing is written. A tabsize below one is treated as one, so a
 * tab becomes a single space.
 *
 * Returns the number of bytes of src that were consumed. */
size_t
string_expand(char *dst, size_t dstlen, const char *src, int tabsize)
{
	/* Guard against division by zero and negative-to-unsigned wrap. */
	size_t tabwidth = tabsize > 0 ? (size_t) tabsize : 1;
	size_t size, pos;

	/* No room for the terminator; dstlen - 1 below would wrap around. */
	if (dstlen == 0)
		return 0;

	for (size = pos = 0; size < dstlen - 1 && src[pos]; pos++) {
		if (src[pos] == '\t') {
			size_t expanded = tabwidth - (size % tabwidth);

			/* Clip the padding to the space left before the NUL. */
			if (expanded + size >= dstlen - 1)
				expanded = dstlen - size - 1;
			/* Fill with memset() rather than copying from a fixed
			 * string of spaces, which would be over-read whenever
			 * the padding is longer than that string. */
			memset(dst + size, ' ', expanded);
			size += expanded;
		} else {
			dst[size++] = src[pos];
		}
	}

	dst[size] = 0;
	return pos;
}
/* Strip leading and trailing whitespace from name.
 *
 * Leading whitespace is skipped by advancing the returned pointer; trailing
 * whitespace is removed in place by overwriting it with NUL bytes.
 *
 * Returns a pointer into name at the first non-whitespace character, or at
 * the terminating NUL when name holds nothing but whitespace. */
char *
chomp_string(char *name)
{
	size_t namelen;

	/* <ctype.h> classifiers are only defined for unsigned char values and
	 * EOF, so convert before testing; a plain char may be negative. */
	while (isspace((unsigned char) *name))
		name++;

	/* Keep the length as size_t rather than narrowing it to int. */
	namelen = strlen(name);
	while (namelen > 0 && isspace((unsigned char) name[namelen - 1]))
		name[--namelen] = 0;

	return name;
}
/* Format a printf-style message into buf, starting at offset *bufpos.
 *
 * buf and bufsize describe the destination buffer. bufpos is optional: when
 * it is NULL output starts at offset 0, otherwise it starts at *bufpos and
 * *bufpos is advanced by retval when retval is positive.
 *
 * As written, the return value depends only on the starting offset: FALSE
 * when that offset is at or beyond bufsize, TRUE otherwise.
 *
 * NOTE(review): FORMAT_BUFFER is a macro defined outside this file;
 * presumably it runs a vsnprintf-style call over the variadic arguments and
 * stores the outcome in retval -- confirm. A failure it reports through
 * retval is not reflected in the return value here. */
bool PRINTF_LIKE(4, 5)
string_nformat(char *buf, size_t bufsize, size_t *bufpos, const char *fmt, ...)
{
/* Offset into buf at which output begins. */
size_t pos = bufpos ? *bufpos : 0;
int retval;
/* NOTE(review): the macro is invoked before pos is checked against bufsize,
 * so bufsize - pos wraps around if a caller passes *bufpos > bufsize. */
FORMAT_BUFFER(buf + pos, bufsize - pos, fmt, retval, FALSE);
/* Only advance the caller's position when something was reported written. */
if (bufpos && retval > 0)
*bufpos = pos + retval;
return pos >= bufsize ? FALSE : TRUE;
}
/* strcmp() variant that accepts NULL pointers.
 *
 * When both strings are non-NULL the result is that of strcmp(). Otherwise
 * NULL sorts before any non-NULL string: the result is 0 when both are NULL,
 * -1 when only s1 is NULL and 1 when only s2 is NULL. */
int
strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);

	return (s1 ? 1 : 0) - (s2 ? 1 : 0);
}
/* Compare two strings, taking the number at the first point of difference
 * into account.
 *
 * The common prefix of s1 and s2 is skipped while tracking the value of the
 * run of digits it ends with. That value is combined with the number parsed
 * by atoi() from the remainder of each string. When the two numbers differ
 * the result is num2 - num1, so the string holding the larger number yields
 * a negative result when passed as s1. When the numbers are equal, a string
 * that has ended yields a positive result when passed as s1; otherwise the
 * result is the difference between the first differing characters. */
int
strcmp_numeric(const char *s1, const char *s2)
{
	int number = 0;
	int num1, num2;

	while (*s1 && *s2 && *s1 == *s2) {
		/* <ctype.h> classifiers are only defined for unsigned char
		 * values and EOF; a plain char may be negative. */
		if (isdigit((unsigned char) *s1))
			number = 10 * number + (*s1 - '0');
		else
			number = 0;
		s1++;
		s2++;
	}

	/* Prepend the shared digits to what follows in each string. */
	num1 = number * 10 + atoi(s1);
	num2 = number * 10 + atoi(s2);

	if (num1 != num2)
		return num2 - num1;

	/* Exactly one of the strings has ended. */
	if (!!*s1 != !!*s2)
		return !!*s2 - !!*s1;

	return *s1 - *s2;
}
/*
* Unicode / UTF-8 handling
*
* NOTE: Much of the following code for dealing with Unicode is derived from
* ELinks' UTF-8 code developed by Scrool <scroolik@gmail.com>. The original
* file is src/intl/charset.c from the UTF-8 branch, commit
* elinks-0.11.0-g31f2c28.
*/
/* Return the number of display columns taken up by the code point c.
 *
 * Code points in the wide ranges below occupy two columns, combining marks
 * occupy none, a tab occupies tab_size columns and everything else occupies
 * a single column. */
int
unicode_width(unsigned long c, int tab_size)
{
	/* Inclusive [first, last] ranges of double-width code points. */
	static const unsigned long wide[][2] = {
		{ 0x1100, 0x115f },	/* Hangul Jamo */
		{ 0x2329, 0x232a },
		{ 0x2e80, 0x303e },	/* CJK ... Yi, leaving out 0x303f */
		{ 0x3040, 0xa4cf },
		{ 0xac00, 0xd7a3 },	/* Hangul Syllables */
		{ 0xf900, 0xfaff },	/* CJK Compatibility Ideographs */
		{ 0xfe30, 0xfe6f },	/* CJK Compatibility Forms */
		{ 0xff00, 0xff60 },	/* Fullwidth Forms */
		{ 0xffe0, 0xffe6 },
		{ 0x20000, 0x2fffd },
		{ 0x30000, 0x3fffd },
	};
	/* Inclusive [first, last] ranges of zero-width code points. */
	static const unsigned long combining[][2] = {
		{ 0x0300, 0x036f },	/* combining diacritical marks */
		{ 0x1dc0, 0x1dff },	/* combining diacritical marks supplement */
		{ 0x20d0, 0x20ff },	/* combining diacritical marks for symbols */
		{ 0xfe20, 0xfe2f },	/* combining half marks */
	};
	size_t i;

	for (i = 0; i < sizeof(wide) / sizeof(wide[0]); i++)
		if (wide[i][0] <= c && c <= wide[i][1])
			return 2;

	for (i = 0; i < sizeof(combining) / sizeof(combining[0]); i++)
		if (combining[i][0] <= c && c <= combining[i][1])
			return 0;

	return c == '\t' ? tab_size : 1;
}
/* Length in bytes of a UTF-8 encoded character, indexed by the value of its
 * first byte. Bytes that are not a legal first byte are set to one. */
static const unsigned char utf8_bytes[256] = {
	/* 0x00 - 0x7f */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0x80 - 0xbf */
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	/* 0xc0 - 0xdf */
	2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
	/* 0xe0 - 0xff */
	3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
};

/* Return the number of bytes in the UTF-8 character starting at string, as
 * given by its first byte. The result is always between one and six. */
unsigned char
utf8_char_length(const char *string)
{
	return utf8_bytes[*(const unsigned char *) string];
}
/* Decode UTF-8 multi-byte representation into a Unicode character.
 *
 * string points at the first byte of the sequence and length is the number
 * of bytes it occupies. The continuation bytes are not validated.
 *
 * Returns the decoded code point, or 0 when the sequence cannot be a valid
 * character: a length outside 1-4, a lone byte that is not ASCII, or a
 * value above U+10FFFF. */
unsigned long
utf8_to_unicode(const char *string, size_t length)
{
	/* Work on unsigned bytes so the result does not depend on whether
	 * plain char is signed. */
	const unsigned char *bytes = (const unsigned char *) string;
	unsigned long unicode;

	switch (length) {
	case 1:
		/* A single byte is only a character when it is ASCII. */
		if (bytes[0] >= 0x80)
			return 0;
		unicode = bytes[0];
		break;
	case 2:
		unicode = (unsigned long) (bytes[0] & 0x1f) << 6;
		unicode += (bytes[1] & 0x3f);
		break;
	case 3:
		unicode = (unsigned long) (bytes[0] & 0x0f) << 12;
		unicode += (unsigned long) (bytes[1] & 0x3f) << 6;
		unicode += (bytes[2] & 0x3f);
		break;
	case 4:
		/* A four-byte lead byte carries three payload bits. */
		unicode = (unsigned long) (bytes[0] & 0x07) << 18;
		unicode += (unsigned long) (bytes[1] & 0x3f) << 12;
		unicode += (unsigned long) (bytes[2] & 0x3f) << 6;
		unicode += (bytes[3] & 0x3f);
		break;
	default:
		/* Five- and six-byte forms can only encode values beyond
		 * U+10FFFF or overlong encodings; neither is valid UTF-8. */
		return 0;
	}

	/* Invalid characters could return the special 0xfffd value but NUL
	 * should be just as good. */
	return unicode > 0x10ffff ? 0 : unicode;
}
/* Count the UTF-8 characters in string.
 *
 * Each character is stepped over using the length reported by
 * utf8_char_length(). A character only counts once all of its bytes have
 * been stepped over; hitting the terminating NUL part-way through a
 * character ends the count without including it. */
size_t
utf8_char_count(const char *string)
{
	size_t count;

	for (count = 0; ; count++) {
		unsigned char remaining;

		for (remaining = utf8_char_length(string); remaining > 0; remaining--) {
			if (*string == '\0')
				return count;
			string++;
		}
	}
}
/* Advance string past up to skip UTF-8 characters.
 *
 * Each character is stepped over using the length reported by
 * utf8_char_length(). Returns a pointer to the character following the
 * skipped ones, or to the terminating NUL when the string ends first. */
const char*
utf8_skip(const char *string, size_t skip)
{
	size_t skipped;

	for (skipped = 0; skipped < skip; skipped++) {
		unsigned char remaining = utf8_char_length(string);

		assert(remaining > 0);
		for (; remaining > 0; remaining--) {
			if (*string == '\0')
				return string;
			string++;
		}
	}

	return string;
}
/* Calculates how much of string can be shown within the given maximum width
* and sets trimmed parameter to non-zero value if all of string could not be
* shown. If the reserve flag is TRUE, it will reserve at least one
* trailing character, which can be useful when drawing a delimiter.
*
* Parameters:
*   start     - in/out: points at the string to measure; advanced past the
*               characters consumed while skip is non-zero.
*   skip      - number of display columns to skip at the start of the string.
*   width     - out: accumulated display width of the characters processed.
*   max_width - maximum display width; 0 disables the limit.
*   trimmed   - out: set to 1 when a character did not fit in max_width.
*   reserve   - see above.
*   tab_size  - width passed to unicode_width() for tab characters.
*
* Scanning also stops at a character whose bytes would run past the end of
* the string and at a character that utf8_to_unicode() decodes to 0.
*
* Returns the number of bytes to output from string to satisfy max_width,
* counted from the (possibly advanced) *start. */
size_t
utf8_length(const char **start, size_t skip, int *width, size_t max_width, int *trimmed, bool reserve, int tab_size)
{
const char *string = *start;
const char *end = strchr(string, '\0');
/* Bytes and width of the last character with a non-zero width, with the
 * bytes of any zero-width characters following it added on; used to back up
 * when reserve applies. */
unsigned char last_bytes = 0;
size_t last_ucwidth = 0;
*width = 0;
*trimmed = 0;
while (string < end) {
unsigned char bytes = utf8_char_length(string);
size_t ucwidth;
unsigned long unicode;
/* The character claims more bytes than remain in the string. */
if (string + bytes > end)
break;
/* Change representation to figure out whether
* it is a single- or double-width character. */
unicode = utf8_to_unicode(string, bytes);
/* FIXME: Graceful handling of invalid Unicode character. */
if (!unicode)
break;
ucwidth = unicode_width(unicode, tab_size);
/* While columns remain to be skipped, use them up (never going below
 * zero) and move the caller's start pointer past this character. */
if (skip > 0) {
skip -= ucwidth <= skip ? ucwidth : skip;
*start += bytes;
}
/* NOTE(review): the width is accumulated for skipped characters too --
 * confirm that this is intended. */
*width += ucwidth;
if (max_width > 0 && *width > max_width) {
/* This character does not fit: undo its width and stop before it. */
*trimmed = 1;
*width -= ucwidth;
/* The line is exactly full, so give up the last visible character
 * (and any zero-width characters after it) as well. */
if (reserve && *width == max_width) {
string -= last_bytes;
*width -= last_ucwidth;
}
break;
}
string += bytes;
if (ucwidth) {
last_bytes = bytes;
last_ucwidth = ucwidth;
} else {
/* Zero-width characters are grouped with the one before them. */
last_bytes += bytes;
}
}
return string - *start;
}
/* Measure the display width of text, limited to max columns.
 *
 * This wraps utf8_length() with no skipped columns, no reserved trailing
 * character and a tab size of 1, and returns the width it reports. */
int
utf8_width_max(const char *text, int max)
{
	const char *cursor = text;
	int was_trimmed = FALSE;
	int width = 0;

	utf8_length(&cursor, 0, &width, max, &was_trimmed, FALSE, 1);

	return width;
}
/* vim: set ts=8 sw=8 noexpandtab: */