35
35
#include "file.h"
36
36
37
37
#ifndef lint
38
- FILE_RCSID ("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $" )
38
+ FILE_RCSID ("@(#)$File: ascmagic.c,v 1.109 2021/02/05 23:01:40 christos Exp $" )
39
39
#endif /* lint */
40
40
41
41
#include "magic.h"
@@ -50,7 +50,8 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $")
50
50
#define ISSPC (x ) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
51
51
|| (x) == 0x85 || (x) == '\f')
52
52
53
- private unsigned char * encode_utf8 (unsigned char * , size_t , unicodechar * , size_t );
53
+ private unsigned char * encode_utf8 (unsigned char * , size_t , file_unichar_t * ,
54
+ size_t );
54
55
private size_t trim_nuls (const unsigned char * , size_t );
55
56
56
57
/*
@@ -69,7 +70,7 @@ trim_nuls(const unsigned char *buf, size_t nbytes)
69
70
protected int
70
71
file_ascmagic (struct magic_set * ms , const struct buffer * b , int text )
71
72
{
72
- unicodechar * ubuf = NULL ;
73
+ file_unichar_t * ubuf = NULL ;
73
74
size_t ulen = 0 ;
74
75
int rv = 1 ;
75
76
struct buffer bb ;
@@ -101,9 +102,9 @@ file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
101
102
}
102
103
103
104
protected int
104
- file_ascmagic_with_encoding (struct magic_set * ms ,
105
- const struct buffer * b , unicodechar * ubuf , size_t ulen , const char * code ,
106
- const char * type , int text )
105
+ file_ascmagic_with_encoding (struct magic_set * ms , const struct buffer * b ,
106
+ file_unichar_t * ubuf , size_t ulen , const char * code , const char * type ,
107
+ int text )
107
108
{
108
109
struct buffer bb ;
109
110
const unsigned char * buf = CAST (const unsigned char * , b -> fbuf );
@@ -127,7 +128,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
127
128
int executable = 0 ;
128
129
129
130
size_t last_line_end = CAST (size_t , -1 );
130
- int has_long_lines = 0 ;
131
+ size_t has_long_lines = 0 ;
131
132
132
133
nbytes = trim_nuls (buf , nbytes );
133
134
@@ -190,8 +191,11 @@ file_ascmagic_with_encoding(struct magic_set *ms,
190
191
}
191
192
192
193
/* If this line is _longer_ than MAXLINELEN, remember it. */
193
- if (i > last_line_end + MAXLINELEN )
194
- has_long_lines = 1 ;
194
+ if (i > last_line_end + MAXLINELEN ) {
195
+ size_t ll = i - last_line_end ;
196
+ if (ll > has_long_lines )
197
+ has_long_lines = ll ;
198
+ }
195
199
196
200
if (ubuf [i ] == '\033' )
197
201
has_escapes = 1 ;
@@ -269,7 +273,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
269
273
goto done ;
270
274
271
275
if (has_long_lines )
272
- if (file_printf (ms , ", with very long lines" ) == -1 )
276
+ if (file_printf (ms , ", with very long lines (%zu)" ,
277
+ has_long_lines ) == -1 )
273
278
goto done ;
274
279
275
280
/*
@@ -281,7 +286,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
281
286
if (file_printf (ms , ", with" ) == -1 )
282
287
goto done ;
283
288
284
- if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0 ) {
289
+ if (n_crlf == 0 && n_cr == 0 &&
290
+ n_nel == 0 && n_lf == 0 ) {
285
291
if (file_printf (ms , " no" ) == -1 )
286
292
goto done ;
287
293
} else {
@@ -335,7 +341,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
335
341
* after end of string, or NULL if an invalid character is found.
336
342
*/
337
343
private unsigned char *
338
- encode_utf8 (unsigned char * buf , size_t len , unicodechar * ubuf , size_t ulen )
344
+ encode_utf8 (unsigned char * buf , size_t len , file_unichar_t * ubuf , size_t ulen )
339
345
{
340
346
size_t i ;
341
347
unsigned char * end = buf + len ;
@@ -345,43 +351,45 @@ encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen)
345
351
if (end - buf < 1 )
346
352
return NULL ;
347
353
* buf ++ = CAST (unsigned char , ubuf [i ]);
348
- } else if (ubuf [i ] <= 0x7ff ) {
354
+ continue ;
355
+ }
356
+ if (ubuf [i ] <= 0x7ff ) {
349
357
if (end - buf < 2 )
350
358
return NULL ;
351
359
* buf ++ = CAST (unsigned char , (ubuf [i ] >> 6 ) + 0xc0 );
352
- * buf ++ = CAST (unsigned char , (ubuf [i ] & 0x3f ) + 0x80 );
353
- } else if (ubuf [i ] <= 0xffff ) {
360
+ goto out1 ;
361
+ }
362
+ if (ubuf [i ] <= 0xffff ) {
354
363
if (end - buf < 3 )
355
364
return NULL ;
356
365
* buf ++ = CAST (unsigned char , (ubuf [i ] >> 12 ) + 0xe0 );
357
- * buf ++ = CAST ( unsigned char , (( ubuf [ i ] >> 6 ) & 0x3f ) + 0x80 ) ;
358
- * buf ++ = CAST ( unsigned char , ( ubuf [ i ] & 0x3f ) + 0x80 );
359
- } else if (ubuf [i ] <= 0x1fffff ) {
366
+ goto out2 ;
367
+ }
368
+ if (ubuf [i ] <= 0x1fffff ) {
360
369
if (end - buf < 4 )
361
370
return NULL ;
362
371
* buf ++ = CAST (unsigned char , (ubuf [i ] >> 18 ) + 0xf0 );
363
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 12 ) & 0x3f ) + 0x80 );
364
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 6 ) & 0x3f ) + 0x80 );
365
- * buf ++ = CAST (unsigned char , (ubuf [i ] & 0x3f ) + 0x80 );
366
- } else if (ubuf [i ] <= 0x3ffffff ) {
372
+ goto out3 ;
373
+ }
374
+ if (ubuf [i ] <= 0x3ffffff ) {
367
375
if (end - buf < 5 )
368
376
return NULL ;
369
377
* buf ++ = CAST (unsigned char , (ubuf [i ] >> 24 ) + 0xf8 );
370
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 18 ) & 0x3f ) + 0x80 );
371
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 12 ) & 0x3f ) + 0x80 );
372
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 6 ) & 0x3f ) + 0x80 );
373
- * buf ++ = CAST (unsigned char , (ubuf [i ] & 0x3f ) + 0x80 );
374
- } else if (ubuf [i ] <= 0x7fffffff ) {
378
+ goto out4 ;
379
+ }
380
+ if (ubuf [i ] <= 0x7fffffff ) {
375
381
if (end - buf < 6 )
376
382
return NULL ;
377
383
* buf ++ = CAST (unsigned char , (ubuf [i ] >> 30 ) + 0xfc );
378
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 24 ) & 0x3f ) + 0x80 );
379
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 18 ) & 0x3f ) + 0x80 );
380
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 12 ) & 0x3f ) + 0x80 );
381
- * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 6 ) & 0x3f ) + 0x80 );
382
- * buf ++ = CAST (unsigned char , (ubuf [i ] & 0x3f ) + 0x80 );
383
- } else /* Invalid character */
384
- return NULL ;
384
+ goto out5 ;
385
+ }
386
+ /* Invalid character */
387
+ return NULL ;
388
+ out5 : * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 24 ) & 0x3f ) + 0x80 );
389
+ out4 : * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 18 ) & 0x3f ) + 0x80 );
390
+ out3 : * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 12 ) & 0x3f ) + 0x80 );
391
+ out2 : * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 6 ) & 0x3f ) + 0x80 );
392
+ out1 : * buf ++ = CAST (unsigned char , ((ubuf [i ] >> 0 ) & 0x3f ) + 0x80 );
385
393
}
386
394
387
395
return buf ;
0 commit comments