Skip to content

Commit 3b9173d

Browse files
committed Apr 5, 2021
fileinfo: Port libmagic 5.40
Signed-off-by: Anatol Belski <ab@php.net>
1 parent 22019a1 commit 3b9173d

21 files changed

+101523
-75463
lines changed
 

‎ext/fileinfo/data_file.c

+95,853-73,223
Large diffs are not rendered by default.

‎ext/fileinfo/libmagic.patch

+284-536
Large diffs are not rendered by default.

‎ext/fileinfo/libmagic/apprentice.c

+16-6
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,10 @@
3434
#include "file.h"
3535

3636
#ifndef lint
37-
FILE_RCSID("@(#)$File: apprentice.c,v 1.297 2020/05/09 18:57:15 christos Exp $")
37+
FILE_RCSID("@(#)$File: apprentice.c,v 1.301 2021/02/23 00:51:11 christos Exp $")
3838
#endif /* lint */
3939

4040
#include "magic.h"
41-
#include "patchlevel.h"
4241
#include <stdlib.h>
4342

4443
#if defined(__hpux) && !defined(HAVE_STRTOULL)
@@ -536,6 +535,7 @@ file_ms_alloc(int flags)
536535
ms->elf_notes_max = FILE_ELF_NOTES_MAX;
537536
ms->regex_max = FILE_REGEX_MAX;
538537
ms->bytes_max = FILE_BYTES_MAX;
538+
ms->encoding_max = FILE_ENCODING_MAX;
539539
return ms;
540540
free:
541541
efree(ms);
@@ -1416,7 +1416,10 @@ apprentice_load(struct magic_set *ms, const char *fn, int action)
14161416
*/
14171417
set_last_default(ms, mset[j].me, mset[j].count);
14181418

1419-
/* coalesce per file arrays into a single one */
1419+
/* coalesce per file arrays into a single one, if needed */
1420+
if (mset[j].count == 0)
1421+
continue;
1422+
14201423
if (coalesce_entries(ms, mset[j].me, mset[j].count,
14211424
&map->magic[j], &map->nmagic[j]) == -1) {
14221425
errs++;
@@ -2086,6 +2089,13 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
20862089
return -1;
20872090
}
20882091

2092+
if (m->type == FILE_NAME && cont_level != 0) {
2093+
if (ms->flags & MAGIC_CHECK)
2094+
file_magwarn(ms, "`name%s' entries can only be "
2095+
"declared at top level", l);
2096+
return -1;
2097+
}
2098+
20892099
/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
20902100
/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
20912101

@@ -2699,7 +2709,7 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
26992709
ull = CAST(uint64_t, strtoull(*p, &ep, 0));
27002710
m->value.q = file_signextend(ms, m, ull);
27012711
if (*p == ep) {
2702-
file_magwarn(ms, "Unparseable number `%s'", *p);
2712+
file_magwarn(ms, "Unparsable number `%s'", *p);
27032713
} else {
27042714
size_t ts = typesize(m->type);
27052715
uint64_t x;
@@ -3101,8 +3111,8 @@ apprentice_map(struct magic_set *ms, const char *fn)
31013111
else
31023112
version = ptr[1];
31033113
if (version != VERSIONNO) {
3104-
file_error(ms, 0, "File %d.%d supports only version %d magic "
3105-
"files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
3114+
file_error(ms, 0, "File %d supports only version %d magic "
3115+
"files. `%s' is version %d", MAGIC_VERSION,
31063116
VERSIONNO, dbname, version);
31073117
goto error;
31083118
}

‎ext/fileinfo/libmagic/ascmagic.c

+42-34
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
#include "file.h"
3636

3737
#ifndef lint
38-
FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $")
38+
FILE_RCSID("@(#)$File: ascmagic.c,v 1.109 2021/02/05 23:01:40 christos Exp $")
3939
#endif /* lint */
4040

4141
#include "magic.h"
@@ -50,7 +50,8 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $")
5050
#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
5151
|| (x) == 0x85 || (x) == '\f')
5252

53-
private unsigned char *encode_utf8(unsigned char *, size_t, unicodechar *, size_t);
53+
private unsigned char *encode_utf8(unsigned char *, size_t, file_unichar_t *,
54+
size_t);
5455
private size_t trim_nuls(const unsigned char *, size_t);
5556

5657
/*
@@ -69,7 +70,7 @@ trim_nuls(const unsigned char *buf, size_t nbytes)
6970
protected int
7071
file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
7172
{
72-
unicodechar *ubuf = NULL;
73+
file_unichar_t *ubuf = NULL;
7374
size_t ulen = 0;
7475
int rv = 1;
7576
struct buffer bb;
@@ -101,9 +102,9 @@ file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
101102
}
102103

103104
protected int
104-
file_ascmagic_with_encoding(struct magic_set *ms,
105-
const struct buffer *b, unicodechar *ubuf, size_t ulen, const char *code,
106-
const char *type, int text)
105+
file_ascmagic_with_encoding(struct magic_set *ms, const struct buffer *b,
106+
file_unichar_t *ubuf, size_t ulen, const char *code, const char *type,
107+
int text)
107108
{
108109
struct buffer bb;
109110
const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
@@ -127,7 +128,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
127128
int executable = 0;
128129

129130
size_t last_line_end = CAST(size_t, -1);
130-
int has_long_lines = 0;
131+
size_t has_long_lines = 0;
131132

132133
nbytes = trim_nuls(buf, nbytes);
133134

@@ -190,8 +191,11 @@ file_ascmagic_with_encoding(struct magic_set *ms,
190191
}
191192

192193
/* If this line is _longer_ than MAXLINELEN, remember it. */
193-
if (i > last_line_end + MAXLINELEN)
194-
has_long_lines = 1;
194+
if (i > last_line_end + MAXLINELEN) {
195+
size_t ll = i - last_line_end;
196+
if (ll > has_long_lines)
197+
has_long_lines = ll;
198+
}
195199

196200
if (ubuf[i] == '\033')
197201
has_escapes = 1;
@@ -269,7 +273,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
269273
goto done;
270274

271275
if (has_long_lines)
272-
if (file_printf(ms, ", with very long lines") == -1)
276+
if (file_printf(ms, ", with very long lines (%zu)",
277+
has_long_lines) == -1)
273278
goto done;
274279

275280
/*
@@ -281,7 +286,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
281286
if (file_printf(ms, ", with") == -1)
282287
goto done;
283288

284-
if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
289+
if (n_crlf == 0 && n_cr == 0 &&
290+
n_nel == 0 && n_lf == 0) {
285291
if (file_printf(ms, " no") == -1)
286292
goto done;
287293
} else {
@@ -335,7 +341,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
335341
* after end of string, or NULL if an invalid character is found.
336342
*/
337343
private unsigned char *
338-
encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen)
344+
encode_utf8(unsigned char *buf, size_t len, file_unichar_t *ubuf, size_t ulen)
339345
{
340346
size_t i;
341347
unsigned char *end = buf + len;
@@ -345,43 +351,45 @@ encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen)
345351
if (end - buf < 1)
346352
return NULL;
347353
*buf++ = CAST(unsigned char, ubuf[i]);
348-
} else if (ubuf[i] <= 0x7ff) {
354+
continue;
355+
}
356+
if (ubuf[i] <= 0x7ff) {
349357
if (end - buf < 2)
350358
return NULL;
351359
*buf++ = CAST(unsigned char, (ubuf[i] >> 6) + 0xc0);
352-
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
353-
} else if (ubuf[i] <= 0xffff) {
360+
goto out1;
361+
}
362+
if (ubuf[i] <= 0xffff) {
354363
if (end - buf < 3)
355364
return NULL;
356365
*buf++ = CAST(unsigned char, (ubuf[i] >> 12) + 0xe0);
357-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
358-
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
359-
} else if (ubuf[i] <= 0x1fffff) {
366+
goto out2;
367+
}
368+
if (ubuf[i] <= 0x1fffff) {
360369
if (end - buf < 4)
361370
return NULL;
362371
*buf++ = CAST(unsigned char, (ubuf[i] >> 18) + 0xf0);
363-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
364-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
365-
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
366-
} else if (ubuf[i] <= 0x3ffffff) {
372+
goto out3;
373+
}
374+
if (ubuf[i] <= 0x3ffffff) {
367375
if (end - buf < 5)
368376
return NULL;
369377
*buf++ = CAST(unsigned char, (ubuf[i] >> 24) + 0xf8);
370-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
371-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
372-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
373-
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
374-
} else if (ubuf[i] <= 0x7fffffff) {
378+
goto out4;
379+
}
380+
if (ubuf[i] <= 0x7fffffff) {
375381
if (end - buf < 6)
376382
return NULL;
377383
*buf++ = CAST(unsigned char, (ubuf[i] >> 30) + 0xfc);
378-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80);
379-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
380-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
381-
*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
382-
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
383-
} else /* Invalid character */
384-
return NULL;
384+
goto out5;
385+
}
386+
/* Invalid character */
387+
return NULL;
388+
out5: *buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80);
389+
out4: *buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
390+
out3: *buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
391+
out2: *buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
392+
out1: *buf++ = CAST(unsigned char, ((ubuf[i] >> 0) & 0x3f) + 0x80);
385393
}
386394

387395
return buf;

‎ext/fileinfo/libmagic/compress.c

+23-4
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
#include "file.h"
3636

3737
#ifndef lint
38-
FILE_RCSID("@(#)$File: compress.c,v 1.127 2020/05/31 00:11:06 christos Exp $")
38+
FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $")
3939
#endif
4040

4141
#include "magic.h"
@@ -72,7 +72,7 @@ typedef void (*sig_t)(int);
7272
#include <bzlib.h>
7373
#endif
7474

75-
#if defined(HAVE_XZLIB_H) && defined(XZLIBSUPPORT)
75+
#if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
7676
#define BUILTIN_XZLIB
7777
#include <lzma.h>
7878
#endif
@@ -847,8 +847,23 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
847847
for (i = 0; i < __arraycount(fdp); i++)
848848
fdp[i][0] = fdp[i][1] = -1;
849849

850-
if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
851-
pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
850+
/*
851+
* There are multithreaded users who run magic_file()
852+
* from dozens of threads. If two parallel magic_file() calls
853+
* analyze two large compressed files, both will spawn
854+
* an uncompressing child here, which writes out uncompressed data.
855+
* We read some portion, then close the pipe, then waitpid() the child.
856+
* If uncompressed data is larger, child should get EPIPE and exit.
857+
* However, with *parallel* calls OTHER child may unintentionally
858+
* inherit pipe fds, thus keeping pipe open and making writes in
859+
* our child block instead of failing with EPIPE!
860+
* (For the bug to occur, two threads must mutually inherit their pipes,
861+
* and both must have large outputs. Thus it happens not that often).
862+
* To avoid this, be sure to create pipes with O_CLOEXEC.
863+
*/
864+
if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
865+
file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
866+
file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
852867
closep(fdp[STDIN_FILENO]);
853868
closep(fdp[STDOUT_FILENO]);
854869
return makeerror(newch, n, "Cannot create pipe, %s",
@@ -879,16 +894,20 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
879894
if (fdp[STDIN_FILENO][1] > 2)
880895
(void) close(fdp[STDIN_FILENO][1]);
881896
}
897+
file_clear_closexec(STDIN_FILENO);
898+
882899
///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
883900
if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
884901
(void) close(fdp[STDOUT_FILENO][1]);
885902
if (fdp[STDOUT_FILENO][0] > 2)
886903
(void) close(fdp[STDOUT_FILENO][0]);
904+
file_clear_closexec(STDOUT_FILENO);
887905

888906
if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
889907
(void) close(fdp[STDERR_FILENO][1]);
890908
if (fdp[STDERR_FILENO][0] > 2)
891909
(void) close(fdp[STDERR_FILENO][0]);
910+
file_clear_closexec(STDERR_FILENO);
892911

893912
(void)execvp(compr[method].argv[0],
894913
RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));

‎ext/fileinfo/libmagic/config.h

-1
This file was deleted.

‎ext/fileinfo/libmagic/der.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
#include "file.h"
3636

3737
#ifndef lint
38-
FILE_RCSID("@(#)$File: der.c,v 1.20 2020/06/07 19:10:37 christos Exp $")
38+
FILE_RCSID("@(#)$File: der.c,v 1.21 2020/06/15 00:58:10 christos Exp $")
3939
#endif
4040
#else
4141
#define SIZE_T_FORMAT "z"
@@ -249,7 +249,6 @@ der_data(char *buf, size_t blen, uint32_t tag, const void *q, uint32_t len)
249249
return snprintf(buf, blen,
250250
"20%c%c-%c%c-%c%c %c%c:%c%c:%c%c GMT", d[0], d[1], d[2],
251251
d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11]);
252-
break;
253252
default:
254253
break;
255254
}

0 commit comments

Comments
 (0)
Failed to load comments.