Skip to content

Commit b6004a0

Browse files
TimWollabukka
authored and committed
Fix GHSA-p3x9-6h7p-cgfc: libxml streams wrong content-type on redirect
libxml streams use wrong content-type header when requesting a redirected resource.
1 parent 41d49ab commit b6004a0

File tree

4 files changed

+224
-33
lines changed

4 files changed

+224
-33
lines changed
+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Basic)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// Intentionally using non-standard casing for content-type to verify it is matched case-insensitively.
33+
yield "data://text/plain,HTTP/1.1 200 OK\r\nconteNt-tyPe: text/html; charset=utf-8\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"
+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Missing content-type)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// Missing content-type in actual response.
33+
yield "data://text/plain,HTTP/1.1 200 OK\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"
+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Reason with colon)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// Missing content-type in actual response, and the status line's reason phrase contains a colon.
33+
yield "data://text/plain,HTTP/1.1 200 OK: This is fine\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"

ext/libxml/libxml.c

+44-33
Original file line numberDiff line numberDiff line change
@@ -374,42 +374,53 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
374374
if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
375375
zval *header;
376376

377-
ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
377+
/* Scan backwards: The header array might contain the headers for multiple responses, if
378+
* a redirect was followed.
379+
*/
380+
ZEND_HASH_REVERSE_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
378381
const char buf[] = "Content-Type:";
379-
if (Z_TYPE_P(header) == IS_STRING &&
380-
!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
381-
char *needle = estrdup("charset=");
382-
char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
383-
char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1);
384-
385-
if (encoding) {
386-
char *end;
387-
388-
encoding += sizeof("charset=")-1;
389-
if (*encoding == '"') {
390-
encoding++;
391-
}
392-
end = strchr(encoding, ';');
393-
if (end == NULL) {
394-
end = encoding + strlen(encoding);
395-
}
396-
end--; /* end == encoding-1 isn't a buffer underrun */
397-
while (*end == ' ' || *end == '\t') {
398-
end--;
399-
}
400-
if (*end == '"') {
401-
end--;
402-
}
403-
if (encoding >= end) continue;
404-
*(end+1) = '\0';
405-
enc = xmlParseCharEncoding(encoding);
406-
if (enc <= XML_CHAR_ENCODING_NONE) {
407-
enc = XML_CHAR_ENCODING_NONE;
382+
if (Z_TYPE_P(header) == IS_STRING) {
383+
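/* If no colon is found in the header, or a space precedes the first colon, we assume it's the HTTP status line and bail out. NOTE(review): a header containing no space at all gives space == NULL, which appears to take this branch as well -- confirm that is intended. */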
384+
char *colon = memchr(Z_STRVAL_P(header), ':', Z_STRLEN_P(header));
385+
char *space = memchr(Z_STRVAL_P(header), ' ', Z_STRLEN_P(header));
386+
if (colon == NULL || space < colon) {
387+
break;
388+
}
389+
390+
if (!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
391+
char *needle = estrdup("charset=");
392+
char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
393+
char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1);
394+
395+
if (encoding) {
396+
char *end;
397+
398+
encoding += sizeof("charset=")-1;
399+
if (*encoding == '"') {
400+
encoding++;
401+
}
402+
end = strchr(encoding, ';');
403+
if (end == NULL) {
404+
end = encoding + strlen(encoding);
405+
}
406+
end--; /* end == encoding-1 isn't a buffer underrun */
407+
while (*end == ' ' || *end == '\t') {
408+
end--;
409+
}
410+
if (*end == '"') {
411+
end--;
412+
}
413+
if (encoding >= end) continue;
414+
*(end+1) = '\0';
415+
enc = xmlParseCharEncoding(encoding);
416+
if (enc <= XML_CHAR_ENCODING_NONE) {
417+
enc = XML_CHAR_ENCODING_NONE;
418+
}
408419
}
420+
efree(haystack);
421+
efree(needle);
422+
break; /* found content-type */
409423
}
410-
efree(haystack);
411-
efree(needle);
412-
break; /* found content-type */
413424
}
414425
} ZEND_HASH_FOREACH_END();
415426
}

0 commit comments

Comments
 (0)