@@ -3,7 +3,7 @@
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
- * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
+ * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace ZendXml ;
@@ -19,12 +19,14 @@ class Security
* Heuristic scan to detect entity in XML
*
* @param string $xml
- * @throws Exception\RuntimeException
+ * @throws Exception\RuntimeException If entity expansion or external entity declaration was discovered.
*/
protected static function heuristicScan ($xml )
{
- if (strpos ($xml , ' <!ENTITY' ) ! == false ) {
- throw new Exception\ RuntimeException (self :: ENTITY_DETECT );
+ foreach (self :: getEntityComparison($xml ) as $compare ) {
+ if (strpos ($xml , $compare ) ! == false ) {
+ throw new Exception\ RuntimeException (self :: ENTITY_DETECT );
+ }
}
}
@@ -66,13 +68,12 @@ public static function scan($xml, DOMDocument $dom = null)
$result = $dom -> loadXml($xml , LIBXML_NONET );
restore_error_handler ();
- // Entity load to previous setting
- if (! self :: isPhpFpm()) {
- libxml_disable_entity_loader ($loadEntities );
- libxml_use_internal_errors ($useInternalXmlErrors );
- }
-
if (! $result ) {
+ // Entity load to previous setting
+ if (! self :: isPhpFpm()) {
+ libxml_disable_entity_loader ($loadEntities );
+ libxml_use_internal_errors ($useInternalXmlErrors );
+ }
return false ;
}
@@ -87,6 +88,12 @@ public static function scan($xml, DOMDocument $dom = null)
}
}
+ // Entity load to previous setting
+ if (! self :: isPhpFpm()) {
+ libxml_disable_entity_loader ($loadEntities );
+ libxml_use_internal_errors ($useInternalXmlErrors );
+ }
+
if (isset ($simpleXml )) {
$result = simplexml_import_dom ($dom );
if (! $result instanceof SimpleXMLElement ) {
@@ -118,13 +125,250 @@ public static function scanFile($file, DOMDocument $dom = null)
/**
* Return true if PHP is running with PHP-FPM
*
+ * This method is mainly used to determine whether or not heuristic checks
+ * (vs libxml checks) should be made, due to threading issues in libxml;
+ * under php-fpm, threading becomes a concern.
+ *
+ * However, PHP versions 5.5.22+ and 5.6.6+ contain a patch to the
+ * libxml support in PHP that makes the libxml checks viable; in such
+ * versions, this method will return false to enforce those checks, which
+ * are more strict and accurate than the heuristic checks.
+ *
* @return boolean
*/
public static function isPhpFpm ()
{
- if (substr (php_sapi_name (), 0 , 3 ) === ' fpm' ) {
+ $isVulnerableVersion = (
+ version_compare (PHP_VERSION , ' 5.5.22' , ' lt' )
+ || (
+ version_compare (PHP_VERSION , ' 5.6' , ' gte' )
+ && version_compare (PHP_VERSION , ' 5.6.6' , ' lt' )
+ )
+ );
+
+ if (substr (php_sapi_name (), 0 , 3 ) === ' fpm' && $isVulnerableVersion ) {
return true ;
}
return false ;
}
+
+ /**
+ * Determine and return the string(s) to use for the <!ENTITY comparison.
+ *
+ * @param string $xml
+ * @return string[]
+ */
+ protected static function getEntityComparison ($xml )
+ {
+ $encodingMap = self :: getAsciiEncodingMap();
+ return array_map (function ($encoding ) use ($encodingMap ) {
+ $generator = isset ($encodingMap [$encoding ]) ? $encodingMap [$encoding ] : $encodingMap [' UTF-8' ];
+ return $generator (' <!ENTITY' );
+ }, self :: detectXmlEncoding($xml , self :: detectStringEncoding($xml )));
+ }
+
+ /**
+ * Determine the string encoding.
+ *
+ * Determines string encoding from either a detected BOM or a
+ * heuristic.
+ *
+ * @param string $xml
+ * @return string File encoding
+ */
+ protected static function detectStringEncoding ($xml )
+ {
+ return self :: detectBom($xml ) ?: self :: detectXmlStringEncoding($xml );
+ }
+
+ /**
+ * Attempt to match a known BOM.
+ *
+ * Iterates through the return of getBomMap(), comparing the initial bytes
+ * of the provided string to the BOM of each; if a match is determined,
+ * it returns the encoding.
+ *
+ * @param string $string
+ * @return false|string Returns encoding on success.
+ */
+ protected static function detectBom ($string )
+ {
+ foreach (self :: getBomMap() as $criteria ) {
+ if (0 === strncmp ($string , $criteria [' bom' ], $criteria [' length' ])) {
+ return $criteria [' encoding' ];
+ }
+ }
+ return false ;
+ }
+
+ /**
+ * Attempt to detect the string encoding of an XML string.
+ *
+ * @param string $xml
+ * @return string Encoding
+ */
+ protected static function detectXmlStringEncoding ($xml )
+ {
+ foreach (self :: getAsciiEncodingMap() as $encoding => $generator ) {
+ $prefix = $generator (' <' . ' ?xml' );
+ if (0 === strncmp ($xml , $prefix , strlen ($prefix ))) {
+ return $encoding ;
+ }
+ }
+
+ // Fallback
+ return ' UTF-8' ;
+ }
+
+ /**
+ * Attempt to detect the specified XML encoding.
+ *
+ * Using the file's encoding, determines if an "encoding" attribute is
+ * present and well-formed in the XML declaration; if so, it returns a
+ * list with both the ASCII representation of that declaration and the
+ * original file encoding.
+ *
+ * If not, a list containing only the provided file encoding is returned.
+ *
+ * @param string $xml
+ * @param string $fileEncoding
+ * @return string[] Potential XML encodings
+ */
+ protected static function detectXmlEncoding ($xml , $fileEncoding )
+ {
+ $encodingMap = self :: getAsciiEncodingMap();
+ $generator = $encodingMap [$fileEncoding ];
+ $encAttr = $generator (' encoding="' );
+ $quote = $generator (' "' );
+ $close = $generator (' >' );
+
+ $closePos = strpos ($xml , $close );
+ if (false === $closePos ) {
+ return array ($fileEncoding );
+ }
+
+ $encPos = strpos ($xml , $encAttr );
+ if (false === $encPos
+ || $encPos > $closePos
+ ) {
+ return array ($fileEncoding );
+ }
+
+ $encPos + = strlen ($encAttr );
+ $quotePos = strpos ($xml , $quote , $encPos );
+ if (false === $quotePos ) {
+ return array ($fileEncoding );
+ }
+
+ $encoding = self :: substr($xml , $encPos , $quotePos );
+ return array (
+ // Following line works because we're only supporting 8-bit safe encodings at this time.
+ str_replace (' \0' , ' ' , $encoding ), // detected encoding
+ $fileEncoding , // file encoding
+ );
+ }
+
+ /**
+ * Return a list of BOM maps.
+ *
+ * Returns a list of common encoding -> BOM maps, along with the character
+ * length to compare against.
+ *
+ * @link https://en.wikipedia.org/wiki/Byte_order_mark
+ * @return array
+ */
+ protected static function getBomMap ()
+ {
+ return array (
+ array (
+ ' encoding' => ' UTF-32BE' ,
+ ' bom' => pack (' CCCC' , 0x00 , 0x00 , 0xfe , 0xff ),
+ ' length' => 4 ,
+ ),
+ array (
+ ' encoding' => ' UTF-32LE' ,
+ ' bom' => pack (' CCCC' , 0xff , 0xfe , 0x00 , 0x00 ),
+ ' length' => 4 ,
+ ),
+ array (
+ ' encoding' => ' GB-18030' ,
+ ' bom' => pack (' CCCC' , 0x84 , 0x31 , 0x95 , 0x33 ),
+ ' length' => 4 ,
+ ),
+ array (
+ ' encoding' => ' UTF-16BE' ,
+ ' bom' => pack (' CC' , 0xfe , 0xff ),
+ ' length' => 2 ,
+ ),
+ array (
+ ' encoding' => ' UTF-16LE' ,
+ ' bom' => pack (' CC' , 0xff , 0xfe ),
+ ' length' => 2 ,
+ ),
+ array (
+ ' encoding' => ' UTF-8' ,
+ ' bom' => pack (' CCC' , 0xef , 0xbb , 0xbf ),
+ ' length' => 3 ,
+ ),
+ );
+ }
+
+ /**
+ * Return a map of encoding => generator pairs.
+ *
+ * Returns a map of encoding => generator pairs, where the generator is a
+ * callable that accepts a string and returns the appropriate byte order
+ * sequence of that string for the encoding.
+ *
+ * @return array
+ */
+ protected static function getAsciiEncodingMap ()
+ {
+ return array (
+ ' UTF-32BE' => function ($ascii ) {
+ return preg_replace ('/ (.)/' , " \0\0\0 \\ 1" , $ascii );
+ },
+ ' UTF-32LE' => function ($ascii ) {
+ return preg_replace ('/ (.)/' , " \\ 1\0\0\0 " , $ascii );
+ },
+ ' UTF-32odd1' => function ($ascii ) {
+ return preg_replace ('/ (.)/' , " \0 \\ 1\0\0 " , $ascii );
+ },
+ ' UTF-32odd2' => function ($ascii ) {
+ return preg_replace ('/ (.)/' , " \0\0 \\ 1\0 " , $ascii );
+ },
+ ' UTF-16BE' => function ($ascii ) {
+ return preg_replace ('/ (.)/' , " \0 \\ 1" , $ascii );
+ },
+ ' UTF-16LE' => function ($ascii ) {
+ return preg_replace ('/ (.)/' , " \\ 1\0 " , $ascii );
+ },
+ ' UTF-8' => function ($ascii ) {
+ return $ascii ;
+ },
+ ' GB-18030' => function ($ascii ) {
+ return $ascii ;
+ },
+ );
+ }
+
+ /**
+ * Binary-safe substr.
+ *
+ * substr() is not binary-safe; this method loops by character to ensure
+ * multi-byte characters are aggregated correctly.
+ *
+ * @param string $string
+ * @param int $start
+ * @param int $end
+ * @return string
+ */
+ protected static function substr ($string , $start , $end )
+ {
+ $substr = ' ' ;
+ for ($i = $start ; $i < $end ; $i + = 1 ) {
+ $substr .= $string [$i ];
+ }
+ return $substr ;
+ }
}
0 comments on commit
79f478f