From 5990a8692c796ed841eebe30c3c76c59c8151be0 Mon Sep 17 00:00:00 2001 From: Matthew Weier O'Phinney Date: Fri, 31 Aug 2012 14:44:59 -0500 Subject: [PATCH] [zendframework/zf2#2284][ZF2-507] Updated README - Notice about Date header --- .coveralls.yml | 3 + .gitattributes | 6 + .gitignore | 14 + .php_cs | 43 +++ .travis.yml | 35 ++ CONTRIBUTING.md | 229 ++++++++++++ LICENSE.txt | 27 ++ README.md | 11 + composer.json | 34 ++ phpunit.xml.dist | 35 ++ phpunit.xml.travis | 35 ++ src/Escaper.php | 391 ++++++++++++++++++++ src/Exception/ExceptionInterface.php | 18 + src/Exception/InvalidArgumentException.php | 22 ++ src/Exception/RuntimeException.php | 22 ++ test/EscaperTest.php | 395 +++++++++++++++++++++ test/bootstrap.php | 34 ++ 17 files changed, 1354 insertions(+) create mode 100644 .coveralls.yml create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 .php_cs create mode 100644 .travis.yml create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 composer.json create mode 100644 phpunit.xml.dist create mode 100644 phpunit.xml.travis create mode 100644 src/Escaper.php create mode 100644 src/Exception/ExceptionInterface.php create mode 100644 src/Exception/InvalidArgumentException.php create mode 100644 src/Exception/RuntimeException.php create mode 100644 test/EscaperTest.php create mode 100644 test/bootstrap.php diff --git a/.coveralls.yml b/.coveralls.yml new file mode 100644 index 0000000..53bda82 --- /dev/null +++ b/.coveralls.yml @@ -0,0 +1,3 @@ +coverage_clover: clover.xml +json_path: coveralls-upload.json +src_dir: src diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..85dc9a8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +/test export-ignore +/vendor export-ignore +.gitattributes export-ignore +.gitignore export-ignore +.travis.yml export-ignore +.php_cs export-ignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4cac0a2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.buildpath +.DS_Store +.idea +.project +.settings/ +.*.sw* +.*.un~ +nbproject +tmp/ + +clover.xml +coveralls-upload.json +phpunit.xml +vendor diff --git a/.php_cs b/.php_cs new file mode 100644 index 0000000..bf4b799 --- /dev/null +++ b/.php_cs @@ -0,0 +1,43 @@ +notPath('TestAsset') + ->notPath('_files') + ->filter(function (SplFileInfo $file) { + if (strstr($file->getPath(), 'compatibility')) { + return false; + } + }); +$config = Symfony\CS\Config\Config::create(); +$config->level(null); +$config->fixers( + array( + 'braces', + 'duplicate_semicolon', + 'elseif', + 'empty_return', + 'encoding', + 'eof_ending', + 'function_call_space', + 'function_declaration', + 'indentation', + 'join_function', + 'line_after_namespace', + 'linefeed', + 'lowercase_keywords', + 'parenthesis', + 'multiple_use', + 'method_argument_space', + 'object_operator', + 'php_closing_tag', + 'psr0', + 'remove_lines_between_uses', + 'short_tag', + 'standardize_not_equal', + 'trailing_spaces', + 'unused_use', + 'visibility', + 'whitespacy_lines', + ) +); +$config->finder($finder); +return $config; diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..fe909ec --- /dev/null +++ b/.travis.yml @@ -0,0 +1,35 @@ +sudo: false + +language: php + +matrix: + fast_finish: true + include: + - php: 5.5 + - php: 5.6 + env: + - EXECUTE_TEST_COVERALLS=true + - EXECUTE_CS_CHECK=true + - php: 7 + - php: hhvm + allow_failures: + - php: 7 + - php: hhvm + +notifications: + irc: "irc.freenode.org#zftalk.dev" + email: false + +before_install: + - if [[ $EXECUTE_TEST_COVERALLS != 'true' ]]; then phpenv config-rm xdebug.ini || return 0 ; fi + +install: + - composer install --no-interaction --prefer-source + +script: + - if [[ $EXECUTE_TEST_COVERALLS == 'true' ]]; then ./vendor/bin/phpunit -c phpunit.xml.travis --coverage-clover clover.xml ; fi + - if [[ $EXECUTE_TEST_COVERALLS != 'true' ]]; then ./vendor/bin/phpunit -c phpunit.xml.travis ; fi + - if [[ $EXECUTE_CS_CHECK == 'true' ]]; then ./vendor/bin/php-cs-fixer fix -v --diff --dry-run --config-file=.php_cs ; fi + +after_script: + - if [[ $EXECUTE_TEST_COVERALLS == 'true' ]]; then ./vendor/bin/coveralls ; fi diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..2b58aca --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,229 @@ +# CONTRIBUTING + +## RESOURCES + +If you wish to contribute to Zend Framework, please be sure to +read/subscribe to the following resources: + + - [Coding Standards](https://github.com/zendframework/zf2/wiki/Coding-Standards) + - [Contributor's Guide](http://framework.zend.com/participate/contributor-guide) + - ZF Contributor's mailing list: + Archives: http://zend-framework-community.634137.n4.nabble.com/ZF-Contributor-f680267.html + Subscribe: zf-contributors-subscribe@lists.zend.com + - ZF Contributor's IRC channel: + #zftalk.dev on Freenode.net + +If you are working on new features or refactoring [create a proposal](https://github.com/zendframework/zend-escaper/issues/new). + +## Reporting Potential Security Issues + +If you have encountered a potential security vulnerability, please **DO NOT** report it on the public +issue tracker: send it to us at [zf-security@zend.com](mailto:zf-security@zend.com) instead. +We will work with you to verify the vulnerability and patch it as soon as possible. + +When reporting issues, please provide the following information: + +- Component(s) affected +- A description indicating how to reproduce the issue +- A summary of the security vulnerability and impact + +We request that you contact us via the email address above and give the project +contributors a chance to resolve the vulnerability and issue a new release prior +to any public exposure; this helps protect users and provides them with a chance +to upgrade and/or update in order to protect their applications. + +For sensitive email communications, please use [our PGP key](http://framework.zend.com/zf-security-pgp-key.asc). + +## RUNNING TESTS + +> ### Note: testing versions prior to 2.4 +> +> This component originates with Zend Framework 2. During the lifetime of ZF2, +> testing infrastructure migrated from PHPUnit 3 to PHPUnit 4. In most cases, no +> changes were necessary. However, due to the migration, tests may not run on +> versions < 2.4. As such, you may need to change the PHPUnit dependency if +> attempting a fix on such a version. + +To run tests: + +- Clone the repository: + + ```console + $ git clone git@github.com:zendframework/zend-escaper.git + $ cd + ``` + +- Install dependencies via composer: + + ```console + $ curl -sS https://getcomposer.org/installer | php -- + $ ./composer.phar install + ``` + + If you don't have `curl` installed, you can also download `composer.phar` from https://getcomposer.org/ + +- Run the tests via `phpunit` and the provided PHPUnit config, like in this example: + + ```console + $ ./vendor/bin/phpunit + ``` + +You can turn on conditional tests with the phpunit.xml file. +To do so: + + - Copy `phpunit.xml.dist` file to `phpunit.xml` + - Edit `phpunit.xml` to enable any specific functionality you + want to test, as well as to provide test values to utilize. + +## Running Coding Standards Checks + +This component uses [php-cs-fixer](http://cs.sensiolabs.org/) for coding +standards checks, and provides configuration for our selected checks. +`php-cs-fixer` is installed by default via Composer. + +To run checks only: + +```console +$ ./vendor/bin/php-cs-fixer fix . -v --diff --dry-run --config-file=.php_cs +``` + +To have `php-cs-fixer` attempt to fix problems for you, omit the `--dry-run` +flag: + +```console +$ ./vendor/bin/php-cs-fixer fix . -v --diff --config-file=.php_cs +``` + +If you allow php-cs-fixer to fix CS issues, please re-run the tests to ensure +they pass, and make sure you add and commit the changes after verification. + +## Recommended Workflow for Contributions + +Your first step is to establish a public repository from which we can +pull your work into the master repository. We recommend using +[GitHub](https://github.com), as that is where the component is already hosted. + +1. Setup a [GitHub account](http://github.com/), if you haven't yet +2. Fork the repository (http://github.com/zendframework/zend-escaper) +3. Clone the canonical repository locally and enter it. + + ```console + $ git clone git://github.com:zendframework/zend-escaper.git + $ cd zend-escaper + ``` + +4. Add a remote to your fork; substitute your GitHub username in the command + below. + + ```console + $ git remote add {username} git@github.com:{username}/zend-escaper.git + $ git fetch {username} + ``` + +### Keeping Up-to-Date + +Periodically, you should update your fork or personal repository to +match the canonical ZF repository. Assuming you have setup your local repository +per the instructions above, you can do the following: + + +```console +$ git checkout master +$ git fetch origin +$ git rebase origin/master +# OPTIONALLY, to keep your remote up-to-date - +$ git push {username} master:master +``` + +If you're tracking other branches -- for example, the "develop" branch, where +new feature development occurs -- you'll want to do the same operations for that +branch; simply substitute "develop" for "master". + +### Working on a patch + +We recommend you do each new feature or bugfix in a new branch. This simplifies +the task of code review as well as the task of merging your changes into the +canonical repository. + +A typical workflow will then consist of the following: + +1. Create a new local branch based off either your master or develop branch. +2. Switch to your new local branch. (This step can be combined with the + previous step with the use of `git checkout -b`.) +3. Do some work, commit, repeat as necessary. +4. Push the local branch to your remote repository. +5. Send a pull request. + +The mechanics of this process are actually quite trivial. Below, we will +create a branch for fixing an issue in the tracker. + +```console +$ git checkout -b hotfix/9295 +Switched to a new branch 'hotfix/9295' +``` + +... do some work ... + + +```console +$ git commit +``` + +... write your log message ... + + +```console +$ git push {username} hotfix/9295:hotfix/9295 +Counting objects: 38, done. +Delta compression using up to 2 threads. +Compression objects: 100% (18/18), done. +Writing objects: 100% (20/20), 8.19KiB, done. +Total 20 (delta 12), reused 0 (delta 0) +To ssh://git@github.com/{username}/zend-escaper.git + b5583aa..4f51698 HEAD -> master +``` + +To send a pull request, you have two options. + +If using GitHub, you can do the pull request from there. Navigate to +your repository, select the branch you just created, and then select the +"Pull Request" button in the upper right. Select the user/organization +"zendframework" as the recipient. + +If using your own repository - or even if using GitHub - you can use `git +format-patch` to create a patchset for us to apply; in fact, this is +**recommended** for security-related patches. If you use `format-patch`, please +send the patches as attachments to: + +- zf-devteam@zend.com for patches without security implications +- zf-security@zend.com for security patches + +#### What branch to issue the pull request against? + +Which branch should you issue a pull request against? + +- For fixes against the stable release, issue the pull request against the + "master" branch. +- For new features, or fixes that introduce new elements to the public API (such + as new public methods or properties), issue the pull request against the + "develop" branch. + +### Branch Cleanup + +As you might imagine, if you are a frequent contributor, you'll start to +get a ton of branches both locally and on your remote. + +Once you know that your changes have been accepted to the master +repository, we suggest doing some cleanup of these branches. + +- Local branch cleanup + + ```console + $ git branch -d + ``` + +- Remote branch removal + + ```console + $ git push {username} : + ``` diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..6eab5aa --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,27 @@ +Copyright (c) 2005-2015, Zend Technologies USA, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of Zend Technologies USA, Inc. nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..dd96215 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +# zend-escaper + +The OWASP Top 10 web security risks study lists Cross-Site Scripting (XSS) in +second place. PHP’s sole functionality against XSS is limited to two functions +of which one is commonly misapplied. Thus, the `Zend\Escaper` component was written. +It offers developers a way to escape output and defend from XSS and related +vulnerabilities by introducing contextual escaping based on peer-reviewed rules. + + +- File issues at https://github.com/zendframework/zend-escaper/issues +- Documentation is at http://framework.zend.com/manual/current/en/index.html#zend-escaper diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..a576c95 --- /dev/null +++ b/composer.json @@ -0,0 +1,34 @@ +{ + "name": "zendframework/zend-escaper", + "description": " ", + "license": "BSD-3-Clause", + "keywords": [ + "zf2", + "escaper" + ], + "homepage": "https://github.com/zendframework/zend-escaper", + "autoload": { + "psr-4": { + "Zend\\Escaper": "src/" + } + }, + "require": { + "php": ">=5.3.3" + }, + "extra": { + "branch-alias": { + "dev-master": "2.4-dev", + "dev-develop": "2.5-dev" + } + }, + "autoload-dev": { + "psr-4": { + "ZendTest\\Escaper\\": "test/" + } + }, + "require-dev": { + "fabpot/php-cs-fixer": "1.7.*", + "satooshi/php-coveralls": "dev-master", + "phpunit/PHPUnit": "~4.0" + } +} \ No newline at end of file diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..c887b36 --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,35 @@ + + + + + ./test/ + + + + + + disable + + + + + + ./src + + + + + + + + + + + + diff --git a/phpunit.xml.travis b/phpunit.xml.travis new file mode 100644 index 0000000..c887b36 --- /dev/null +++ b/phpunit.xml.travis @@ -0,0 +1,35 @@ + + + + + ./test/ + + + + + + disable + + + + + + ./src + + + + + + + + + + + + diff --git a/src/Escaper.php b/src/Escaper.php new file mode 100644 index 0000000..399e4fd --- /dev/null +++ b/src/Escaper.php @@ -0,0 +1,391 @@ + 'quot', // quotation mark + 38 => 'amp', // ampersand + 60 => 'lt', // less-than sign + 62 => 'gt', // greater-than sign + ); + + /** + * Current encoding for escaping. If not UTF-8, we convert strings from this encoding + * pre-escaping and back to this encoding post-escaping. + * + * @var string + */ + protected $encoding = 'utf-8'; + + /** + * Holds the value of the special flags passed as second parameter to + * htmlspecialchars(). We modify these for PHP 5.4 to take advantage + * of the new ENT_SUBSTITUTE flag for correctly dealing with invalid + * UTF-8 sequences. + * + * @var string + */ + protected $htmlSpecialCharsFlags = \ENT_QUOTES; + + /** + * Static Matcher which escapes characters for HTML Attribute contexts + * + * @var callable + */ + protected $htmlAttrMatcher; + + /** + * Static Matcher which escapes characters for Javascript contexts + * + * @var callable + */ + protected $jsMatcher; + + /** + * Static Matcher which escapes characters for CSS Attribute contexts + * + * @var callable + */ + protected $cssMatcher; + + /** + * List of all encoding supported by this class + * + * @var array + */ + protected $supportedEncodings = array( + 'iso-8859-1', 'iso8859-1', 'iso-8859-5', 'iso8859-5', + 'iso-8859-15', 'iso8859-15', 'utf-8', 'cp866', + 'ibm866', '866', 'cp1251', 'windows-1251', + 'win-1251', '1251', 'cp1252', 'windows-1252', + '1252', 'koi8-r', 'koi8-ru', 'koi8r', + 'big5', '950', 'gb2312', '936', + 'big5-hkscs', 'shift_jis', 'sjis', 'sjis-win', + 'cp932', '932', 'euc-jp', 'eucjp', + 'eucjp-win', 'macroman' + ); + + /** + * Constructor: Single parameter allows setting of global encoding for use by + * the current object. If PHP 5.4 is detected, additional ENT_SUBSTITUTE flag + * is set for htmlspecialchars() calls. + * + * @param string $encoding + */ + public function __construct($encoding = null) + { + if ($encoding !== null) { + $encoding = (string) $encoding; + if ($encoding === '') { + throw new Exception\InvalidArgumentException( + get_called_class() . ' constructor parameter does not allow a blank value' + ); + } + + $encoding = strtolower($encoding); + if (!in_array($encoding, $this->supportedEncodings)) { + throw new Exception\InvalidArgumentException( + 'Value of \'' . $encoding . '\' passed to ' . get_called_class() + . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()' + ); + } + + $this->encoding = $encoding; + } + + if (defined('ENT_SUBSTITUTE')) { + $this->htmlSpecialCharsFlags|= \ENT_SUBSTITUTE; + } + + // set matcher callbacks + $this->htmlAttrMatcher = array($this, 'htmlAttrMatcher'); + $this->jsMatcher = array($this, 'jsMatcher'); + $this->cssMatcher = array($this, 'cssMatcher'); + } + + /** + * Return the encoding that all output/input is expected to be encoded in. + * + * @return string + */ + public function getEncoding() + { + return $this->encoding; + } + + /** + * Escape a string for the HTML Body context where there are very few characters + * of special meaning. Internally this will use htmlspecialchars(). + * + * @param string $string + * @return string + */ + public function escapeHtml($string) + { + $result = htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding); + return $result; + } + + /** + * Escape a string for the HTML Attribute context. We use an extended set of characters + * to escape that are not covered by htmlspecialchars() to cover cases where an attribute + * might be unquoted or quoted illegally (e.g. backticks are valid quotes for IE). + * + * @param string $string + * @return string + */ + public function escapeHtmlAttr($string) + { + $string = $this->toUtf8($string); + if ($string === '' || ctype_digit($string)) { + return $string; + } + + $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string); + return $this->fromUtf8($result); + } + + /** + * Escape a string for the Javascript context. This does not use json_encode(). An extended + * set of characters are escaped beyond ECMAScript's rules for Javascript literal string + * escaping in order to prevent misinterpretation of Javascript as HTML leading to the + * injection of special characters and entities. The escaping used should be tolerant + * of cases where HTML escaping was not applied on top of Javascript escaping correctly. + * Backslash escaping is not used as it still leaves the escaped character as-is and so + * is not useful in a HTML context. + * + * @param string $string + * @return string + */ + public function escapeJs($string) + { + $string = $this->toUtf8($string); + if ($string === '' || ctype_digit($string)) { + return $string; + } + + $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string); + return $this->fromUtf8($result); + } + + /** + * Escape a string for the URI or Parameter contexts. This should not be used to escape + * an entire URI - only a subcomponent being inserted. The function is a simple proxy + * to rawurlencode() which now implements RFC 3986 since PHP 5.3 completely. + * + * @param string $string + * @return string + */ + public function escapeUrl($string) + { + return rawurlencode($string); + } + + /** + * Escape a string for the CSS context. CSS escaping can be applied to any string being + * inserted into CSS and escapes everything except alphanumerics. + * + * @param string $string + * @return string + */ + public function escapeCss($string) + { + $string = $this->toUtf8($string); + if ($string === '' || ctype_digit($string)) { + return $string; + } + + $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string); + return $this->fromUtf8($result); + } + + /** + * Callback function for preg_replace_callback that applies HTML Attribute + * escaping to all matches. + * + * @param array $matches + * @return string + */ + protected function htmlAttrMatcher($matches) + { + $chr = $matches[0]; + $ord = ord($chr); + + /** + * The following replaces characters undefined in HTML with the + * hex entity for the Unicode replacement character. + */ + if (($ord <= 0x1f && $chr != "\t" && $chr != "\n" && $chr != "\r") + || ($ord >= 0x7f && $ord <= 0x9f) + ) { + return '�'; + } + + /** + * Check if the current character to escape has a name entity we should + * replace it with while grabbing the integer value of the character. + */ + if (strlen($chr) > 1) { + $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8'); + } + + $hex = bin2hex($chr); + $ord = hexdec($hex); + if (isset(static::$htmlNamedEntityMap[$ord])) { + return '&' . static::$htmlNamedEntityMap[$ord] . ';'; + } + + /** + * Per OWASP recommendations, we'll use upper hex entities + * for any other characters where a named entity does not exist. + */ + if ($ord > 255) { + return sprintf('&#x%04X;', $ord); + } else { + return sprintf('&#x%02X;', $ord); + } + } + + /** + * Callback function for preg_replace_callback that applies Javascript + * escaping to all matches. + * + * @param array $matches + * @return string + */ + protected function jsMatcher($matches) + { + $chr = $matches[0]; + if (strlen($chr) == 1) { + return sprintf('\\x%02X', ord($chr)); + } else { + $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8'); + return sprintf('\\u%04s', strtoupper(bin2hex($chr))); + } + } + + /** + * Callback function for preg_replace_callback that applies CSS + * escaping to all matches. + * + * @param array $matches + * @return string + */ + protected function cssMatcher($matches) + { + $chr = $matches[0]; + if (strlen($chr) == 1) { + $ord = ord($chr); + } else { + $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8'); + $ord = hexdec(bin2hex($chr)); + } + return sprintf('\\%X ', $ord); + } + + /** + * Converts a string to UTF-8 from the base encoding. The base encoding is set via this + * class' constructor. + * + * @param string $string + * @return string + */ + protected function toUtf8($string) + { + if ($this->getEncoding() === 'utf-8') { + $result = $string; + } else { + $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding()); + } + + if (!$this->isUtf8($result)) { + throw new Exception\RuntimeException(sprintf( + 'String to be escaped was not valid UTF-8 or could not be converted: %s', $result + )); + } + + return $result; + } + + /** + * Converts a string from UTF-8 to the base encoding. The base encoding is set via this + * class' constructor. + * @param string $string + * @return string + */ + protected function fromUtf8($string) + { + if ($this->getEncoding() === 'utf-8') { + return $string; + } + + return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8'); + } + + /** + * Checks if a given string appears to be valid UTF-8 or not. + * + * @param string $string + * @return bool + */ + protected function isUtf8($string) + { + return ($string === '' || preg_match('/^./su', $string)); + } + + /** + * Encoding conversion helper which wraps iconv and mbstring where they exist or throws + * and exception where neither is available. + * + * @param string $string + * @return string + */ + protected function convertEncoding($string, $to, $from) + { + $result = ''; + if (function_exists('iconv')) { + $result = iconv($from, $to, $string); + } elseif (function_exists('mb_convert_encoding')) { + $result = mb_convert_encoding($string, $to, $from); + } else { + throw new Exception\RuntimeException( + get_called_class() + . ' requires either the iconv or mbstring extension to be installed' + . ' when escaping for non UTF-8 strings.' + ); + } + + if ($result === false) { + return ''; // return non-fatal blank string on encoding errors from users + } else { + return $result; + } + } +} diff --git a/src/Exception/ExceptionInterface.php b/src/Exception/ExceptionInterface.php new file mode 100644 index 0000000..65a818e --- /dev/null +++ b/src/Exception/ExceptionInterface.php @@ -0,0 +1,18 @@ + ''', + '"' => '"', + '<' => '<', + '>' => '>', + '&' => '&' + ); + + protected $htmlAttrSpecialChars = array( + '\'' => ''', + '"' => '"', + '<' => '<', + '>' => '>', + '&' => '&', + /* Characters beyond ASCII value 255 to unicode escape */ + 'Ā' => 'Ā', + /* Immune chars excluded */ + ',' => ',', + '.' => '.', + '-' => '-', + '_' => '_', + /* Basic alnums exluded */ + 'a' => 'a', + 'A' => 'A', + 'z' => 'z', + 'Z' => 'Z', + '0' => '0', + '9' => '9', + /* Basic control characters and null */ + "\r" => ' ', + "\n" => ' ', + "\t" => ' ', + "\0" => '�', // should use Unicode replacement char + /* Encode chars as named entities where possible */ + '<' => '<', + '>' => '>', + '&' => '&', + '"' => '"', + /* Encode spaces for quoteless attribute protection */ + ' ' => ' ', + ); + + protected $jsSpecialChars = array( + /* HTML special chars - escape without exception to hex */ + '<' => '\\x3C', + '>' => '\\x3E', + '\'' => '\\x27', + '"' => '\\x22', + '&' => '\\x26', + /* Characters beyond ASCII value 255 to unicode escape */ + 'Ā' => '\\u0100', + /* Immune chars excluded */ + ',' => ',', + '.' => '.', + '_' => '_', + /* Basic alnums exluded */ + 'a' => 'a', + 'A' => 'A', + 'z' => 'z', + 'Z' => 'Z', + '0' => '0', + '9' => '9', + /* Basic control characters and null */ + "\r" => '\\x0D', + "\n" => '\\x0A', + "\t" => '\\x09', + "\0" => '\\x00', + /* Encode spaces for quoteless attribute protection */ + ' ' => '\\x20', + ); + + protected $urlSpecialChars = array( + /* HTML special chars - escape without exception to percent encoding */ + '<' => '%3C', + '>' => '%3E', + '\'' => '%27', + '"' => '%22', + '&' => '%26', + /* Characters beyond ASCII value 255 to hex sequence */ + 'Ā' => '%C4%80', + /* Punctuation and unreserved check */ + ',' => '%2C', + '.' => '.', + '_' => '_', + '-' => '-', + ':' => '%3A', + ';' => '%3B', + '!' => '%21', + /* Basic alnums excluded */ + 'a' => 'a', + 'A' => 'A', + 'z' => 'z', + 'Z' => 'Z', + '0' => '0', + '9' => '9', + /* Basic control characters and null */ + "\r" => '%0D', + "\n" => '%0A', + "\t" => '%09', + "\0" => '%00', + /* PHP quirks from the past */ + ' ' => '%20', + '~' => '~', + '+' => '%2B', + ); + + protected $cssSpecialChars = array( + /* HTML special chars - escape without exception to hex */ + '<' => '\\3C ', + '>' => '\\3E ', + '\'' => '\\27 ', + '"' => '\\22 ', + '&' => '\\26 ', + /* Characters beyond ASCII value 255 to unicode escape */ + 'Ā' => '\\100 ', + /* Immune chars excluded */ + ',' => '\\2C ', + '.' => '\\2E ', + '_' => '\\5F ', + /* Basic alnums exluded */ + 'a' => 'a', + 'A' => 'A', + 'z' => 'z', + 'Z' => 'Z', + '0' => '0', + '9' => '9', + /* Basic control characters and null */ + "\r" => '\\D ', + "\n" => '\\A ', + "\t" => '\\9 ', + "\0" => '\\0 ', + /* Encode spaces for quoteless attribute protection */ + ' ' => '\\20 ', + ); + + + public function setUp() + { + $this->escaper = new Escaper('UTF-8'); + } + + /** + * @expectedException \Zend\Escaper\Exception\InvalidArgumentException + */ + public function testSettingEncodingToEmptyStringShouldThrowException() + { + $escaper = new Escaper(''); + } + + public function testSettingValidEncodingShouldNotThrowExceptions() + { + foreach ($this->supportedEncodings as $value) { + $escaper = new Escaper($value); + } + } + + /** + * @expectedException \Zend\Escaper\Exception\InvalidArgumentException + */ + public function testSettingEncodingToInvalidValueShouldThrowException() + { + $escaper = new Escaper('invalid-encoding'); + } + + public function testReturnsEncodingFromGetter() + { + $this->assertEquals('utf-8', $this->escaper->getEncoding()); + } + + public function testHtmlEscapingConvertsSpecialChars() + { + foreach ($this->htmlSpecialChars as $key => $value) { + $this->assertEquals( + $value, + $this->escaper->escapeHtml($key), + 'Failed to escape: ' . $key + ); + } + } + + public function testHtmlAttributeEscapingConvertsSpecialChars() + { + foreach ($this->htmlAttrSpecialChars as $key => $value) { + $this->assertEquals( + $value, + $this->escaper->escapeHtmlAttr($key), + 'Failed to escape: ' . $key + ); + } + } + + public function testJavascriptEscapingConvertsSpecialChars() + { + foreach ($this->jsSpecialChars as $key => $value) { + $this->assertEquals( + $value, + $this->escaper->escapeJs($key), + 'Failed to escape: ' . $key + ); + } + } + + public function testJavascriptEscapingReturnsStringIfZeroLength() + { + $this->assertEquals('', $this->escaper->escapeJs('')); + } + + public function testJavascriptEscapingReturnsStringIfContainsOnlyDigits() + { + $this->assertEquals('123', $this->escaper->escapeJs('123')); + } + + public function testCssEscapingConvertsSpecialChars() + { + foreach ($this->cssSpecialChars as $key => $value) { + $this->assertEquals( + $value, + $this->escaper->escapeCss($key), + 'Failed to escape: ' . $key + ); + } + } + + public function testCssEscapingReturnsStringIfZeroLength() + { + $this->assertEquals('', $this->escaper->escapeCss('')); + } + + public function testCssEscapingReturnsStringIfContainsOnlyDigits() + { + $this->assertEquals('123', $this->escaper->escapeCss('123')); + } + + public function testUrlEscapingConvertsSpecialChars() + { + foreach ($this->urlSpecialChars as $key => $value) { + $this->assertEquals( + $value, + $this->escaper->escapeUrl($key), + 'Failed to escape: ' . $key + ); + } + } + + /** + * Range tests to confirm escaped range of characters is within OWASP recommendation + */ + + /** + * Only testing the first few 2 ranges on this prot. function as that's all these + * other range tests require + */ + public function testUnicodeCodepointConversionToUtf8() + { + $expected = " ~ޙ"; + $codepoints = array(0x20, 0x7e, 0x799); + $result = ''; + foreach ($codepoints as $value) { + $result .= $this->codepointToUtf8($value); + } + $this->assertEquals($expected, $result); + } + + /** + * Convert a Unicode Codepoint to a literal UTF-8 character. + * + * @param int Unicode codepoint in hex notation + * @return string UTF-8 literal string + */ + protected function codepointToUtf8($codepoint) + { + if ($codepoint < 0x80) { + return chr($codepoint); + } + if ($codepoint < 0x800) { + return chr($codepoint >> 6 & 0x3f | 0xc0) + . chr($codepoint & 0x3f | 0x80); + } + if ($codepoint < 0x10000) { + return chr($codepoint >> 12 & 0x0f | 0xe0) + . chr($codepoint >> 6 & 0x3f | 0x80) + . chr($codepoint & 0x3f | 0x80); + } + if ($codepoint < 0x110000) { + return chr($codepoint >> 18 & 0x07 | 0xf0) + . chr($codepoint >> 12 & 0x3f | 0x80) + . chr($codepoint >> 6 & 0x3f | 0x80) + . chr($codepoint & 0x3f | 0x80); + } + throw new \Exception('Codepoint requested outside of Unicode range'); + } + + public function testJavascriptEscapingEscapesOwaspRecommendedRanges() + { + $immune = array(',', '.', '_'); // Exceptions to escaping ranges + for ($chr=0; $chr < 0xFF; $chr++) { + if ($chr >= 0x30 && $chr <= 0x39 + || $chr >= 0x41 && $chr <= 0x5A + || $chr >= 0x61 && $chr <= 0x7A + ) { + $literal = $this->codepointToUtf8($chr); + $this->assertEquals($literal, $this->escaper->escapeJs($literal)); + } else { + $literal = $this->codepointToUtf8($chr); + if (in_array($literal, $immune)) { + $this->assertEquals($literal, $this->escaper->escapeJs($literal)); + } else { + $this->assertNotEquals( + $literal, + $this->escaper->escapeJs($literal), + $literal . ' should be escaped!' + ); + } + } + } + } + + public function testHtmlAttributeEscapingEscapesOwaspRecommendedRanges() + { + $immune = array(',', '.', '-', '_'); // Exceptions to escaping ranges + for ($chr=0; $chr < 0xFF; $chr++) { + if ($chr >= 0x30 && $chr <= 0x39 + || $chr >= 0x41 && $chr <= 0x5A + || $chr >= 0x61 && $chr <= 0x7A + ) { + $literal = $this->codepointToUtf8($chr); + $this->assertEquals($literal, $this->escaper->escapeHtmlAttr($literal)); + } else { + $literal = $this->codepointToUtf8($chr); + if (in_array($literal, $immune)) { + $this->assertEquals($literal, $this->escaper->escapeHtmlAttr($literal)); + } else { + $this->assertNotEquals( + $literal, + $this->escaper->escapeHtmlAttr($literal), + $literal . ' should be escaped!' + ); + } + } + } + } + + public function testCssEscapingEscapesOwaspRecommendedRanges() + { + $immune = array(); // CSS has no exceptions to escaping ranges + for ($chr=0; $chr < 0xFF; $chr++) { + if ($chr >= 0x30 && $chr <= 0x39 + || $chr >= 0x41 && $chr <= 0x5A + || $chr >= 0x61 && $chr <= 0x7A + ) { + $literal = $this->codepointToUtf8($chr); + $this->assertEquals($literal, $this->escaper->escapeCss($literal)); + } else { + $literal = $this->codepointToUtf8($chr); + $this->assertNotEquals( + $literal, + $this->escaper->escapeCss($literal), + $literal . ' should be escaped!' + ); + } + } + } +} diff --git a/test/bootstrap.php b/test/bootstrap.php new file mode 100644 index 0000000..f69b308 --- /dev/null +++ b/test/bootstrap.php @@ -0,0 +1,34 @@ +