Skip to content

Commit

Permalink
improvement performance
Browse files Browse the repository at this point in the history
  • Loading branch information
zaininnari committed Oct 27, 2013
1 parent bfcdd29 commit 4ccb3db
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 106 deletions.
6 changes: 6 additions & 0 deletions README.mdown
Expand Up @@ -40,7 +40,13 @@ $html = '<div>
text
</p>
</div>';

// shortcut: returns the minified HTML
$minify = HTMLMinify::minify($html);

// detail
$HTMLMinify = new HTMLMinify($html);
$minify = $HTMLMinify->process();
```

output html
Expand Down
3 changes: 1 addition & 2 deletions composer.json
Expand Up @@ -18,8 +18,7 @@
}
],
"require": {
"php": ">=5.3.0",
"composer/installers": "*"
"php": ">=5.3.0"
},
"require-dev": {
"satooshi/php-coveralls": "dev-master"
Expand Down
101 changes: 49 additions & 52 deletions src/zz/Html/HTMLMinify.php
Expand Up @@ -8,8 +8,6 @@
namespace zz\Html;

class HTMLMinify {
const ENCODING = 'UTF-8';

const DOCTYPE_HTML4 = 'HTML4.01';
const DOCTYPE_XHTML1 = 'XHTML1.0';
const DOCTYPE_HTML5 = 'html5';
Expand Down Expand Up @@ -226,7 +224,7 @@ public function __construct($html, $options = array()) {
*
* 'excludeComment'
* example : <!--nocache-->content<!--/nocache-->
* array()(default) => content
* array('/<!--\/?nocache-->/')(default) => content
* array('/<!--\/?nocache-->/') => <!--nocache-->content<!--/nocache-->
*
* 'removeDuplicateAttribute'
Expand Down Expand Up @@ -389,60 +387,62 @@ protected function beforeFilter() {
protected function removeWhitespaceFromComment() {
$tokens = $this->tokens;
$regexps = $this->options['excludeComment'];

for ($i = 0, $len = count($tokens); $i < $len; $i++) {
$HTMLTokenStartTag = HTMLToken::StartTag;
$HTMLTokenComment = HTMLToken::Comment;
$HTMLTokenCharacter = HTMLToken::Character;
$HTMLNamesScriptTag = HTMLNames::scriptTag;
$HTMLNamesStyleTag = HTMLNames::styleTag;
$removes = array();
$combineIndex = null;

$len = count($tokens);
for ($i = 0; $i < $len; $i++) {
$token = $tokens[$i];
$type = $token->getType();
if ($type === HTMLToken::StartTag) {
if ($type === $HTMLTokenStartTag) {
$combineIndex = null;
$tagName = $token->getTagName();
if ($tagName === HTMLNames::scriptTag || $tagName === HTMLNames::styleTag) {
if ($tagName === $HTMLNamesScriptTag || $tagName === $HTMLNamesStyleTag) {
$i++;
continue;
}
} else if ($this->_isConditionalComment($token)) {
continue;
} else if ($type === $HTMLTokenCharacter) {
if ($combineIndex > 0) {
$tokens[$combineIndex]->setData($tokens[$combineIndex] . $token);
$removes[] = $i;
}
continue;
} else if ($type !== $HTMLTokenComment) {
$combineIndex = null;
continue;
}
if ($type !== HTMLToken::Comment) {

$comment = $token->getData();
if ($this->_isConditionalComment($comment)) {
$combineIndex = null;
continue;
}
if ($regexps) {
$comment = $token->getData();
foreach ($regexps as $regexp) {
if (preg_match($regexp, $comment)) {
$combineIndex = null;
continue 2;
}
}
}

unset($tokens[$i]);
$tokens = array_merge($tokens, array());
$len = count($tokens);
$i--;
$combineIndex = $i - 1;
$removes[] = $i;
}

/**
* @var HTMLToken[] $tokens
*/
$tokens = array_merge($tokens, array());
foreach ($removes as $remove) {
unset($tokens[$remove]);
}

// combine chars
for ($i = 1, $len = count($tokens); $i < $len; $i++) {
$token = $tokens[$i];
if ($token->getType() !== HTMLToken::Character) {
continue;
}
$token_before = $tokens[$i - 1];
if ($token_before->getType() !== HTMLToken::Character) {
continue;
}
$tokens[$i]->setData($token_before . $token->getData());
unset($tokens[$i - 1]);
$len = count($tokens);
$tokens = array_merge($tokens, array());
$i--;
if ($len !== count($tokens)) {
$tokens = array_merge($tokens,array());
}
$tokens = array_merge($tokens, array());
$this->tokens = $tokens;
return true;
}

protected function isInlineTag($tag) {
Expand Down Expand Up @@ -470,22 +470,24 @@ protected function removeWhitespaceFromCharacter() {
$token = $tokens[$i];
$type = $token->getType();
if ($type === HTMLToken::StartTag) {
$isBeforeInline = $this->isInlineTag($token->getTagName());
switch ($token->getTagName()) {
$tagName = $token->getName();
$isBeforeInline = $this->isInlineTag($tagName);
switch ($tagName) {
case HTMLNames::scriptTag:
case HTMLNames::styleTag:
case HTMLNames::textareaTag:
case HTMLNames::preTag:
$isEditable = false;
$uneditableTag = $token->getTagName();
$uneditableTag = $tagName;
continue 2;
break;
default:
break;
}
} else if ($type === HTMLToken::EndTag) {
$isBeforeInline = $this->isInlineTag($token->getTagName());
if (!$isEditable && $token->getTagName() === $uneditableTag) {
$tagName = $token->getName();
$isBeforeInline = $this->isInlineTag($tagName);
if (!$isEditable && $tagName === $uneditableTag) {
$uneditableTag = null;
$isEditable = true;
continue;
Expand Down Expand Up @@ -535,12 +537,12 @@ protected function _removeWhitespaceFromCharacter($characters) {
$compactCharacters = '';
$hasWhiteSpace = false;

for ($i = 0, $len = mb_strlen($characters, static::ENCODING); $i < $len; $i++) {
$char = mb_substr($characters, $i, 1, static::ENCODING);
for ($i = 0, $len = strlen($characters); $i < $len; $i++) {
$char = $characters[$i];
if ($char === "\x0A") {
// remove before whitespace char
if ($hasWhiteSpace) {
$compactCharacters = mb_substr($compactCharacters, 0, -1, static::ENCODING);
$compactCharacters = substr($compactCharacters, 0, -1);
}
$compactCharacters .= $char;
$hasWhiteSpace = true;
Expand All @@ -567,7 +569,7 @@ protected function optimizeStartTagAttributes() {
}

$attributes_old = $token->getAttributes();
$attributes_new = array();
$attributes_new =array();
$attributes_name = array();

foreach ($attributes_old as $attribute) {
Expand All @@ -586,15 +588,10 @@ protected function optimizeStartTagAttributes() {
/**
* downlevel-hidden : <!--[if expression]> HTML <![endif]-->
* downlevel-revealed : <![if expression]> HTML <![endif]>
* @param HTMLToken $token
* @param string $comment
* @return bool
*/
protected function _isConditionalComment(HTMLToken $token) {
if ($token->getType() !== HTMLToken::Comment) {
return false;
}

$comment = $this->_buildElement($token);
protected function _isConditionalComment($comment) {
$pattern = '/\A<!(?:--)?\[if [^\]]+\]>/s';
if (preg_match($pattern, $comment)) {
return true;
Expand Down
3 changes: 2 additions & 1 deletion src/zz/Html/HTMLToken.php
Expand Up @@ -235,7 +235,8 @@ public function setState($states) {
}

public function getTagName() {
if ($this->getType() !== static::StartTag && $this->getType() !== static::EndTag) {
$type = $this->getType();
if ($type !== static::StartTag && $type !== static::EndTag) {
return false;
}
return $this->getName();
Expand Down

0 comments on commit 4ccb3db

Please sign in to comment.