Skip to content

Commit

Permalink
Fix issue with regex limit getting hit
Browse files Browse the repository at this point in the history
Since we added logic that does not rely on the matches from the regex,
we can simplify everything by just matching on HTML tags.
  • Loading branch information
zacharymarshal committed May 19, 2017
1 parent e9fa3e9 commit 46f7a5d
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public function getToken($string)
};
$token_patterns = [
"/^\s+/" => ['type' => 'whitespace', 'value_fn' => $returnMatches],
"/^<.+?[\s]*\/?[\s]*>/" => [
"/^<.+>/" => [
'type' => 'html-tag',
'value_fn' => function($matches, $string) {
$matched = '';
Expand Down
1 change: 1 addition & 0 deletions tests/large_image.html

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions tests/lexer-test.php
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,62 @@
]
);
});

// Feeds the tests/large_image.html fixture through the tokenizer and expects
// exactly two tokens: one html-tag token holding the trimmed file content,
// followed by a single "\n" whitespace token.
// NOTE(review): the fixture content is not visible here; presumably it is one
// very large <img> tag terminated by a single newline -- confirm.
Test::create('should tokenize crazy ass images', function (Test $test) {
    $fixture = file_get_contents(__DIR__ . '/large_image.html');

    $expected = [
        ['type' => 'html-tag', 'value' => trim($fixture)],
        ['type' => 'whitespace', 'value' => "\n"],
    ];
    $actual = (new Tokenizer)->tokenize($fixture);

    $test->equals($actual, $expected);
});

// Back-to-back tags with no separating whitespace must come out as one
// html-tag token each, with the text between <p> and </p> emitted as a word.
Test::create('should tokenize multiple tags', function(Test $test) {
    $expected = [
        ['type' => 'html-tag', 'value' => '<br>'],
        ['type' => 'html-tag', 'value' => '<br />'],
        ['type' => 'html-tag', 'value' => '<p>'],
        ['type' => 'word', 'value' => 'test'],
        ['type' => 'html-tag', 'value' => '</p>'],
    ];

    $actual = (new Tokenizer)->tokenize("<br><br /><p>test</p>");

    $test->equals($actual, $expected);
});

// Whitespace just inside the angle brackets (after '<', around '/', before
// '>') belongs to the tag: each tag is expected as a single html-tag token
// whose value keeps those spaces verbatim.
Test::create('should tokenize spaces before/after tag', function(Test $test) {
    $expected = [
        ['type' => 'html-tag', 'value' => '< br >'],
        ['type' => 'html-tag', 'value' => '<br / >'],
        ['type' => 'html-tag', 'value' => '< hr style=\'color: blue\' >'],
    ];

    $actual = (new Tokenizer)->tokenize("< br ><br / >< hr style='color: blue' >");

    $test->equals($actual, $expected);
});

// A '>' that appears inside a quoted attribute value must not terminate the
// tag: the whole element is expected back as one html-tag token.
// NOTE(review): only '>' is exercised here; the '<' case named in the title
// is not covered by this input.
Test::create('should tokenize gt/lt in attributes', function(Test $test) {
    $expected = [
        ['type' => 'html-tag', 'value' => '<hr style=\'color: >blue\'>'],
    ];

    $actual = (new Tokenizer)->tokenize("<hr style='color: >blue'>");

    $test->equals($actual, $expected);
});

// Tags nested inside a parent element are tokenized flat, in document order:
// the opening tag, each self-closing child, then the closing tag.
Test::create('nested html', function(Test $test) {
    $expected = [
        ['type' => 'html-tag', 'value' => '<p>'],
        ['type' => 'html-tag', 'value' => '<hr/>'],
        ['type' => 'html-tag', 'value' => '<img/>'],
        ['type' => 'html-tag', 'value' => '</p>'],
    ];

    $actual = (new Tokenizer)->tokenize("<p><hr/><img/></p>");

    $test->equals($actual, $expected);
});

0 comments on commit 46f7a5d

Please sign in to comment.