diff --git a/src/Tokenizer.php b/src/Tokenizer.php index 447266a..75150e6 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -27,7 +27,7 @@ public function getToken($string) }; $token_patterns = [ "/^\s+/" => ['type' => 'whitespace', 'value_fn' => $returnMatches], - "/^<.+?[\s]*\/?[\s]*>/" => [ + "/^<.+>/" => [ 'type' => 'html-tag', 'value_fn' => function($matches, $string) { $matched = ''; diff --git a/tests/large_image.html b/tests/large_image.html new file mode 100644 index 0000000..2c57d89 --- /dev/null +++ b/tests/large_image.html @@ -0,0 +1 @@ + diff --git a/tests/lexer-test.php b/tests/lexer-test.php index 94a7843..786af48 100644 --- a/tests/lexer-test.php +++ b/tests/lexer-test.php @@ -218,3 +218,62 @@ ] ); }); + +Test::create('should tokenize crazy ass images', function (Test $test) { + $img_html = file_get_contents(__DIR__ . '/large_image.html'); + $tokenizer = new Tokenizer; + $test->equals($tokenizer->tokenize($img_html), [ + ['type' => 'html-tag', 'value' => trim($img_html)], + ['type' => 'whitespace', 'value' => "\n"], + ]); +}); + +Test::create('should tokenize multiple tags', function(Test $test) { + $tokenizer = new Tokenizer; + $tokens = $tokenizer->tokenize("

test

"); + $test->equals( + $tokens, + [ + ['type' => 'html-tag', 'value' => '
'], + ['type' => 'html-tag', 'value' => '
'], + ['type' => 'html-tag', 'value' => '

'], + ['type' => 'word', 'value' => 'test'], + ['type' => 'html-tag', 'value' => '

'], + ] + ); +}); + +Test::create('should tokenize spaces before/after tag', function(Test $test) { + $tokenizer = new Tokenizer; + $tokens = $tokenizer->tokenize("< br >
< hr style='color: blue' >"); + $test->equals( + $tokens, + [ + ['type' => 'html-tag', 'value' => '< br >'], + ['type' => 'html-tag', 'value' => '
'], + ['type' => 'html-tag', 'value' => '< hr style=\'color: blue\' >'], + ] + ); +}); + +Test::create('should tokenize gt/lt in attributes', function(Test $test) { + $tokenizer = new Tokenizer; + $tokens = $tokenizer->tokenize("
"); + $test->equals($tokens, [ + ['type' => 'html-tag', 'value' => '
blue\'>'], + ]); +}); + +Test::create('nested html', function(Test $test) { + $tokenizer = new Tokenizer; + $tokens = $tokenizer->tokenize("


"); + $test->equals( + $tokens, + [ + ['type' => 'html-tag', 'value' => '

'], + ['type' => 'html-tag', 'value' => '


'], + ['type' => 'html-tag', 'value' => ''], + ['type' => 'html-tag', 'value' => '

'], + ] + ); +});