diff --git a/src/Tokenizer.php b/src/Tokenizer.php
index 447266a..75150e6 100644
--- a/src/Tokenizer.php
+++ b/src/Tokenizer.php
@@ -27,7 +27,7 @@ public function getToken($string)
};
$token_patterns = [
"/^\s+/" => ['type' => 'whitespace', 'value_fn' => $returnMatches],
- "/^<.+?[\s]*\/?[\s]*>/" => [
+ "/^<.+>/" => [
'type' => 'html-tag',
'value_fn' => function($matches, $string) {
$matched = '';
diff --git a/tests/large_image.html b/tests/large_image.html
new file mode 100644
index 0000000..2c57d89
--- /dev/null
+++ b/tests/large_image.html
@@ -0,0 +1 @@
+
diff --git a/tests/lexer-test.php b/tests/lexer-test.php
index 94a7843..786af48 100644
--- a/tests/lexer-test.php
+++ b/tests/lexer-test.php
@@ -218,3 +218,62 @@
]
);
});
+
+Test::create('should tokenize crazy ass images', function (Test $test) {
+ $img_html = file_get_contents(__DIR__ . '/large_image.html');
+ $tokenizer = new Tokenizer;
+ $test->equals($tokenizer->tokenize($img_html), [
+ ['type' => 'html-tag', 'value' => trim($img_html)],
+ ['type' => 'whitespace', 'value' => "\n"],
+ ]);
+});
+
+Test::create('should tokenize multiple tags', function(Test $test) {
+ $tokenizer = new Tokenizer;
+    $tokens = $tokenizer->tokenize("<br/>test<br/>");
+    $test->equals(
+        $tokens,
+        [
+            ['type' => 'html-tag', 'value' => '<br/>'],
+            ['type' => 'word', 'value' => 'test'],
+            ['type' => 'html-tag', 'value' => '<br/>'],
+        ]
+    );
+});
+
+Test::create('should tokenize spaces before/after tag', function(Test $test) {
+    $tokenizer = new Tokenizer;
+    $tokens = $tokenizer->tokenize("< br ><br/>");
+    $test->equals(
+        $tokens,
+        [
+            ['type' => 'html-tag', 'value' => '< br >'],
+            ['type' => 'html-tag', 'value' => '