Skip to content

Commit

Permalink
[+]: optimize performance for processing "pre"-tags
Browse files Browse the repository at this point in the history
[+]: move "ignore links" into the config ("do_links_ignore")
[+]: add "markdown" to possible link-styles
[+]: add more tests
  • Loading branch information
Lars Moelleken committed Aug 12, 2016
1 parent cd26c46 commit ed82f91
Show file tree
Hide file tree
Showing 4 changed files with 2,051 additions and 7 deletions.
49 changes: 42 additions & 7 deletions src/Html2Text.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,10 @@ class Html2Text
* @var array
*/
private static $defaultOptions = array(
'do_links' => 'inline',
'width' => 0,
'elements' => array(
'do_links' => 'inline',
'do_links_ignore' => 'javascript:|mailto:|#',
'width' => 0,
'elements' => array(
'h1' => array(
'case' => self::OPTION_UPPERCASE,
'prepend' => "\n\n",
Expand Down Expand Up @@ -118,6 +119,10 @@ class Html2Text
'prepend' => '_',
'append' => '_',
),
'pre' => array(
'prepend' => '',
'append' => '',
)
),
);

Expand Down Expand Up @@ -295,6 +300,7 @@ public function setPrefixForImages($string)
*
* 'none'
* 'inline' (show links inline)
* 'markdown' (show links as markdown)
* 'nextline' (show links on the next line)
* 'table' (if a table of link URLs should be listed after the text.
* 'bbcode' (show links as bbcode)
Expand Down Expand Up @@ -600,9 +606,13 @@ protected function convertPre(&$text)

// Get the content of PRE element.
while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {

// Replace br tags with newlines to prevent the search-and-replace callback from killing whitespace.
$this->preContent = preg_replace('/(<br\b[^>]*>)/i', "\n", $matches[1]);

// Use options (append, prepend, ...) for the current "pre"-tag.
$this->preContent = $this->convertElement('<pre>' . $this->preContent . '</pre>', 'pre');

// Run our defined tags search-and-replace with callback.
$this->preContent = preg_replace_callback(
self::$callbackSearch,
Expand All @@ -617,7 +627,7 @@ protected function convertPre(&$text)
);

// replace the content
$text = preg_replace('/<pre[^>]*>.*<\/pre>/ismU', $this->preContent, $text, 1);
$text = str_replace($matches[0], $this->preContent, $text);

// free some memory
$this->preContent = '';
Expand Down Expand Up @@ -827,7 +837,7 @@ protected function buildLinkList($link, $display, $linkOverride = null)
}

// ignored link types
if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
if (preg_match('!^(' . $this->options['do_links_ignore'] . ')!i', $link)) {
return $display;
}

Expand All @@ -846,19 +856,44 @@ protected function buildLinkList($link, $display, $linkOverride = null)
}

if ($linkMethod === 'table') {

//
// table
//
if (($index = array_search($url, $this->linkList, true)) === false) {
$index = count($this->linkList);
$this->linkList[] = $url;
}

return ' ' . $display . ' [' . ($index + 1) . '] ';

} elseif ($linkMethod === 'nextline') {

//
// nextline
//
return ' ' . $display . "\n" . '[' . $url . '] ';

} elseif ($linkMethod === 'markdown') {

//
// markdown
//
return ' [' . $display . '](' . $url . ') ';

} elseif ($linkMethod === 'bbcode') {

//
// bbcode
//
return ' [url=' . $url . ']' . $display . '[/url] ';

} else {
// link_method defaults to inline

//
// inline (default)
//
return ' ' . $display . ' [' . $url . '] ';

}
}

Expand Down
30 changes: 30 additions & 0 deletions tests/MailTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,36 @@ public function testHtmlToText9()
self::assertSame($this->file_get_contents(__DIR__ . '/fixtures/test9Html.txt'), $text);
}

/**
 * Converts the "test10" HTML fixture with markdown-style links, a custom
 * link ignore-list and per-element overrides for "pre" and "h5", then
 * compares the output with the expected plain-text fixture.
 */
public function testHtmlToText10()
{
    // Wrap "pre" content in a fenced php code block and keep "h5" in its original case.
    $elementOverrides = array(
        'pre' => array(
            'prepend' => '```php' . "\n",
            'append'  => "\n" . '```',
        ),
        'h5'  => array(
            'case'    => Html2Text::OPTION_NONE,
            'prepend' => "\n\n",
            'append'  => "\n\n",
        ),
    );

    $options = array(
        'directConvert'   => true,
        'do_links'        => 'markdown',
        'do_links_ignore' => 'javascript:|mailto:',
        'elements'        => $elementOverrides,
    );

    $source = UTF8::file_get_contents(__DIR__ . '/fixtures/test10Html.html');
    $converter = new Html2Text($source, false, $options);

    // Convert first, then load the expected fixture for the comparison.
    $actual = $converter->getText();
    $expected = $this->file_get_contents(__DIR__ . '/fixtures/test10Html.txt');

    self::assertSame($expected, $actual);
}

/**
* @param string $filename
*
Expand Down
Loading

0 comments on commit ed82f91

Please sign in to comment.