Skip to content

Commit

Permalink
[-]: don't use static cache
Browse files Browse the repository at this point in the history
[-]: clean-up "Html2Text"
[+]: added some more tests
  • Loading branch information
Lars Moelleken committed Jun 29, 2016
1 parent dae815c commit 2298621
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 50 deletions.
61 changes: 11 additions & 50 deletions src/Html2Text.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,13 @@ class Html2Text
* @var array
*/
private static $caseModeMapping = array(
    // Maps each case option to an mbstring MB_CASE_* constant. Every option
    // key is listed exactly once: PHP lets a repeated key silently overwrite
    // the earlier entry, so a duplicated row would never take effect.
    self::OPTION_LOWERCASE => \MB_CASE_LOWER,
    self::OPTION_UPPERCASE => \MB_CASE_UPPER,
    // NOTE(review): UCFIRST maps to MB_CASE_LOWER rather than a dedicated mode;
    // presumably the first letter is capitalised separately by the caller — confirm.
    self::OPTION_UCFIRST => \MB_CASE_LOWER,
    self::OPTION_TITLE => \MB_CASE_TITLE,
);

private static $defaultOptions = array(
'do_upper' => true,
'do_underscores' => true,
'do_links' => 'inline',
'width' => 70,
Expand Down Expand Up @@ -258,9 +257,6 @@ public function setPrefixForImages($string)
*
*
* ---------------------->
* do_upper:
* Convert strong and bold to uppercase?
* ---------------------->
* do_underscores:
* Surround emphasis and italics with underscores?
* ---------------------->
Expand Down Expand Up @@ -422,17 +418,11 @@ protected function convert()
*/
protected function converter(&$text)
{
static $searchReplaceArrayKeys = null;
static $searchReplaceArrayValues = null;

static $endSearchReplaceArrayKeys = null;
static $endSearchReplaceArrayValues = null;

$searchReplaceArrayKeys = ($searchReplaceArrayKeys === null ? array_keys($this->searchReplaceArray) : $searchReplaceArrayKeys);
$searchReplaceArrayValues = ($searchReplaceArrayValues === null ? array_values($this->searchReplaceArray) : $searchReplaceArrayValues);
$searchReplaceArrayKeys = array_keys($this->searchReplaceArray);
$searchReplaceArrayValues = array_values($this->searchReplaceArray);

$endSearchReplaceArrayKeys = ($endSearchReplaceArrayKeys === null ? array_keys($this->endSearchReplaceArray) : $endSearchReplaceArrayKeys);
$endSearchReplaceArrayValues = ($endSearchReplaceArrayValues === null ? array_values($this->endSearchReplaceArray) : $endSearchReplaceArrayValues);
$endSearchReplaceArrayKeys = array_keys($this->endSearchReplaceArray);
$endSearchReplaceArrayValues = array_values($this->endSearchReplaceArray);

// convert <BLOCKQUOTE> (before PRE!)
$this->convertBlockquotes($text);
Expand All @@ -458,12 +448,9 @@ protected function converter(&$text)
// run our defined entities/characters search-and-replace
$text = preg_replace($endSearchReplaceArrayKeys, $endSearchReplaceArrayValues, $text);

// replace known html entities
// replace known html entities + UTF-8 codepoints
$text = UTF8::html_entity_decode($text);

// replace html entities which represent UTF-8 codepoints.
$text = preg_replace_callback("/&#\d{2,5};/", array($this, 'entityCallback'), $text);

// remove unknown/unhandled entities (this cannot be done in search-and-replace block)
$text = preg_replace('/&[a-zA-Z0-9]{2,6};/', '', $text);

Expand Down Expand Up @@ -780,33 +767,7 @@ private function convertElement($str, $element)
protected function entityCallback(&$matches)
{
// Decodes a single matched HTML entity into its UTF-8 character.
// $matches[0] is the complete matched text, i.e. the entity itself
// (presumably handed over by a preg_replace_callback()-style caller — confirm).
// Convert from HTML-ENTITIES to UTF-8
return mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');
}

/**
* "strtoupper" function with HTML tags and entities handling.
*
* Text outside of "<...>" sequences is upper-cased with UTF8::strtoupper();
* the "<...>" sequences themselves are passed through unchanged. If the
* "do_upper" option is not strictly true, the input is returned as-is.
*
* @param string $str Text to convert
*
* @return string Converted text
*/
protected function toupper($str)
{
// feature switch: only an explicit boolean true enables the conversion
if ($this->options['do_upper'] !== true) {
return $str;
}

// string can contain HTML tags
// The capturing group together with PREG_SPLIT_DELIM_CAPTURE keeps every "<...>"
// match as its own chunk; PREG_SPLIT_NO_EMPTY drops empty pieces, so $chunk[0]
// below always exists.
$chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

// convert toupper only the text between HTML tags
// NOTE(review): $chunk is bound by reference and is not unset() after the loop.
foreach ($chunks as $i => &$chunk) {
// a chunk whose first character is "<" is treated as a tag and left alone
if ($chunk[0] !== '<') {
$chunk = UTF8::strtoupper($chunk);
}
}

return implode($chunks);
// NOTE(review): the statement below follows the return above, so it can never
// execute here; it looks like it belongs to entityCallback() — confirm against
// the committed file.
return \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');
}

/**
Expand Down Expand Up @@ -887,6 +848,6 @@ protected function pregPreCallback()
private function legacyConstruct($html = '', $fromFile = false, array $options = array())
{
    // Shared constructor logic: load the markup, then build the effective options.
    // $html and $fromFile are passed straight through to set_html().
    $this->set_html($html, $fromFile);

    // Layer the caller's options over the defaults exactly once. The recursive
    // replace descends into nested arrays, so overriding one nested key keeps
    // its sibling defaults instead of discarding the whole sub-array.
    $this->options = array_replace_recursive(self::$defaultOptions, $options);
}
}
26 changes: 26 additions & 0 deletions tests/ElementsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,32 @@ public function testReplace()
);

self::assertEquals($this->normalizeString($expected), $html2text->getText());

// -----------

$html = '<h1>Should have "AAA" changed to BBB</h1><ul><li>• Custom bullet should be removed</li></ul><img alt="The Linux Tux" src="tux.png" />';
$expected = 'SHOULD HAVE "BBB" CHANGED TO BBB' . "\n\n" . '- Custom bullet should be removed |' . "\n\n" . '[IMAGE]: "The Linux Tux"';

$html2text = new Html2Text(
$html,
array(
'width' => 0,
'elements' => array(
'h1' => array(
'case' => Html2Text::OPTION_UPPERCASE,
'replace' => array('AAA', 'BBB')),
'li' => array(
'case' => Html2Text::OPTION_NONE,
'replace' => array('', ''),
'prepend' => "- ",
'append' => " |",
),
),
)
);

$html2text->setPrefixForImages('[IMAGE]: ');
self::assertEquals($this->normalizeString($expected), $html2text->getText());
}

/**
Expand Down
8 changes: 8 additions & 0 deletions tests/ImageTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ public function testShowAltText()
self::assertEquals("image: \"This is our cool logo\"\nimage: 'This is our cool logo'", $html->getText());
}

/**
 * After setPrefixForImages('Bild: '), the text output for two <img> tags
 * must show each alt text behind that prefix, keeping the quote style of
 * the respective alt attribute.
 */
public function testEditImagePreText()
{
    $markup = "<img id=\"head\" class=\"header\" src=\"imgs/logo.png\" alt=\"This is our cool logo\" />\n <br/>\n\n <img id=\"head\" class=\"header\" src=\"imgs/logo.png\" alt='This is our cool logo' data-foo=\"bar\">";
    $expectedText = "Bild: \"This is our cool logo\"\nBild: 'This is our cool logo'";

    $converter = new Html2Text($markup);
    $converter->setPrefixForImages('Bild: ');

    self::assertEquals($expectedText, $converter->getText());
}

/**
* @return array
*/
Expand Down

0 comments on commit 2298621

Please sign in to comment.