Skip to content

Commit

Permalink
[+]: merge changes from yeahbert/html2text "elementConversionOptions"
Browse files Browse the repository at this point in the history
  • Loading branch information
Lars Moelleken committed Jun 21, 2016
1 parent 057138c commit 552bd09
Show file tree
Hide file tree
Showing 9 changed files with 394 additions and 97 deletions.
311 changes: 231 additions & 80 deletions src/Html2Text.php

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions tests/BlockquoteTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ public function testMalformedHtmlBlockquotes()
$expected = <<<'EOT'
Before
> Foo1 foo1 foo1 Foo2 foo2 foo2
Bar bar bar Before-After-1
> Foo1 foo1 foo1Foo2 foo2 foo2
Bar bar barBefore-After-1
> Before-After-2
Before-After-3 Before-After-4
Expand Down
101 changes: 101 additions & 0 deletions tests/ElementsTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<?php

namespace Html2Text;

use voku\Html2Text\Html2Text;

/**
 * Tests for the per-element conversion options passed to Html2Text under the
 * 'elements' key ('case', 'prepend', 'append', 'replace'), plus a data-driven
 * check of the text produced for inputs with surrounding whitespace.
 */
class ElementsTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Converts three headings and a list item using custom 'prepend' and
     * 'append' strings (headings with 'case' set to Html2Text::OPTION_NONE)
     * and compares the converted text with the expected output.
     */
    public function testPrependAndAppend()
    {
        $expected = <<<EOT
AAA Should have "AAA " prepended
Should have " BBB" appended BBB
AAA Should have "AAA " prepended and " BBB" appended BBB
- Dash instead of asterisk
EOT;

        $html = <<<EOT
<h1>Should have "AAA " prepended</h1>
<h4>Should have " BBB" appended</h4>
<h6>Should have "AAA " prepended and " BBB" appended</h6>
<li>Dash instead of asterisk</li>
EOT;

        // Per-tag settings; the 'li' entry only overrides the prepended marker.
        $elementOptions = array(
            'h1' => array('case' => Html2Text::OPTION_NONE, 'prepend' => "\nAAA "),
            'h4' => array('case' => Html2Text::OPTION_NONE, 'append' => " BBB\n"),
            'h6' => array('case' => Html2Text::OPTION_NONE, 'prepend' => "\nAAA ", 'append' => " BBB\n"),
            'li' => array('prepend' => "\n\t- "),
        );

        $converter = new Html2Text($html, array('elements' => $elementOptions));

        self::assertEquals($this->normalizeString($expected), $converter->getText());
    }

    /**
     * Converts a heading and a list item using the 'replace' element option
     * (with 'width' set to 0) and compares the converted text with the
     * expected output.
     */
    public function testReplace()
    {
        $expected = <<<EOT
Should have "BBB" changed to BBB
* Custom bullet should be removed
EOT;

        $html = <<<EOT
<h1>Should have "AAA" changed to BBB</h1>
<li>• Custom bullet should be removed</li>
EOT;

        // NOTE(review): the 'li' replace pair has an empty first string although the
        // fixture contains a "•" that the expected text omits — confirm against
        // upstream that no character was lost from this literal.
        $elementOptions = array(
            'h1' => array('case' => Html2Text::OPTION_NONE, 'replace' => array('AAA', 'BBB')),
            'li' => array('replace' => array('', '')),
        );

        $options = array(
            'width' => 0,
            'elements' => $elementOptions,
        );

        $converter = new Html2Text($html, $options);

        self::assertEquals($this->normalizeString($expected), $converter->getText());
    }

    /**
     * Checks the text produced for each data set when Html2Text is constructed
     * without an options array.
     *
     * @dataProvider getSpacesData
     *
     * @param $expected
     * @param $html
     */
    public function testTrimSpaces($expected, $html)
    {
        $converter = new Html2Text($html);

        self::assertEquals($expected, $converter->getText());
    }

    /**
     * Data sets for testTrimSpaces: each entry is array(expected text, HTML input).
     *
     * @see testTrimSpaces
     * @return array
     */
    public function getSpacesData()
    {
        $inputs = array(
            "<b>Bold with space: </b>Rest of text",
            " <p> <b>Bold with space: </b>Rest of text </p> ",
            " <p> <b>Bold with space: </b>Rest of text </p> ",
        );

        // Every input is paired with the same expected text.
        $dataSets = array();
        foreach ($inputs as $input) {
            $dataSets[] = array($this->normalizeString('BOLD WITH SPACE: Rest of text'), $input);
        }

        return $dataSets;
    }

    /**
     * Rewrites "\r\n" and lone "\r" line endings as "\n".
     *
     * @param $string
     * @return string
     */
    protected function normalizeString($string)
    {
        // strtr() tries the longest key first, so "\r\n" collapses to a single "\n".
        $lineEndings = array("\r\n" => "\n", "\r" => "\n");

        return strtr($string, $lineEndings);
    }
}
56 changes: 56 additions & 0 deletions tests/HeadingsTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<?php
namespace Html2Text;

use voku\Html2Text\Html2Text;

/**
 * Tests the 'case' element option of Html2Text on headings that contain
 * multi-byte UTF-8 characters.
 *
 * @copyright ResearchGate GmbH
 */
class HeadingsTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Converts h1/h2/h3/h6 headings with explicit 'case' options, an h5 with
     * no entry in 'elements', and a paragraph, then compares the converted
     * text with the expected output.
     */
    public function testHeadings()
    {
        $expected = <<<EOT
will be utf-8 (äöüèéилčλ) lowercased
Will be utf-8 (äöüèéилčλ) ucfirst
Will Be Utf-8 (Äöüèéилčλ) Titled
WILL BE UTF-8 (ÄÖÜÈÉИЛČΛ) UPPERCASED AS DEFAULT
WILL BE UTF-8 (ÄÖÜÈÉИЛČΛ) UPPERCASED
Will remain lowercased
EOT;

        $html = <<<EOT
<h1>Will be UTF-8 (äöüèéилčλ) lowercased</h1>
<h2>Will be UTF-8 (äöüèéилčλ) ucfirst</h2>
<h3>Will be UTF-8 (äöüèéилčλ) titled</h3>
<h5>Will be UTF-8 (äöüèéилčλ) uppercased as default</h5>
<h6>Will be UTF-8 (äöüèéилčλ) uppercased</h6>
<p>Will remain lowercased</p>
EOT;

        // h5 is deliberately left out: the fixture expects it to be uppercased anyway.
        $caseOptions = array(
            'h1' => array('case' => Html2Text::OPTION_LOWERCASE),
            'h2' => array('case' => Html2Text::OPTION_UCFIRST),
            'h3' => array('case' => Html2Text::OPTION_TITLE),
            'h6' => array('case' => Html2Text::OPTION_UPPERCASE),
        );

        $converter = new Html2Text($html, array('elements' => $caseOptions));
        $actual = $converter->getText();

        self::assertEquals($this->normalizeString($expected), $actual);
    }

    /**
     * Rewrites "\r\n" and lone "\r" line endings as "\n".
     *
     * @param $string
     * @return string
     */
    protected function normalizeString($string)
    {
        // strtr() tries the longest key first, so "\r\n" collapses to a single "\n".
        $lineEndings = array("\r\n" => "\n", "\r" => "\n");

        return strtr($string, $lineEndings);
    }
}
3 changes: 1 addition & 2 deletions tests/ListItemsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,7 @@ public function testMultiLevelUnorderedList()
EOT;
$expected_output = <<<EOT
* Coffee
* Tea
Tea
* Black tea
* Green tea
Expand Down
4 changes: 2 additions & 2 deletions tests/UppercaseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ public function testNoUppercase()
EOT;

$expected = <<<'EOT'
Heading 1
HEADING 1
Data 1
Heading 2
HEADING 2
Data 2
EOT;

Expand Down
3 changes: 1 addition & 2 deletions tests/fixtures/test3Html.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ to strip the text of any formatting and reset your style.
Follow on Twitter [/*|TWITTER:PROFILEURL|*] Friend on Facebook
[/*|FACEBOOK:PROFILEURL|*] Forward to Friend [/*|FORWARD|*]

_Copyright © *|CURRENT_YEAR|* *|LIST:COMPANY|*, All rights
reserved._
_Copyright © *|CURRENT_YEAR|* *|LIST:COMPANY|*, All rights reserved._
*|IFNOT:ARCHIVE_PAGE|* *|LIST:DESCRIPTION|*

OUR MAILING ADDRESS IS:
Expand Down
5 changes: 0 additions & 5 deletions tests/fixtures/test7Html.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ Can't see images below? View this email in a Web browser.




image: "github & Co®"
[http://cl.exct.net/?ju=fe36107270610579741572&ls=fdfe1d707261057c76127175&m=fef61277726104&l=fe8b15777d6d027d70&s=fdf315747d6d077c771c7576&jb=ff241779726c&t=]

Expand All @@ -25,7 +24,6 @@ image: "GIFT CARD"
image: "BACK TO SCHOOL & FALL PREVIEW SALE"
[http://cl.exct.net/?ju=fe34107270610579741673&ls=fdfe1d707261057c76127175&m=fef61277726104&l=fe8b15777d6d027d70&s=fdf315747d6d077c771c7576&jb=ff241779726c&t=]


image: "BACK TO SCHOOL & FALL PREVIEW SALE"
[http://cl.exct.net/?ju=fe34107270610579741673&ls=fdfe1d707261057c76127175&m=fef61277726104&l=fe8b15777d6d027d70&s=fdf315747d6d077c771c7576&jb=ff241779726c&t=]

Expand All @@ -45,19 +43,16 @@ image: "FIND A CENTER"
image: "LEVI'
[http://cl.exct.net/?ju=fe31107270610579741676&ls=fdfe1d707261057c76127175&m=fef61277726104&l=fe8b15777d6d027d70&s=fdf315747d6d077c771c7576&jb=ff241779726c&t=]


image: "crocs™ Find Your Fun™ Take Crocs Comfort to Class July
21st - August 2nd Enjoy $15 OFF your purchase of $60 or more Can be
combined with in-store promotions! Visit the VIP Lounge to access this
offer."
[http://cl.exct.net/?ju=fe2f107270610579741678&ls=fdfe1d707261057c76127175&m=fef61277726104&l=fe8b15777d6d027d70&s=fdf315747d6d077c771c7576&jb=ff241779726c&t=]


image: "Simon Giftcard® Gift giving has its perks Available on-site
and online"
[http://cl.exct.net/?ju=fe36107270610579741671&ls=fdfe1d707261057c76127175&m=fef61277726104&l=fe8b15777d6d027d70&s=fdf315747d6d077c771c7576&jb=ff241779726c&t=]


image: "BE THE FIRST TO KNOW"

image: "FACEBOOK"
Expand Down
4 changes: 0 additions & 4 deletions tests/fixtures/test9Html.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@ Sie:
image: "Aktuelle Ausgabe"
[https://www.henkel-lifetimes.de/mail_rdir.php?rid=123456&uid=&guid=12345678]


[https://www.henkel-lifetimes.de/mail_rdir.php?rid=123456&uid=&guid=12345678]


Neu: Rabatt-Coupons

Entdecken Sie jetzt unsere neuen Rabatt-Coupons: Einfach ausdrucken
Expand All @@ -23,7 +21,6 @@ und beim nächsten Einkauf bares Geld sparen.
Mehr »
[https://www.henkel-lifetimes.de/mail_rdir.php?rid=123456&uid=&guid=12345678]


Sie erhalten diese Mail aufgrund Ihrer Anmeldung bei der Frage der
Woche auf Henkel Lifetimes. Sollten Sie nicht mehr benachrichtigt
werden wollen, können Sie sich hier abmelden:
Expand All @@ -33,7 +30,6 @@ NEWSLETTER ABBESTELLEN
| IMPRESSUM
[https://www.henkel-lifetimes.de/mail_rdir.php?rid=123456&uid=&guid=12345678]


© 2015 Henkel AG & Co. KGaA

image: "Henkel"
Expand Down

0 comments on commit 552bd09

Please sign in to comment.