Skip to content

Commit

Permalink
[+]: added more tests from "https://github.com/soundasleep/html2text/…
Browse files Browse the repository at this point in the history
  • Loading branch information
Lars Moelleken committed Aug 19, 2016
1 parent 834c21d commit 5a64b0f
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 0 deletions.
44 changes: 44 additions & 0 deletions tests/MailTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,50 @@ public function testHtmlToText10()
self::assertSame($this->file_get_contents(__DIR__ . '/fixtures/test10Html.txt'), $text);
}

/**
 * Converts the "msoffice.html" fixture with the "directConvert" option
 * enabled and checks that the produced text is identical to the
 * "msoffice.txt" fixture.
 */
public function testHtmlToTextMsOffice()
{
    $converter = new Html2Text(
        UTF8::file_get_contents(__DIR__ . '/fixtures/msoffice.html'),
        false,
        array('directConvert' => true)
    );

    // Convert first, then load the expected text, as a strict comparison follows.
    $actual = $converter->getText();
    $expected = $this->file_get_contents(__DIR__ . '/fixtures/msoffice.txt');

    self::assertSame($expected, $actual);
}

/**
 * Converts the "nbsp.html" fixture with the "directConvert" option
 * enabled and checks that the produced text is identical to the
 * "nbsp.txt" fixture.
 */
public function testHtmlToTextNbsp()
{
    $converter = new Html2Text(
        UTF8::file_get_contents(__DIR__ . '/fixtures/nbsp.html'),
        false,
        array('directConvert' => true)
    );

    // Convert first, then load the expected text, as a strict comparison follows.
    $actual = $converter->getText();
    $expected = $this->file_get_contents(__DIR__ . '/fixtures/nbsp.txt');

    self::assertSame($expected, $actual);
}

/**
 * Converts the "non-breaking-spaces.html" fixture with the "directConvert"
 * option enabled and checks that the produced text is identical to the
 * "non-breaking-spaces.txt" fixture.
 */
public function testHtmlToTextNonBreakingSpace()
{
    $converter = new Html2Text(
        UTF8::file_get_contents(__DIR__ . '/fixtures/non-breaking-spaces.html'),
        false,
        array('directConvert' => true)
    );

    // Convert first, then load the expected text, as a strict comparison follows.
    $actual = $converter->getText();
    $expected = $this->file_get_contents(__DIR__ . '/fixtures/non-breaking-spaces.txt');

    self::assertSame($expected, $actual);
}

/**
 * Converts the "table.html" fixture with the "directConvert" option
 * enabled and checks that the produced text is identical to the
 * "table.txt" fixture.
 */
public function testHtmlToTextTable()
{
    $converter = new Html2Text(
        UTF8::file_get_contents(__DIR__ . '/fixtures/table.html'),
        false,
        array('directConvert' => true)
    );

    // Convert first, then load the expected text, as a strict comparison follows.
    $actual = $converter->getText();
    $expected = $this->file_get_contents(__DIR__ . '/fixtures/table.txt');

    self::assertSame($expected, $actual);
}

/**
* @param string $filename
*
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/msoffice.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=us-ascii"><meta name=Generator content="Microsoft Word 15 (filtered medium)"><style><!-- /* Font Definitions */ @font-face {font-family:"Cambria Math"; panose-1:2 4 5 3 5 4 6 3 2 4;} @font-face {font-family:Calibri; panose-1:2 15 5 2 2 2 4 3 2 4;} /* Style Definitions */ p.MsoNormal, li.MsoNormal, div.MsoNormal {margin:0cm; margin-bottom:.0001pt; font-size:11.0pt; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} a:link, span.MsoHyperlink {mso-style-priority:99; color:#0563C1; text-decoration:underline;} a:visited, span.MsoHyperlinkFollowed {mso-style-priority:99; color:#954F72; text-decoration:underline;} span.EmailStyle17 {mso-style-type:personal-compose; font-family:"Calibri",sans-serif; color:windowtext;} .MsoChpDefault {mso-style-type:export-only; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} @page WordSection1 {size:612.0pt 792.0pt; margin:72.0pt 72.0pt 72.0pt 72.0pt;} div.WordSection1 {page:WordSection1;} --></style><!--[if gte mso 9]><xml> <o:shapedefaults v:ext="edit" spidmax="1026" /> </xml><![endif]--><!--[if gte mso 9]><xml> <o:shapelayout v:ext="edit"> <o:idmap v:ext="edit" data="1" /> </o:shapelayout></xml><![endif]--></head><body lang=EN-GB link="#0563C1" vlink="#954F72"><div class=WordSection1><p class=MsoNormal>Dear html2text,<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>This is an example email that can be used to test html2text conversion of outlook / exchange emails.<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>The addition of &lt;o:p&gt; tags is very annoying!<o:p></o:p></p><p class=MsoNormal>This is a single 
line return<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal><b>This is bold<o:p></o:p></b></p><p class=MsoNormal><i>This is italic<o:p></o:p></i></p><p class=MsoNormal><u>This is underline<o:p></o:p></u></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>Andrew<o:p></o:p></p></div></body></html>
15 changes: 15 additions & 0 deletions tests/fixtures/msoffice.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Dear html2text,

This is an example email that can be used to test html2text conversion of outlook / exchange emails.

The addition of tags is very annoying!

This is a single line return

THIS IS BOLD

_This is italic_

This is underline

Andrew
1 change: 1 addition & 0 deletions tests/fixtures/nbsp.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello &nbsp; world &amp; people &lt; &gt; &NBSP;
1 change: 1 addition & 0 deletions tests/fixtures/nbsp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello world & people < >
1 change: 1 addition & 0 deletions tests/fixtures/non-breaking-spaces.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
these spaces are non-breaking
1 change: 1 addition & 0 deletions tests/fixtures/non-breaking-spaces.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
these spaces are non-breaking
53 changes: 53 additions & 0 deletions tests/fixtures/table.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<html>
<title>Ignored Title</title>
<body>
<h1>Hello, World!</h1>
<table>
<thead>
<tr>
<th>Col A</th>
<th>Col B</th>
</tr>
</thead>
<tbody>
<tr>
<td>
Data A1
</td>
<td>
Data B1
</td>
</tr>
<tr>
<td>
Data A2
</td>
<td>
Data B2
</td>
</tr>
<tr>
<td>
Data A3
</td>
<td>
Data B4
</td>
</tr>
</tbody>
<tfoot>
<tr>
<td>
Total A
</td>
<td>
Total B
</td>
</tr>

</tfoot>

</table>

</body>
</html>
18 changes: 18 additions & 0 deletions tests/fixtures/table.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Ignored Title

HELLO, WORLD!

COL A
COL B

Data A1
Data B1

Data A2
Data B2

Data A3
Data B4

Total A
Total B

0 comments on commit 5a64b0f

Please sign in to comment.