# Comparing Content Versions

## 1. difflib

In [8]:
# Import only the name this cell needs; a wildcard import would pull every
# public difflib name into the notebook namespace and hide where names come from.
from difflib import HtmlDiff
from IPython.display import display, HTML

# HTML Example: two small documents that differ in the heading and paragraphs.
# Each literal starts with a newline, so splitlines() yields an empty first
# line on both sides of the comparison.
original = '''
  <html>
    <body>
      <h1>Title</h1>
      <p>Lorem ipsuor dilluting est</p>
      <p>Non sensium</p>
    </body>
  </html>
'''

changed = '''
  <html>
    <body>
      <h1>Big Title</h1>
      <p>Lorem ipsum <i>dilletandi</i> est</p>
    </body>
  </html>
'''

# HtmlDiff.make_file() compares the two line lists and returns a complete HTML
# page containing a side-by-side table (plus legend) as a single string.
diff = HtmlDiff().make_file(original.splitlines(), changed.splitlines())

# Render the generated page inline as the cell output.
display(HTML(diff))


0,1,2,3,4,5
f,1,,f,1.0,
,2,<html>,,2.0,<html>
,3,<body>,,3.0,<body>
t,4,<h1>Title</h1>,t,4.0,<h1>Big Title</h1>
,5,<p>Lorem ipsuor dilluting est</p>,,5.0,<p>Lorem ipsum <i>dilletandi</i> est</p>
,6,<p>Non sensium</p>,,,
,7,</body>,,6.0,</body>
,8,</html>,,7.0,</html>

Legends,Legends.1
Colors Added Changed Deleted,Links (f)irst change (n)ext change (t)op

Colors
Added
Changed
Deleted

Links,Links.1
(f)irst change,
(n)ext change,
(t)op,


In [9]:
from bs4 import BeautifulSoup
from difflib import HtmlDiff
from IPython.display import display, HTML

# Parse both documents so the markup can be stripped before comparing
soup_original, soup_changed = (
    BeautifulSoup(markup, 'html.parser') for markup in (original, changed)
)

# Keep only the visible text of each document
text_original = soup_original.get_text()
text_changed = soup_changed.get_text()

# Build the side-by-side diff page from the text lines alone
diff_text = HtmlDiff().make_file(
    fromlines=text_original.splitlines(),
    tolines=text_changed.splitlines(),
)

# Show the resulting page as the cell output
display(HTML(diff_text))

0,1,2,3,4,5
f,1,,f,1.0,
,2,,,2.0,
,3,,,3.0,
t,4,Title,t,4.0,Big Title
,5,Lorem ipsuor dilluting est,,5.0,Lorem ipsum dilletandi est
,6,Non sensium,,,
,7,,,6.0,
,8,,,7.0,

Legends,Legends.1
Colors Added Changed Deleted,Links (f)irst change (n)ext change (t)op

Colors
Added
Changed
Deleted

Links,Links.1
(f)irst change,
(n)ext change,
(t)op,


## 2. htmldiff2

* Source: https://github.com/edsu/htmldiff2
* Install: `pip install htmldiff2`

In [12]:
from htmldiff2 import render_html_diff
import re
from IPython.display import display, HTML

# Sample documents for the htmldiff2 demo. They rebind `original` and `changed`
# with two exported pages whose headings differ.
# The literals open with exactly three quotes: a fourth quote would become a
# stray leading apostrophe inside the HTML and show up in the rendered diff.
original = '''<div class="ZMSDocument"><h1>First Document<small></small></h1></div>\n<!-- ZMSDocument.standard_html -->\n\n<!-- bodyContentZMSLib_page -->\n\n\t\n\t\t<!-- ZMSTextarea.standard_html -->\n\n<p>Lorem ipsum sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. &nbsp;&nbsp;</p>\n\n<p><strong>Lorem ipsum dolor&nbsp;</strong></p>\n\n<p>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. &nbsp;Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. 
&nbsp;&nbsp;</p>\n\n<p><strong>vulputate velit &nbsp;molestie consequat</strong></p>\n\n<p>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. &nbsp;Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. &nbsp;&nbsp;</p>\n\n<!-- /ZMSTextarea.standard_html --><!-- ZMSTextarea.standard_html -->\n\n<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>\n\n<!-- /ZMSTextarea.standard_html -->\n\t\n\t\n\n<!-- /bodyContentZMSLib_page -->\n\n<!-- /ZMSDocument.standard_html -->'''
changed = '''<div class="ZMSDocument"><h1>Second Document<small></small></h1></div>\n<!-- ZMSDocument.standard_html -->\n\n<!-- bodyContentZMSLib_page -->\n\n\t\n\t\t<!-- ZMSTextarea.standard_html -->\n\n<p>Lorem ipsum sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. &nbsp;&nbsp;</p>\n\n<p><strong>Lorem ipsum dolor&nbsp;</strong></p>\n\n<p>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. &nbsp;Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. 
&nbsp;&nbsp;</p>\n\n<p><strong>vulputate velit &nbsp;molestie consequat</strong></p>\n\n<p>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. &nbsp;Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. &nbsp;&nbsp;</p>\n\n<!-- /ZMSTextarea.standard_html --><!-- ZMSTextarea.standard_html -->\n\n<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>\n\n<!-- /ZMSTextarea.standard_html -->\n\t\n\t\n\n<!-- /bodyContentZMSLib_page -->\n\n<!-- /ZMSDocument.standard_html -->'''
# Remove the HTML comments so that only real markup takes part in the diff.
# DOTALL lets `.` cross newlines (for comments spanning several lines); the
# lazy `.*?` stops at the first closing `-->` so adjacent comments are removed
# one by one.
HTML_COMMENT_RE = re.compile(r'<!--.*?-->', flags=re.DOTALL)
original = HTML_COMMENT_RE.sub('', original)
changed = HTML_COMMENT_RE.sub('', changed)


In [13]:
# Let htmldiff2 build a diff of the two documents and show the returned
# markup as rendered HTML in the cell output.
diff = render_html_diff(original, changed)
display(HTML(diff))