Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

html2vimdoc: Ignore HTML comments

  • Loading branch information...
commit d3df4d21e3d356ba1decb7f20441c038cc2395af 1 parent f94cb80
@xolox authored
Showing with 10 additions and 3 deletions.
  1. +10 −3 html2vimdoc.py
View
13 html2vimdoc.py
@@ -3,7 +3,7 @@
# Convert HTML (and Markdown) documents to Vim help files
#
# Author: Peter Odding <peter@peterodding.com>
-# Last Change: June 1, 2013
+# Last Change: June 2, 2013
# URL: http://peterodding.com/code/vim/tools/
#
# Missing features:
@@ -67,7 +67,7 @@
# External dependency, install with:
# sudo apt-get install python-beautifulsoup
# pip install beautifulsoup
-from BeautifulSoup import BeautifulSoup, NavigableString, UnicodeDammit
+from BeautifulSoup import BeautifulSoup, NavigableString, Comment, UnicodeDammit
# External dependency, install with:
# pip install coloredlogs
@@ -185,10 +185,10 @@ def html2vimdoc(html, title='', filename='', url='', content_selector='#content'
tree = BeautifulSoup(html, convertEntities=BeautifulSoup.ALL_ENTITIES)
logger.info("Transforming contents ..")
title = select_title(tree, title)
+ ignore_comments(tree)
ignore_given_selectors(tree, selectors_to_ignore)
root = find_root_node(tree, content_selector)
simple_tree = simplify_node(root)
- #make_parents_explicit(simple_tree)
shift_headings(simple_tree)
find_references(simple_tree, url)
# Add an "Introduction" heading to separate the table of contents from the
@@ -312,6 +312,13 @@ def find_root_node(tree, selector):
# Don't break when html.body doesn't exist.
return tree
+def ignore_comments(tree):
+ """
+ Strip every HTML comment node out of the BeautifulSoup parse tree, in place.
+ """
+ for html_node in tree.findAll(text = lambda n: isinstance(n, Comment)):
+ html_node.extract()
+
def ignore_given_selectors(tree, selectors_to_ignore):
"""
Remove all HTML elements matching any of the CSS selectors provided by
Please sign in to comment.
Something went wrong with that request. Please try again.