diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index a698814db..795e29ba1 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -131,6 +131,9 @@ def handle_starttag(self, tag, attrs): self._cache.append(text) else: self.cleandoc.append(text) + if tag in self.CDATA_CONTENT_ELEMENTS: + # This is presumably a standalone tag in a code span (see #1036). + self.clear_cdata_mode() def handle_endtag(self, tag): text = self.get_endtag_text(tag) @@ -207,3 +210,63 @@ def handle_pi(self, data): def unknown_decl(self, data): end = ']]>' if data.startswith('CDATA[') else ']>' self.handle_empty_tag(''.""" + return self.__starttag_text + + def parse_starttag(self, i): + self.__starttag_text = None + endpos = self.check_for_whole_start_tag(i) + if endpos < 0: + return endpos + rawdata = self.rawdata + self.__starttag_text = rawdata[i:endpos] + + # Now parse the data between i+1 and j into a tag and attrs + attrs = [] + match = htmlparser.tagfind_tolerant.match(rawdata, i+1) + assert match, 'unexpected call to parse_starttag()' + k = match.end() + self.lasttag = tag = match.group(1).lower() + while k < endpos: + m = htmlparser.attrfind_tolerant.match(rawdata, k) + if not m: + break + attrname, rest, attrvalue = m.group(1, 2, 3) + if not rest: + attrvalue = None + elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ + attrvalue[:1] == '"' == attrvalue[-1:]: + attrvalue = attrvalue[1:-1] + if attrvalue: + attrvalue = htmlparser.unescape(attrvalue) + attrs.append((attrname.lower(), attrvalue)) + k = m.end() + + end = rawdata[k:endpos].strip() + if end not in (">", "/>"): + lineno, offset = self.getpos() + if "\n" in self.__starttag_text: + lineno = lineno + self.__starttag_text.count("\n") + offset = len(self.__starttag_text) \ + - self.__starttag_text.rfind("\n") + else: + offset = offset + len(self.__starttag_text) + self.handle_data(rawdata[i:endpos]) + return endpos + if end.endswith('/>'): + # XHTML-style empty tag: + self.handle_startendtag(tag, attrs) + 
else: + # *** set cdata_mode first so we can override it in handle_starttag (see #1036) *** + if tag in self.CDATA_CONTENT_ELEMENTS: + self.set_cdata_mode(tag) + self.handle_starttag(tag, attrs) + return endpos diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py index 11884e186..3b4ad3643 100644 --- a/tests/test_syntax/blocks/test_html_blocks.py +++ b/tests/test_syntax/blocks/test_html_blocks.py @@ -1363,4 +1363,42 @@ def test_unclosed_script_tag(self): Still part of the *script* tag """ ) + ) + + def test_inline_script_tags(self): + # Ensure inline script tags doesn't cause the parser to eat content (see #1036). + self.assertMarkdownRenders( + self.dedent( + """ + Text `` tag. + """ + ), + self.dedent( + """ +

<p>Text <code>&lt;script&gt;</code> more text.</p>

+
+ *foo* +
+ +
+ + bar + +
+ +

<p>A new paragraph with a closing <code>&lt;/script&gt;</code> tag.</p>

+ """ + ) ) \ No newline at end of file