email_notifications: Fix inline-ing of image-URL-only messages.

fe25517 adjusted the email_notifications codepath to use `lxml.html.fragment_fromstring` method when parsing `rendered_content`, but left the tests using a helper which called `fromstring`. Switching the tests to match the code as run reveals a bug -- using `drop_tree` on all `message_inline_image` classes now _does_ remove all of a top-level image-URL-only message. Previously, such messages were "safe" from the block that calls `drop_tree` only by dint of `drop_tree` being a silent no-op for the root element. When parsed using `fragment_fromstring`, they are no longer the root, and as such an empty message results. Reorder relative_to_full_url to check for only one `message_inline_image` within the top `<div>`, and only run the `drop_tree` path in the alternate case. Tests must be adjusted for their output now including one more layer of `<div>`.
zulip · Sep 15, 2021 · 039b869 · 039b869
1 parent 0e8735a
commit 039b869
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 28 deletions.
diff --git a/zerver/lib/email_notifications.py b/zerver/lib/email_notifications.py
@@ -61,26 +61,30 @@ def relative_to_full_url(fragment: lxml.html.HtmlElement, base_url: str) -> None
             if elem.get("title") is not None:
                 elem.set("title", link)
 
-    # Inline images can't be displayed in the emails as the request
-    # from the mail server can't be authenticated because it has no
-    # user_profile object linked to it. So we scrub the inline image
-    # container.
-    inline_image_containers = fragment.find_class("message_inline_image")
-    for container in inline_image_containers:
-        container.drop_tree()
-
-    # The previous block handles most inline images, but for messages
-    # where the entire Markdown input was just the URL of an image
-    # (i.e. the entire body is a message_inline_image object), the
-    # entire message body will be that image element; here, we need a
-    # more drastic edit to the content.
-    if fragment.get("class") == "message_inline_image":
-        image_link = fragment.find("a").get("href")
-        image_title = fragment.find("a").get("title")
+    # Because we were parsed with fragment_fromstring, we are
+    # guaranteed there is a top-level <div>, and the original
+    # top-level contents are within that.
+    if len(fragment) == 1 and fragment[0].get("class") == "message_inline_image":
+        # The next block handles most inline images, but for messages
+        # where the entire Markdown input was just the URL of an image
+        # (i.e. the entire body is a message_inline_image object), the
+        # entire message body will be that image element; here, we need a
+        # more drastic edit to the content.
+        inner = fragment[0]
+        image_link = inner.find("a").get("href")
+        image_title = inner.find("a").get("title")
         title_attr = {} if image_title is None else {"title": image_title}
-        fragment.clear()
-        fragment.tag = "p"
-        fragment.append(E.A(image_link, href=image_link, target="_blank", **title_attr))
+        inner.clear()
+        inner.tag = "p"
+        inner.append(E.A(image_link, href=image_link, target="_blank", **title_attr))
+    else:
+        # Inline images can't be displayed in the emails as the request
+        # from the mail server can't be authenticated because it has no
+        # user_profile object linked to it. So we scrub the inline image
+        # container.
+        inline_image_containers = fragment.find_class("message_inline_image")
+        for container in inline_image_containers:
+            container.drop_tree()
 
     fragment.make_links_absolute(base_url)
 

diff --git a/zerver/tests/test_email_notifications.py b/zerver/tests/test_email_notifications.py
@@ -1257,7 +1257,7 @@ def test_multiple_stream_messages_different_topics(self) -> None:
 
     def test_relative_to_full_url(self) -> None:
         def convert(test_data: str) -> str:
-            fragment = lxml.html.fromstring(test_data)
+            fragment = lxml.html.fragment_fromstring(test_data, create_parent=True)
             relative_to_full_url(fragment, "http://example.com")
             return lxml.html.tostring(fragment, encoding="unicode")
 
@@ -1287,24 +1287,26 @@ def convert(test_data: str) -> str:
         # A path similar to our emoji path, but not in a link:
         test_data = "<p>Check out the file at: '/static/generated/emoji/images/emoji/'</p>"
         actual_output = convert(test_data)
-        expected_output = "<p>Check out the file at: '/static/generated/emoji/images/emoji/'</p>"
+        expected_output = (
+            "<div><p>Check out the file at: '/static/generated/emoji/images/emoji/'</p></div>"
+        )
         self.assertEqual(actual_output, expected_output)
 
         # An uploaded file
         test_data = '<a href="/user_uploads/{realm_id}/1f/some_random_value">/user_uploads/{realm_id}/1f/some_random_value</a>'
         test_data = test_data.format(realm_id=zephyr_realm.id)
         actual_output = convert(test_data)
         expected_output = (
-            '<a href="http://example.com/user_uploads/{realm_id}/1f/some_random_value">'
-            + "/user_uploads/{realm_id}/1f/some_random_value</a>"
+            '<div><a href="http://example.com/user_uploads/{realm_id}/1f/some_random_value">'
+            + "/user_uploads/{realm_id}/1f/some_random_value</a></div>"
         )
         expected_output = expected_output.format(realm_id=zephyr_realm.id)
         self.assertEqual(actual_output, expected_output)
 
         # A profile picture like syntax, but not actually in an HTML tag
         test_data = '<p>Set src="/avatar/username@example.com?s=30"</p>'
         actual_output = convert(test_data)
-        expected_output = '<p>Set src="/avatar/username@example.com?s=30"</p>'
+        expected_output = '<div><p>Set src="/avatar/username@example.com?s=30"</p></div>'
         self.assertEqual(actual_output, expected_output)
 
         # A narrow URL which begins with a '#'.
@@ -1314,8 +1316,8 @@ def convert(test_data: str) -> str:
         )
         actual_output = convert(test_data)
         expected_output = (
-            '<p><a href="http://example.com/#narrow/stream/test/topic/test.20topic/near/142" '
-            + 'title="http://example.com/#narrow/stream/test/topic/test.20topic/near/142">Conversation</a></p>'
+            '<div><p><a href="http://example.com/#narrow/stream/test/topic/test.20topic/near/142" '
+            + 'title="http://example.com/#narrow/stream/test/topic/test.20topic/near/142">Conversation</a></p></div>'
         )
         self.assertEqual(actual_output, expected_output)
 
@@ -1345,9 +1347,9 @@ def convert(test_data: str) -> str:
         )
         actual_output = convert(test_data)
         expected_output = (
-            '<p><a href="https://www.google.com/images/srpr/logo4w.png" '
+            '<div><p><a href="https://www.google.com/images/srpr/logo4w.png" '
             + 'target="_blank" title="https://www.google.com/images/srpr/logo4w.png">'
-            + "https://www.google.com/images/srpr/logo4w.png</a></p>"
+            + "https://www.google.com/images/srpr/logo4w.png</a></p></div>"
         )
         self.assertEqual(actual_output, expected_output)