Merge pull request #725 from xhtml2pdf/bugfix/image-identify

Remove stack trace when images cannot be identified
xhtml2pdf · Oct 9, 2023 · 6a50d43
2 parents 96ec58d + f8888ba
commit 6a50d43
Show file tree

Hide file tree

Showing 13 changed files with 592 additions and 486 deletions.
diff --git a/tests/samples/img/zero_width.gif b/tests/samples/img/zero_width.gif
diff --git a/tests/test_document.py b/tests/test_document.py
@@ -175,6 +175,26 @@ def test_document_with_broken_image(self) -> None:
                 ],
             )
 
+    def test_document_cannot_identify_image(self) -> None:
+        """Test that images which cannot be identified don't cause stack trace to be printed"""
+        image_path = "https://raw.githubusercontent.com/python-pillow/Pillow/7921da54a73dd4a30c23957369b79cda176005c6/Tests/images/zero_width.gif"
+        extra_html = f'<img src="{image_path}">'
+        with open(os.devnull, "wb") as pdf_file, self.assertLogs(
+            "xhtml2pdf.tags", level="WARNING"
+        ) as cm:
+            pisaDocument(
+                src=io.StringIO(HTML_CONTENT.format(head="", extra_html=extra_html)),
+                dest=pdf_file,
+            )
+            self.assertEqual(
+                cm.output,
+                [
+                    "WARNING:xhtml2pdf.tags:Cannot identify image file:\n"
+                    "'<img "
+                    'src="https://raw.githubusercontent.com/python-pillow/Pillow/7921da54a73dd4a30c23957369b79cda176005c6/Tests/images/zero_width.gif"/>\''
+                ],
+            )
+
     def test_document_nested_table(self) -> None:
         """Test that nested tables are being rendered."""
         tests_folder = os.path.dirname(os.path.realpath(__file__))

diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -57,3 +57,12 @@ def test_image_base64(self) -> None:
         )
         r = pisaParser(data, c)
         self.assertEqual(r.warn, 0)
+
+    def test_image_base64_urlencoded(self) -> None:
+        c = pisaContext(".")
+        data = (
+            b"<img"
+            b' src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAV4AAACWBAMAAABkyf1EAAAAG1BMVEXMzMyWlpacnJyqqqrFxcWxsbGjo6O3t7e%2Bvr6He3KoAAAACXBIWXMAAA7EAAAOxAGVKw4bAAAEcElEQVR4nO2aTW%2FbRhCGh18ij1zKknMkbbf2UXITIEeyMhIfRaF1exQLA%2FJRclslRykO%2Brs7s7s0VwytNmhJtsA8gHZEcox9PTs7uysQgGEYhmEYhmEYhmEYhmEYhmEYhmEYhmEYhmEYhmEYhmGYr2OWRK%2FReIKI8Zt7Hb19wTcQ0uTkGh13bQupcw7gPOvdo12%2F5CzNtNR7xLUtNtT3CGBQ6g3InjY720pvofUec22LJPr8PhEp2OMPyI40PdwWUdronCu9yQpdPx53bQlfLKnfOVhlnDYRBXve4Ov%2BIZTeMgdedm0NR%2BxoXJeQvdJ3CvziykSukwil16W%2FOe7aGjIjqc%2F9ib4jQlJy0uArtN4A0%2BcvXFvDkmUJ47sJ1Y1ATLDNVXZkNPIepQzxy1ki9fqiwbUj%2FI%2B64zxWNzyZnPuhvohJ9K70VvXBixpcu2SAHU%2BXd9EKdEJDNpYP3AQr3bQSpPQ6Y6%2F4dl1z7ZDbArsszjA7L0g7ibB0CDcidUWVoErvIMKZh2Xs0LUzcLW6V5NfiUgNEbaYmAVL6bXl0nJRc%2B1S72ua%2FD%2FcTjGPlQj7eUqd7A096rYlRjdPYlhz7VIvxpVG3cemDKF%2BWAwLY%2F6XelOZKTXXzsC4xvDjjtSN6kHLhLke6PrwM8h1raf40qjrGO7H9aTEbduucjS04ZrYU%2F4iuS5Z2Hdt0rvCLFdmLEXcU30AGddST62o%2BsLcf5l6k7CP%2Bru4pLYqX%2FVFyxbm%2FutQbx%2Fr22ZEbTb2f5I2kns1Y1OQR8ZyofX%2BTjJxj1Rz7QQVnf1QzR26Oth0ueJVYcRP6ZUPac%2FRx%2F5M6ixO1dhSrT3Y1DpiYmx3tF4ZUdpz9LD%2FdSg9PXES0LB71BwcGjKROuV28lnvnv7HHJsezheBGH5%2BX2CfSfRbMKW%2B5aGs3JFjMrjGibJc0S7TJzqjHrh2hDybj9XRXNZa89Aro55XBdbW5wti2c%2F5WJ7jJ1RolVUn%2FHWpb0I58Tziup6Rx7Dm2hnbRP1GM9PW%2FNFmQ4PtVRVN63Wvxfmu5sowDMMwDMMwDMMwDMMwDMMwDMMwzL%2BCpT%2F%2FF%2F6beoV8zb2Jmt4Qryx6lTUCsENQ75HOkhXAO3EPVgyQtKtUy3C%2Fe%2BFJg17Zjnew1Xrdb9InbG4WqfUAftG%2BWhLwPVyfg536%2BMU7m4C1CMk4ZznpXZzDYI1PDL2nS1hpvc5cNd7E2sJg05Fe7%2F7d3Fln8Cvc3bwB616auxsKl4WPghjemHrDqyDWeu1UNW5s2btPnSQ75oOdunEwWazfwgVG0kqluYCM9OIjWOGnfA2b9G4Ha63XKpvQ8perTvTifJNhi6%2BWMWmi7smEZf6G8MmhlyGq%2BNqP8GV84TLuJr7UIQVx%2BbDEoEpRZIz42gs40OuN4Mv8hXzelV7KX1isH%2BewTWckikyVv%2BCfHuqVF7I16gN0VKypX6wPsE%2BzFPzkinolU9UH8OMGvSpnZqKsv13p%2FRsMun6X5x%2Fy2LeAr8O66lsBwzBMP%2FwJfyGq8pgBk6IAAAAASUVORK5CYII%3D">'
+        )
+        r = pisaParser(data, c)
+        self.assertEqual(r.warn, 0)
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -192,7 +192,7 @@ def test_frame_dimensions_bottom_right_width_height_with_margin(self):
 
     def test_frame_dimensions_for_box_len_eq_4(self):
         dims = {"-pdf-frame-box": ["12pt", "12,pt", "12pt", "12pt"]}
-        expected = [12.0, 12.0, 12.0, 12.0]
+        expected = (12.0, 12.0, 12.0, 12.0)
         result = getFrameDimensions(dims, 100, 200)
         self.assertEqual(result, expected)
 

diff --git a/xhtml2pdf/builders/watermarks.py b/xhtml2pdf/builders/watermarks.py
@@ -1,24 +1,35 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Iterator, cast
+
 import pypdf
 from PIL import Image
 from reportlab.pdfgen.canvas import Canvas
 
 from xhtml2pdf.files import getFile, pisaFileObject
 
+if TYPE_CHECKING:
+    from io import BytesIO
+
+    from xhtml2pdf.context import pisaContext
+
 
 class WaterMarks:
     @staticmethod
-    def get_size_location(img, context, pagesize, is_portrait):
-        object_position = context.get("object_position", None)
-        cssheight = context.get("height", None)
-        csswidth = context.get("width", None)
+    def get_size_location(
+        img, context: dict, pagesize: tuple[int, int], *, is_portrait: bool
+    ) -> tuple[int, int, int, int]:
+        object_position: tuple[int, int] | None = context.get("object_position")
+        cssheight: int | None = cast(int, context.get("height"))
+        csswidth: int = cast(int, context.get("width"))
         iw, ih = img.getSize()
         pw, ph = pagesize
-        width = pw  # min(iw, pw) # max
-        wfactor = float(width) / iw
-        height = ph  # min(ih, ph) # max
-        hfactor = float(height) / ih
-        factor_min = min(wfactor, hfactor)
-        factor_max = max(wfactor, hfactor)
+        width: int = pw  # min(iw, pw) # max
+        wfactor: float = float(width) / iw
+        height: int = ph  # min(ih, ph) # max
+        hfactor: float = float(height) / ih
+        factor_min: float = min(wfactor, hfactor)
+        factor_max: float = max(wfactor, hfactor)
         if is_portrait:
             height = ih * factor_min
             width = iw * factor_min
@@ -41,86 +52,78 @@ def get_size_location(img, context, pagesize, is_portrait):
         return x, y, width, height
 
     @staticmethod
-    def get_img_with_opacity(pisafile, context):
-        opacity = context.get("opacity", None)
+    def get_img_with_opacity(pisafile: pisaFileObject, context: dict) -> BytesIO:
+        opacity: float = context.get("opacity", None)
         if opacity:
-            name = pisafile.getNamedFile()
-            img = Image.open(name)
+            name: str | None = pisafile.getNamedFile()
+            img: Image.Image = Image.open(name)
             img = img.convert("RGBA")
             img.putalpha(int(255 * opacity))
             img.save(name, "PNG")
             return getFile(name).getBytesIO()
         return pisafile.getBytesIO()
 
     @staticmethod
-    def generate_pdf_background(pisafile, pagesize, is_portrait, context=None):
+    def generate_pdf_background(
+        pisafile: pisaFileObject,
+        pagesize: tuple[int, int],
+        *,
+        is_portrait: bool,
+        context: dict | None = None,
+    ) -> pisaFileObject:
         """
         Pypdf requires pdf as background so convert image to pdf in temporary file with same page dimensions
         :param pisafile:  Image File
         :param pagesize:  Page size for the new pdf
-        :return: pisaFileObject as tempfile.
         """
         # don't move up, we are preventing circular import
+        from xhtml2pdf.xhtml2pdf_reportlab import PmlImageReader
+
         if context is None:
             context = {}
-        from xhtml2pdf.xhtml2pdf_reportlab import PmlImageReader
 
-        output = pisaFileObject(None, "application/pdf")  # build temporary file
-        img = PmlImageReader(WaterMarks.get_img_with_opacity(pisafile, context))
+        output: pisaFileObject = pisaFileObject(
+            None, "application/pdf"
+        )  # build temporary file
+        img: PmlImageReader = PmlImageReader(
+            WaterMarks.get_img_with_opacity(pisafile, context)
+        )
         x, y, width, height = WaterMarks.get_size_location(
-            img, context, pagesize, is_portrait
+            img, context, pagesize, is_portrait=is_portrait
         )
 
         canvas = Canvas(output.getNamedFile(), pagesize=pagesize)
         canvas.drawImage(img, x, y, width, height, mask="auto")
 
-        """
-        iw, ih = img.getSize()
-        pw, ph = pagesize
-
-        width = pw  # min(iw, pw) # max
-        wfactor = float(width) / iw
-        height = ph  # min(ih, ph) # max
-        hfactor = float(height) / ih
-        factor_min = min(wfactor, hfactor)
-        factor_max = max(wfactor, hfactor)
-
-        if is_portrait:
-            w = iw * factor_min
-            h = ih * factor_min
-            canvas.drawImage(img, 0, ph - h, w, h)
-        else:
-            h = ih * factor_max
-            w = iw * factor_min
-            canvas.drawImage(img, 0, 0, w, h)
-        """
         canvas.save()
 
         return output
 
     @staticmethod
-    def get_watermark(context, max_numpage):
+    def get_watermark(context: pisaContext, max_numpage: int) -> Iterator:
         if context.pisaBackgroundList:
             pages = [x[0] for x in context.pisaBackgroundList] + [max_numpage + 1]
             pages.pop(0)
-            counter = 0
-            for page, bgfile, pgcontext in context.pisaBackgroundList:
+            for counter, (page, bgfile, pgcontext) in enumerate(
+                context.pisaBackgroundList
+            ):
                 if not bgfile.notFound():
                     yield range(page, pages[counter]), bgfile, int(pgcontext["step"])
-                counter += 1
 
     @staticmethod
-    def process_doc(context, istream, output):
-        pdfoutput = pypdf.PdfWriter()
-        input1 = pypdf.PdfReader(istream)
-        has_bg = False
+    def process_doc(
+        context: pisaContext, istream: bytes, output: bytes
+    ) -> tuple[bytes, bool]:
+        pdfoutput: pypdf.PdfWriter = pypdf.PdfWriter()
+        input1: pypdf.PdfReader = pypdf.PdfReader(istream)
+        has_bg: bool = False
         for pages, bgouter, step in WaterMarks.get_watermark(
             context, len(input1.pages)
         ):
             for index, ctr in enumerate(pages):
-                bginput = pypdf.PdfReader(bgouter.getBytesIO())
-                pagebg = bginput.pages[0]
-                page = input1.pages[ctr - 1]
+                bginput: pypdf.PdfReader = pypdf.PdfReader(bgouter.getBytesIO())
+                pagebg: pypdf.PageObject = bginput.pages[0]
+                page: pypdf.PageObject = input1.pages[ctr - 1]
                 if index % step == 0:
                     pagebg.merge_page(page)
                     page = pagebg