Skip to content

Commit

Permalink
Merge pull request #725 from xhtml2pdf/bugfix/image-identify
Browse files Browse the repository at this point in the history
Remove stack trace when images cannot be identified
  • Loading branch information
timobrembeck committed Oct 9, 2023
2 parents 96ec58d + f8888ba commit 6a50d43
Show file tree
Hide file tree
Showing 13 changed files with 592 additions and 486 deletions.
Binary file added tests/samples/img/zero_width.gif
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 20 additions & 0 deletions tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,26 @@ def test_document_with_broken_image(self) -> None:
],
)

def test_document_cannot_identify_image(self) -> None:
    """
    An image that cannot be identified must produce exactly one warning.

    The document is rendered to the null device while records emitted on
    the ``xhtml2pdf.tags`` logger (WARNING and above) are captured. The
    captured output has to consist of the single "Cannot identify image
    file" message, i.e. no stack trace lines are logged alongside it.
    """
    image_path = "https://raw.githubusercontent.com/python-pillow/Pillow/7921da54a73dd4a30c23957369b79cda176005c6/Tests/images/zero_width.gif"
    img_tag = f'<img src="{image_path}">'

    # Output is discarded; only the log records matter for this test.
    with open(os.devnull, "wb") as pdf_file:
        with self.assertLogs("xhtml2pdf.tags", level="WARNING") as cm:
            html_stream = io.StringIO(
                HTML_CONTENT.format(head="", extra_html=img_tag)
            )
            pisaDocument(src=html_stream, dest=pdf_file)

    expected_records = [
        "WARNING:xhtml2pdf.tags:Cannot identify image file:\n"
        "'<img "
        'src="https://raw.githubusercontent.com/python-pillow/Pillow/7921da54a73dd4a30c23957369b79cda176005c6/Tests/images/zero_width.gif"/>\''
    ]
    self.assertEqual(cm.output, expected_records)

def test_document_nested_table(self) -> None:
"""Test that nested tables are being rendered."""
tests_folder = os.path.dirname(os.path.realpath(__file__))
Expand Down
9 changes: 9 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,12 @@ def test_image_base64(self) -> None:
)
r = pisaParser(data, c)
self.assertEqual(r.warn, 0)

def test_image_base64_urlencoded(self) -> None:
    """
    Parse an ``<img>`` whose ``src`` is percent-encoded base64 text.

    The payload contains ``%2B``, ``%2F`` and ``%3D`` escapes; parsing it
    must finish with a warning count of zero.
    """
    markup = (
        b"<img"
        b' src="%2Bvr6He3KoAAAACXBIWXMAAA7EAAAOxAGVKw4bAAAEcElEQVR4nO2aTW%2FbRhCGh18ij1zKknMkbbf2UXITIEeyMhIfRaF1exQLA%2FJRclslRykO%2Brs7s7s0VwytNmhJtsA8gHZEcox9PTs7uysQgGEYhmEYhmEYhmEYhmEYhmEYhmEYhmEYhmEYhmEYhmGYr2OWRK%2FReIKI8Zt7Hb19wTcQ0uTkGh13bQupcw7gPOvdo12%2F5CzNtNR7xLUtNtT3CGBQ6g3InjY720pvofUec22LJPr8PhEp2OMPyI40PdwWUdronCu9yQpdPx53bQlfLKnfOVhlnDYRBXve4Ov%2BIZTeMgdedm0NR%2BxoXJeQvdJ3CvziykSukwil16W%2FOe7aGjIjqc%2F9ib4jQlJy0uArtN4A0%2BcvXFvDkmUJ47sJ1Y1ATLDNVXZkNPIepQzxy1ki9fqiwbUj%2FI%2B64zxWNzyZnPuhvohJ9K70VvXBixpcu2SAHU%2BXd9EKdEJDNpYP3AQr3bQSpPQ6Y6%2F4dl1z7ZDbArsszjA7L0g7ibB0CDcidUWVoErvIMKZh2Xs0LUzcLW6V5NfiUgNEbaYmAVL6bXl0nJRc%2B1S72ua%2FD%2FcTjGPlQj7eUqd7A096rYlRjdPYlhz7VIvxpVG3cemDKF%2BWAwLY%2F6XelOZKTXXzsC4xvDjjtSN6kHLhLke6PrwM8h1raf40qjrGO7H9aTEbduucjS04ZrYU%2F4iuS5Z2Hdt0rvCLFdmLEXcU30AGddST62o%2BsLcf5l6k7CP%2Bru4pLYqX%2FVFyxbm%2FutQbx%2Fr22ZEbTb2f5I2kns1Y1OQR8ZyofX%2BTjJxj1Rz7QQVnf1QzR26Oth0ueJVYcRP6ZUPac%2FRx%2F5M6ixO1dhSrT3Y1DpiYmx3tF4ZUdpz9LD%2FdSg9PXES0LB71BwcGjKROuV28lnvnv7HHJsezheBGH5%2BX2CfSfRbMKW%2B5aGs3JFjMrjGibJc0S7TJzqjHrh2hDybj9XRXNZa89Aro55XBdbW5wti2c%2F5WJ7jJ1RolVUn%2FHWpb0I58Tziup6Rx7Dm2hnbRP1GM9PW%2FNFmQ4PtVRVN63Wvxfmu5sowDMMwDMMwDMMwDMMwDMMwDMMwzL%2BCpT%2F%2FF%2F6beoV8zb2Jmt4Qryx6lTUCsENQ75HOkhXAO3EPVgyQtKtUy3C%2Fe%2BFJg17Zjnew1Xrdb9InbG4WqfUAftG%2BWhLwPVyfg536%2BMU7m4C1CMk4ZznpXZzDYI1PDL2nS1hpvc5cNd7E2sJg05Fe7%2F7d3Fln8Cvc3bwB616auxsKl4WPghjemHrDqyDWeu1UNW5s2btPnSQ75oOdunEwWazfwgVG0kqluYCM9OIjWOGnfA2b9G4Ha63XKpvQ8perTvTifJNhi6%2BWMWmi7smEZf6G8MmhlyGq%2BNqP8GV84TLuJr7UIQVx%2BbDEoEpRZIz42gs40OuN4Mv8hXzelV7KX1isH%2BewTWckikyVv%2BCfHuqVF7I16gN0VKypX6wPsE%2BzFPzkinolU9UH8OMGvSpnZqKsv13p%2FRsMun6X5x%2Fy2LeAr8O66lsBwzBMP%2FwJfyGq8pgBk6IAAAAASUVORK5CYII%3D">'
    )
    parse_context = pisaContext(".")
    parsed = pisaParser(markup, parse_context)
    self.assertEqual(parsed.warn, 0)
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def test_frame_dimensions_bottom_right_width_height_with_margin(self):

def test_frame_dimensions_for_box_len_eq_4(self):
dims = {"-pdf-frame-box": ["12pt", "12,pt", "12pt", "12pt"]}
expected = [12.0, 12.0, 12.0, 12.0]
expected = (12.0, 12.0, 12.0, 12.0)
result = getFrameDimensions(dims, 100, 200)
self.assertEqual(result, expected)

Expand Down
105 changes: 54 additions & 51 deletions xhtml2pdf/builders/watermarks.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,35 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Iterator, cast

import pypdf
from PIL import Image
from reportlab.pdfgen.canvas import Canvas

from xhtml2pdf.files import getFile, pisaFileObject

if TYPE_CHECKING:
from io import BytesIO

from xhtml2pdf.context import pisaContext


class WaterMarks:
@staticmethod
def get_size_location(img, context, pagesize, is_portrait):
object_position = context.get("object_position", None)
cssheight = context.get("height", None)
csswidth = context.get("width", None)
def get_size_location(
img, context: dict, pagesize: tuple[int, int], *, is_portrait: bool
) -> tuple[int, int, int, int]:
object_position: tuple[int, int] | None = context.get("object_position")
cssheight: int | None = cast(int, context.get("height"))
csswidth: int = cast(int, context.get("width"))
iw, ih = img.getSize()
pw, ph = pagesize
width = pw # min(iw, pw) # max
wfactor = float(width) / iw
height = ph # min(ih, ph) # max
hfactor = float(height) / ih
factor_min = min(wfactor, hfactor)
factor_max = max(wfactor, hfactor)
width: int = pw # min(iw, pw) # max
wfactor: float = float(width) / iw
height: int = ph # min(ih, ph) # max
hfactor: float = float(height) / ih
factor_min: float = min(wfactor, hfactor)
factor_max: float = max(wfactor, hfactor)
if is_portrait:
height = ih * factor_min
width = iw * factor_min
Expand All @@ -41,86 +52,78 @@ def get_size_location(img, context, pagesize, is_portrait):
return x, y, width, height

@staticmethod
def get_img_with_opacity(pisafile, context):
opacity = context.get("opacity", None)
def get_img_with_opacity(pisafile: pisaFileObject, context: dict) -> BytesIO:
opacity: float = context.get("opacity", None)
if opacity:
name = pisafile.getNamedFile()
img = Image.open(name)
name: str | None = pisafile.getNamedFile()
img: Image.Image = Image.open(name)
img = img.convert("RGBA")
img.putalpha(int(255 * opacity))
img.save(name, "PNG")
return getFile(name).getBytesIO()
return pisafile.getBytesIO()

@staticmethod
def generate_pdf_background(pisafile, pagesize, is_portrait, context=None):
def generate_pdf_background(
pisafile: pisaFileObject,
pagesize: tuple[int, int],
*,
is_portrait: bool,
context: dict | None = None,
) -> pisaFileObject:
"""
Pypdf requires pdf as background so convert image to pdf in temporary file with same page dimensions
:param pisafile: Image File
:param pagesize: Page size for the new pdf
:return: pisaFileObject as tempfile.
"""
# don't move up, we are preventing circular import
from xhtml2pdf.xhtml2pdf_reportlab import PmlImageReader

if context is None:
context = {}
from xhtml2pdf.xhtml2pdf_reportlab import PmlImageReader

output = pisaFileObject(None, "application/pdf") # build temporary file
img = PmlImageReader(WaterMarks.get_img_with_opacity(pisafile, context))
output: pisaFileObject = pisaFileObject(
None, "application/pdf"
) # build temporary file
img: PmlImageReader = PmlImageReader(
WaterMarks.get_img_with_opacity(pisafile, context)
)
x, y, width, height = WaterMarks.get_size_location(
img, context, pagesize, is_portrait
img, context, pagesize, is_portrait=is_portrait
)

canvas = Canvas(output.getNamedFile(), pagesize=pagesize)
canvas.drawImage(img, x, y, width, height, mask="auto")

"""
iw, ih = img.getSize()
pw, ph = pagesize
width = pw # min(iw, pw) # max
wfactor = float(width) / iw
height = ph # min(ih, ph) # max
hfactor = float(height) / ih
factor_min = min(wfactor, hfactor)
factor_max = max(wfactor, hfactor)
if is_portrait:
w = iw * factor_min
h = ih * factor_min
canvas.drawImage(img, 0, ph - h, w, h)
else:
h = ih * factor_max
w = iw * factor_min
canvas.drawImage(img, 0, 0, w, h)
"""
canvas.save()

return output

@staticmethod
def get_watermark(context, max_numpage):
def get_watermark(context: pisaContext, max_numpage: int) -> Iterator:
if context.pisaBackgroundList:
pages = [x[0] for x in context.pisaBackgroundList] + [max_numpage + 1]
pages.pop(0)
counter = 0
for page, bgfile, pgcontext in context.pisaBackgroundList:
for counter, (page, bgfile, pgcontext) in enumerate(
context.pisaBackgroundList
):
if not bgfile.notFound():
yield range(page, pages[counter]), bgfile, int(pgcontext["step"])
counter += 1

@staticmethod
def process_doc(context, istream, output):
pdfoutput = pypdf.PdfWriter()
input1 = pypdf.PdfReader(istream)
has_bg = False
def process_doc(
context: pisaContext, istream: bytes, output: bytes
) -> tuple[bytes, bool]:
pdfoutput: pypdf.PdfWriter = pypdf.PdfWriter()
input1: pypdf.PdfReader = pypdf.PdfReader(istream)
has_bg: bool = False
for pages, bgouter, step in WaterMarks.get_watermark(
context, len(input1.pages)
):
for index, ctr in enumerate(pages):
bginput = pypdf.PdfReader(bgouter.getBytesIO())
pagebg = bginput.pages[0]
page = input1.pages[ctr - 1]
bginput: pypdf.PdfReader = pypdf.PdfReader(bgouter.getBytesIO())
pagebg: pypdf.PageObject = bginput.pages[0]
page: pypdf.PageObject = input1.pages[ctr - 1]
if index % step == 0:
pagebg.merge_page(page)
page = pagebg
Expand Down

0 comments on commit 6a50d43

Please sign in to comment.