-
Notifications
You must be signed in to change notification settings - Fork 594
/
Copy pathtest_textbox.py
291 lines (247 loc) · 10.7 KB
/
test_textbox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
"""
Fill a given text in a rectangle on some PDF page using
1. TextWriter object
2. Basic text output
Check text is indeed contained in given rectangle.
"""
import pymupdf
# German sample paragraph used as the fill text by test_textbox1 .. test_textbox4.
# It is wrapped in codespell ignore markers (it is not English text).
# codespell:ignore-begin
text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca.
Er ähnelt dem Orca in Form und Proportionen, ist aber einfarbig schwarz und mit einer Maximallänge von etwa sechs Metern deutlich kleiner.
Kleine Schwertwale bilden Schulen von durchschnittlich zehn bis fünfzig Tieren, wobei sie sich auch mit anderen Delfinen vergesellschaften und sich meistens abseits der Küsten aufhalten.
Sie sind in allen Ozeanen gemäßigter, subtropischer und tropischer Breiten beheimatet, sind jedoch vor allem in wärmeren Jahreszeiten auch bis in die gemäßigte bis subpolare Zone südlich der Südspitze Südamerikas, vor Nordeuropa und bis vor Kanada anzutreffen."""
# codespell:ignore-end
def test_textbox1():
    """Fill a rectangle through a TextWriter and verify the text stays inside it."""
    document = pymupdf.open()
    target_page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 400)
    writer = pymupdf.TextWriter(target_page.rect, color=(0, 0, 1))  # blue
    writer.fill_textbox(box, text, align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12)

    morph_spec = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(target_page, morph=morph_spec)

    # Containment check: restricting extraction to the box must not lose text.
    whole_page_text = target_page.get_text()
    clipped_text = target_page.get_text(clip=box)
    assert whole_page_text == clipped_text

    # Additionally run the page-level write_text() with the same writer.
    target_page.write_text(writers=writer)
def test_textbox2():
    """Insert text with Page.insert_textbox and verify it stays inside the box.

    The text is written in "lightblue" and attached to a freshly added
    optional content group.
    """
    document = pymupdf.open()
    layer = document.add_ocg("ocg1")
    target_page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 400)
    fill_color = pymupdf.utils.getColor("lightblue")
    # The HSV lookup result is not used for drawing; the call itself is kept.
    _hsv_red = pymupdf.utils.getColorHSV("red")

    insert_options = dict(
        align=pymupdf.TEXT_ALIGN_LEFT,
        fontsize=12,
        color=fill_color,
        oc=layer,
    )
    target_page.insert_textbox(box, text, **insert_options)

    # Containment check: restricting extraction to the box must not lose text.
    whole_page_text = target_page.get_text()
    clipped_text = target_page.get_text(clip=box)
    assert whole_page_text == clipped_text
def test_textbox3():
    """Fill a rectangle right-to-left with the "cjk" font via TextWriter.

    After the containment check, the document is also scrubbed and its
    fonts are subset.
    """
    document = pymupdf.open()
    target_page = document.new_page()
    cjk_font = pymupdf.Font("cjk")
    box = pymupdf.Rect(50, 50, 400, 400)
    writer = pymupdf.TextWriter(target_page.rect, color=(0, 0, 1))  # blue
    writer.fill_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        font=cjk_font,
        fontsize=12,
        right_to_left=True,
    )

    morph_spec = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(target_page, morph=morph_spec)

    # Containment check: restricting extraction to the box must not lose text.
    whole_page_text = target_page.get_text()
    clipped_text = target_page.get_text(clip=box)
    assert whole_page_text == clipped_text

    document.scrub()
    document.subset_fonts()
def test_textbox4():
    """Fill a rectangle right-to-left with the "cour" font via TextWriter.

    The text is written under a freshly added optional content group, then
    checked for containment in the target rectangle.
    """
    document = pymupdf.open()
    layer = document.add_ocg("ocg1")
    target_page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 600)
    writer = pymupdf.TextWriter(target_page.rect, color=(0, 0, 1))  # blue

    courier = pymupdf.Font("cour")
    writer.fill_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        fontsize=12,
        font=courier,
        right_to_left=True,
    )

    morph_spec = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(target_page, oc=layer, morph=morph_spec)

    # Containment check: restricting extraction to the box must not lose text.
    whole_page_text = target_page.get_text()
    clipped_text = target_page.get_text(clip=box)
    assert whole_page_text == clipped_text
def test_textbox5():
    """Shrink the font until justified text fits a small box, then check its bbox.

    Uses basic text insertion (``Page.insert_textbox``) while the global
    "small glyph heights" setting is switched on. Starting at font size 12,
    the size is lowered in steps of 0.5 until the insertion no longer
    returns a negative value. The bounding box of the first extracted text
    block must then lie inside the target rectangle.
    """
    # Remember the value returned by the no-argument call; it is restored
    # in the ``finally`` clause below.
    small_glyph_heights0 = pymupdf.TOOLS.set_small_glyph_heights()
    pymupdf.TOOLS.set_small_glyph_heights(True)
    try:
        doc = pymupdf.open()
        page = doc.new_page()
        r = pymupdf.Rect(100, 100, 150, 150)
        text = "words and words and words and more words..."
        rc = -1
        fontsize = 12
        page.draw_rect(r)
        while rc < 0:
            # Fail fast instead of looping with zero / negative font sizes
            # if the text never fits into the rectangle.
            assert fontsize > 0, "text did not fit at any positive font size"
            rc = page.insert_textbox(
                r,
                text,
                fontsize=fontsize,
                align=pymupdf.TEXT_ALIGN_JUSTIFY,
            )
            fontsize -= 0.5

        blocks = page.get_text("blocks")
        # The first four items of a block entry are used as its rectangle.
        bbox = pymupdf.Rect(blocks[0][:4])
        assert bbox in r
    finally:
        # Must restore small_glyph_heights, otherwise other tests can fail.
        pymupdf.TOOLS.set_small_glyph_heights(small_glyph_heights0)
def test_2637():
    """Ensure correct calculation of fitting text.

    Starting at font size 50, the size is lowered by 1 until
    ``Page.insert_textbox`` no longer returns a negative value. The bounding
    box of the first extracted text block must then lie inside the target
    rectangle.
    """
    doc = pymupdf.open()
    page = doc.new_page()
    text = (
        "The morning sun painted the sky with hues of orange and pink. "
        "Birds chirped harmoniously, greeting the new day. "
        "Nature awakened, filling the air with life and promise."
    )
    rect = pymupdf.Rect(50, 50, 500, 280)
    fontsize = 50
    rc = -1
    while rc < 0:  # look for largest font size that makes the text fit
        # Fail fast instead of looping with zero / negative font sizes
        # if the text never fits into the rectangle.
        assert fontsize > 0, "text did not fit at any positive font size"
        rc = page.insert_textbox(rect, text, fontname="hebo", fontsize=fontsize)
        fontsize -= 1

    # confirm text won't lap outside rect
    blocks = page.get_text("blocks")
    # The first four items of a block entry are used as its rectangle.
    bbox = pymupdf.Rect(blocks[0][:4])
    assert bbox in rect
def test_htmlbox1():
    """Insert styled HTML text into a rectangle at four rotations and read it back.

    One page is created per rotation (0, 90, 180, 270). On each page:

    - inserting with ``scale_low=1`` must report a negative spare height
      and a scale of exactly 1;
    - inserting again with ``scale_low=0`` must report a spare height of 0
      and a scale strictly between 0 and 1;
    - the first link on the page must point to the URI used in the HTML;
    - the extracted plain text must equal the unstyled reference text;
    - every line's writing direction must match the entry expected for
      that page;
    - the three styled words must carry the expected bold / italic / color
      properties, and every other span must be neither bold nor italic.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox1' not executed in classic.")
        return

    # Too small for the unscaled text, as the scale_low=1 assertions confirm.
    box = pymupdf.Rect(100, 100, 200, 200)
    plain_reference = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."""
    html = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation <b>ullamco</b> <i>laboris</i> nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in <span style="color: #0f0;font-weight:bold;">voluptate</span> velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui <a href="https://www.artifex.com">officia</a> deserunt mollit anim id est laborum."""

    # All writing directions, indexed by page number (one page per rotation).
    expected_dirs = ((1, 0), (0, -1), (-1, 0), (0, 1))
    # word -> (bold, italic, name of the expected color in pymupdf.pdfcolor)
    styled_words = {
        "ullamco": (True, False, "black"),
        "laboris": (False, True, "black"),
        "voluptate": (True, False, "green"),
    }

    doc = pymupdf.Document()
    for angle in (0, 90, 180, 270):
        page = doc.new_page()

        # No down-scaling allowed: the text must not fit.
        leftover, factor = page.insert_htmlbox(box, html, rotate=angle, scale_low=1)
        assert leftover < 0
        assert factor == 1

        # Arbitrary down-scaling allowed: the text fills the box exactly.
        leftover, factor = page.insert_htmlbox(box, html, rotate=angle, scale_low=0)
        assert leftover == 0
        assert 0 < factor < 1

        page = doc.reload_page(page)
        first_link = page.get_links()[0]  # first of the links on the page
        assert first_link["uri"] == "https://www.artifex.com"

        # Assert plain text is complete.
        # We must remove line breaks and any ligatures for this.
        assert plain_reference == page.get_text(flags=0)[:-1].replace("\n", " ")

        hits = 0  # counts the spans that are one of the styled words
        for block in page.get_text("dict")["blocks"]:
            for line in block["lines"]:
                assert line["dir"] == expected_dirs[page.number]
                for span in line["spans"]:
                    word = span["text"]
                    color = pymupdf.sRGB_to_pdf(span["color"])
                    bold = bool(span["flags"] & 16)
                    italic = bool(span["flags"] & 2)
                    if word not in styled_words:
                        assert bold is False
                        assert italic is False
                        continue
                    hits += 1
                    want_bold, want_italic, color_name = styled_words[word]
                    assert bold is want_bold
                    assert italic is want_italic
                    assert color == pymupdf.pdfcolor[color_name]
        # all 3 special words were encountered
        assert hits == 3
def test_htmlbox2():
    """Insert a short HTML text unscaled at every rotation.

    Each insertion must keep scale 1 and leave a positive spare height that
    is smaller than the rectangle height. The spare height must be the same
    for all four rotations.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox2' not executed in classic.")
        return

    document = pymupdf.open()
    box = pymupdf.Rect(100, 100, 200, 200)  # large enough to hold text
    target_page = document.new_page()

    leftover_heights = set()
    for angle in (0, 90, 180, 270):
        leftover, factor = target_page.insert_htmlbox(
            box,
            "Hello, World!",
            scale_low=1,
            rotate=angle,
        )
        assert factor == 1
        assert 0 < leftover < box.height
        leftover_heights.add(leftover)

    # same result for all rotations
    assert len(leftover_heights) == 1
def test_htmlbox3():
    """Insert HTML text with opacity 0.5 and read the opacity back from the text trace."""
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox3' not executed in classic.")
        return

    box = pymupdf.Rect(100, 250, 300, 350)
    html = """<span style="color:red;font-size:20px;">Just some text.</span>"""
    document = pymupdf.open()
    target_page = document.new_page()

    # insert some text with opacity
    target_page.insert_htmlbox(box, html, opacity=0.5)

    # low-level extraction of the inserted text exposes its opacity
    first_span = target_page.get_texttrace()[0]
    assert first_span["opacity"] == 0.5
def test_3559():
    """Insert an HTML snippet containing an empty heading.

    The test is not executed when the MuPDF version is below 1.24.4. It only
    requires the insertion call to complete.
    """
    too_old = pymupdf.mupdf_version_tuple < (1, 24, 4)
    if too_old:
        print("test_3559(): Not running because mupdf known to SEGV.")
        return

    document = pymupdf.Document()
    target_page = document.new_page()
    html = """<body><h3></h3></body>"""
    box = pymupdf.Rect(100, 100, 200, 200)
    target_page.insert_htmlbox(box, html)
def test_3916():
    """Insert HTML into a 1x1 rectangle with ``scale_low=0.5``; expect spare height -1."""
    document = pymupdf.open()
    tiny_box = pymupdf.Rect(100, 100, 101, 101)  # Too small for the text.
    target_page = document.new_page()
    result = target_page.insert_htmlbox(tiny_box, "Hello, World!", scale_low=0.5)
    leftover, _factor = result
    assert leftover == -1
def test_4400():
    """Call TextWriter.fill_textbox with an explicit ``pos`` inside the rectangle.

    There is no assertion; the test only requires the call to complete.
    """
    with pymupdf.open() as document:
        target_page = document.new_page()
        writer = pymupdf.TextWriter(target_page.rect)
        digits = "111111111"
        print("Calling writer.fill_textbox().", flush=True)
        box = pymupdf.Rect(0, 0, 100, 20)
        writer.fill_textbox(rect=box, pos=(80, 0), text=digits, fontsize=8)