/
crossrefs.go
379 lines (324 loc) · 11.6 KB
/
crossrefs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package core
import (
"bufio"
"bytes"
"errors"
"os"
"strings"
"github.com/xwc1125/gopdf/common"
)
// xrefType distinguishes the two kinds of cross-reference entries: an entry of a classic xref table, or an
// entry describing an object that is stored inside an object stream.
type xrefType int

// Supported cross-reference entry kinds.
const (
	// XrefTypeTableEntry marks a regular xref table entry (object located by file offset).
	XrefTypeTableEntry xrefType = iota
	// XrefTypeObjectStream marks an entry for an object stored within an object stream.
	XrefTypeObjectStream
)
// XrefObject defines a cross reference entry which is a map between object number (with generation number) and the
// location of the actual object, either as a file offset (xref table entry), or as a location within an xref
// stream object (xref object stream).
type XrefObject struct {
// XType tells which location fields apply: Offset for XrefTypeTableEntry, OsObjNumber/OsObjIndex for
// XrefTypeObjectStream.
XType xrefType
// ObjectNumber and Generation identify the object described by this entry.
ObjectNumber int
Generation int
// For normal xrefs (defined by OFFSET): absolute position the parser seeks to before parsing the object.
Offset int64
// For xrefs to object streams: OsObjNumber is the object number of the containing object stream.
// OsObjIndex is presumably the index of the object within that stream; in this file it is only logged.
OsObjNumber int
OsObjIndex int
}
// XrefTable represents the cross references in a PDF, i.e. the table of objects and information
// where to access within the PDF file.
type XrefTable struct {
// ObjectMap is keyed by object number only; the generation is carried inside the XrefObject value.
ObjectMap map[int]XrefObject // Maps object number to XrefObject
// List of objects sorted by offset (only objects with offsets, not ones in streams).
// NOTE(review): not populated or read in this part of the file; confirm where it is maintained.
sortedObjects []XrefObject
}
// objectStream represents an object stream's information which can contain multiple indirect objects.
// The information specifies the number of objects and has information about offset locations for
// each object.
type objectStream struct {
// N is the object count taken from the stream dictionary's N entry.
N int
// ds holds the decoded stream data (the result of DecodeStream).
ds []byte
// offsets maps an object number to its position within ds, computed as the dictionary's First value
// plus the relative offset listed in the stream's offset table.
offsets map[int]int64
}
// objectStreams defines a map between object numbers (object streams only) and underlying objectStream information.
// It acts as a cache so that each object stream is decoded and its offset table parsed only once.
type objectStreams map[int]objectStream
// objectCache defines a map between object numbers and corresponding PdfObject. Serves as a cache for PdfObjects that
// have already been parsed. The key is the object number alone (no generation number).
type objectCache map[int]PdfObject
// lookupObjectViaOS returns the object numbered objNum that is stored inside the object stream whose own
// object number is sobjNumber.
//
// On the first access to a given object stream, the stream is loaded via LookupByNumber, validated (it must
// be a stream object whose Type is ObjStm and whose N and First entries are integers), decoded, and its
// offset table is parsed and cached in parser.objstms. Subsequent lookups reuse the cached decoded data.
//
// While parsing, parser.reader is pointed at the decoded stream data; once that redirection has been set up,
// the file offset captured beforehand is handed back to SetFileOffset when the function returns.
//
// The parsed value is returned wrapped in a PdfIndirectObject carrying objNum. An error is returned when the
// object stream is missing, malformed or not yet decrypted, when objNum is not listed in the stream's offset
// table, when the computed offset cannot be seeked to, or when the object fails to parse or parses to nil.
func (parser *PdfParser) lookupObjectViaOS(sobjNumber int, objNum int) (PdfObject, error) {
	var bufReader *bytes.Reader

	objstm, cached := parser.objstms[sobjNumber]
	if !cached {
		soi, err := parser.LookupByNumber(sobjNumber)
		if err != nil {
			common.Log.Debug("Missing object stream with number %d", sobjNumber)
			return nil, err
		}

		so, ok := soi.(*PdfObjectStream)
		if !ok {
			return nil, errors.New("invalid object stream")
		}

		if parser.crypter != nil && !parser.crypter.isDecrypted(so) {
			return nil, errors.New("need to decrypt the stream")
		}

		sod := so.PdfObjectDictionary
		common.Log.Trace("so d: %s\n", sod.String())
		name, ok := sod.Get("Type").(*PdfObjectName)
		if !ok {
			common.Log.Debug("ERROR: Object stream should always have a Type")
			return nil, errors.New("object stream missing Type")
		}
		if strings.ToLower(string(*name)) != "objstm" {
			common.Log.Debug("ERROR: Object stream type shall always be ObjStm !")
			return nil, errors.New("object stream type != ObjStm")
		}

		N, ok := sod.Get("N").(*PdfObjectInteger)
		if !ok {
			return nil, errors.New("invalid N in stream dictionary")
		}
		firstOffset, ok := sod.Get("First").(*PdfObjectInteger)
		if !ok {
			return nil, errors.New("invalid First in stream dictionary")
		}

		common.Log.Trace("type: %s number of objects: %d", name, *N)
		ds, err := DecodeStream(so)
		if err != nil {
			return nil, err
		}
		common.Log.Trace("Decoded: %s", ds)

		// Temporarily change the reader object to this decoded buffer.
		// Change back afterwards.
		bakOffset := parser.GetFileOffset()
		defer func() { parser.SetFileOffset(bakOffset) }()

		bufReader = bytes.NewReader(ds)
		parser.reader = bufio.NewReader(bufReader)

		common.Log.Trace("Parsing offset map")
		// Load the offset map: N pairs of (object number, offset relative to First).
		offsets := map[int]int64{}
		for i := 0; i < int(*N); i++ {
			parser.skipSpaces()
			// Object number.
			obj, err := parser.parseNumber()
			if err != nil {
				return nil, err
			}
			onum, ok := obj.(*PdfObjectInteger)
			if !ok {
				return nil, errors.New("invalid object stream offset table")
			}

			parser.skipSpaces()
			// Offset.
			obj, err = parser.parseNumber()
			if err != nil {
				return nil, err
			}
			offset, ok := obj.(*PdfObjectInteger)
			if !ok {
				return nil, errors.New("invalid object stream offset table")
			}

			common.Log.Trace("obj %d offset %d", *onum, *offset)
			// Store the position relative to the beginning of the decoded data.
			offsets[int(*onum)] = int64(*firstOffset + *offset)
		}

		objstm = objectStream{N: int(*N), ds: ds, offsets: offsets}
		parser.objstms[sobjNumber] = objstm
	} else {
		// Already decoded: only the reader needs to be redirected.
		// Point back afterwards.
		bakOffset := parser.GetFileOffset()
		defer func() { parser.SetFileOffset(bakOffset) }()

		bufReader = bytes.NewReader(objstm.ds)
	}

	// A missing entry must not fall back to offset 0: that position holds the stream's offset table, which
	// would otherwise be parsed and returned as if it were the requested object.
	offset, found := objstm.offsets[objNum]
	if !found {
		common.Log.Debug("ERROR: Object %d not listed in object stream %d", objNum, sobjNumber)
		return nil, errors.New("object not found in object stream")
	}
	common.Log.Trace("ACTUAL offset[%d] = %d", objNum, offset)

	// Seek fails for a negative position, which a malformed First/offset pair can produce.
	// os.SEEK_SET is retained because it is the constant the rest of this file uses.
	if _, err := bufReader.Seek(offset, os.SEEK_SET); err != nil {
		common.Log.Debug("ERROR: Invalid offset %d for object %d (%s)", offset, objNum, err)
		return nil, err
	}
	parser.reader = bufio.NewReader(bufReader)

	// Peek is only used for trace output; a short read here is not an error.
	bb, _ := parser.reader.Peek(100)
	common.Log.Trace("OBJ peek \"%s\"", string(bb))

	val, err := parser.parseObject()
	if err != nil {
		common.Log.Debug("ERROR Fail to read object (%s)", err)
		return nil, err
	}
	if val == nil {
		return nil, errors.New("object cannot be null")
	}

	// Make an indirect object around it.
	ind := PdfIndirectObject{}
	ind.ObjectNumber = int64(objNum)
	ind.PdfObject = val

	return &ind, nil
}
// LookupByNumber looks up a PdfObject by its object number and returns an error on failure.
// It is the public entry point to lookupByNumberWrapper and always enables repair attempts for bad
// xref tables.
func (parser *PdfParser) LookupByNumber(objNumber int) (PdfObject, error) {
	const attemptRepairs = true

	// Whether the object came from an object stream is not of interest to outside callers.
	found, _, err := parser.lookupByNumberWrapper(objNumber, attemptRepairs)
	return found, err
}
// lookupByNumberWrapper wraps lookupByNumber and decrypts the resulting object when needed.
// It returns the object, the object-stream flag reported by lookupByNumber, and any lookup or
// decryption error.
func (parser *PdfParser) lookupByNumberWrapper(objNumber int, attemptRepairs bool) (PdfObject, bool, error) {
	obj, inObjStream, err := parser.lookupByNumber(objNumber, attemptRepairs)
	if err != nil {
		return nil, inObjStream, err
	}

	// Nothing to decrypt: objects inside object streams are never decrypted here, and neither are
	// objects of unencrypted files or objects that have been decrypted already.
	if inObjStream || parser.crypter == nil || parser.crypter.isDecrypted(obj) {
		return obj, inObjStream, nil
	}

	// Encrypted: decrypt prior to returning.
	if decErr := parser.crypter.Decrypt(obj, 0, 0); decErr != nil {
		return nil, inObjStream, decErr
	}
	return obj, inObjStream, nil
}
// getObjectNumber extracts the object number and generation number from an indirect object or a stream
// object. Any other object type yields zeros and an error.
func getObjectNumber(obj PdfObject) (int64, int64, error) {
	switch v := obj.(type) {
	case *PdfIndirectObject:
		return v.ObjectNumber, v.GenerationNumber, nil
	case *PdfObjectStream:
		return v.ObjectNumber, v.GenerationNumber, nil
	default:
		return 0, 0, errors.New("not an indirect/stream object")
	}
}
// lookupByNumber does the actual work behind LookupByNumber.
//
// The object cache is consulted first. Otherwise the xref entry for objNumber decides how the object is
// loaded: from a file offset (xref table entry) or from within an object stream. An object number without
// an xref entry resolves to a null object rather than an error. When attemptRepairs is set, a parse failure
// or a mismatching object number triggers a rebuild of the xref table followed by a single retry with
// repairs disabled.
//
// The returned bool is true only for results produced by the object-stream branch (including its errors);
// cache hits always report false.
func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (PdfObject, bool, error) {
	if cachedObj, hit := parser.ObjCache[objNumber]; hit {
		common.Log.Trace("Returning cached object %d", objNumber)
		return cachedObj, false, nil
	}

	entry, known := parser.xrefs.ObjectMap[objNumber]
	if !known {
		// An indirect reference to an undefined object shall not be
		// considered an error by a conforming reader; it shall be
		// treated as a reference to the null object.
		common.Log.Trace("Unable to locate object in xrefs! - Returning null object")
		return new(PdfObjectNull), false, nil
	}

	common.Log.Trace("Lookup obj number %d", objNumber)

	switch entry.XType {
	case XrefTypeTableEntry:
		common.Log.Trace("xrefobj obj num %d", entry.ObjectNumber)
		common.Log.Trace("xrefobj gen %d", entry.Generation)
		common.Log.Trace("xrefobj offset %d", entry.Offset)

		// Position the underlying source at the recorded offset and start with a fresh buffer.
		parser.rs.Seek(entry.Offset, os.SEEK_SET)
		parser.reader = bufio.NewReader(parser.rs)

		parsed, parseErr := parser.ParseIndirectObject()
		if parseErr != nil {
			common.Log.Debug("ERROR Failed reading xref (%s)", parseErr)
			if !attemptRepairs {
				return nil, false, parseErr
			}

			// Offset pointing to a non-object. Try to repair the file.
			common.Log.Debug("Attempting to repair xrefs (top down)")
			rebuilt, repairErr := parser.repairRebuildXrefsTopDown()
			if repairErr != nil {
				common.Log.Debug("ERROR Failed repair (%s)", repairErr)
				return nil, false, repairErr
			}
			parser.xrefs = *rebuilt
			return parser.lookupByNumber(objNumber, false)
		}

		if attemptRepairs {
			// Check the object number. If it does not match the requested one the xref table is
			// considered invalid and gets rebuilt.
			actualNum, _, _ := getObjectNumber(parsed)
			if int(actualNum) != objNumber {
				common.Log.Debug("Invalid xrefs: Rebuilding")
				if rebuildErr := parser.rebuildXrefTable(); rebuildErr != nil {
					return nil, false, rebuildErr
				}
				// Empty the cache.
				parser.ObjCache = objectCache{}
				// Try looking up again and return.
				return parser.lookupByNumberWrapper(objNumber, false)
			}
		}

		common.Log.Trace("Returning obj")
		parser.ObjCache[objNumber] = parsed
		return parsed, false, nil

	case XrefTypeObjectStream:
		common.Log.Trace("xref from object stream!")
		common.Log.Trace(">Load via OS!")
		common.Log.Trace("Object stream available in object %d/%d", entry.OsObjNumber, entry.OsObjIndex)

		// An object cannot live inside itself.
		if entry.OsObjNumber == objNumber {
			common.Log.Debug("ERROR Circular reference!?!")
			return nil, true, errors.New("xref circular reference")
		}

		// The containing object stream must itself be cross referenced.
		if _, exists := parser.xrefs.ObjectMap[entry.OsObjNumber]; !exists {
			common.Log.Debug("?? Belongs to a non-cross referenced object ...!")
			return nil, true, errors.New("os belongs to a non cross referenced object")
		}

		optr, err := parser.lookupObjectViaOS(entry.OsObjNumber, objNumber)
		if err != nil {
			common.Log.Debug("ERROR Returning ERR (%s)", err)
			return nil, true, err
		}
		common.Log.Trace("<Loaded via OS")
		parser.ObjCache[objNumber] = optr
		if parser.crypter != nil {
			// Mark as decrypted (inside object stream) for caching.
			// and avoid decrypting decrypted object.
			parser.crypter.decryptedObjects[optr] = true
		}
		return optr, true, nil
	}

	return nil, false, errors.New("unknown xref type")
}
// LookupByReference looks up the PdfObject that a reference points to.
// Only the reference's object number is used for the lookup.
func (parser *PdfParser) LookupByReference(ref PdfObjectReference) (PdfObject, error) {
	common.Log.Trace("Looking up reference %s", ref.String())

	objNumber := int(ref.ObjectNumber)
	return parser.LookupByNumber(objNumber)
}
// Resolve resolves a PdfObject to a direct object, looking up and resolving references as needed
// (unlike TraceToDirect).
//
// Objects that are not references are returned unchanged. A reference is looked up; if the result is an
// indirect object its contained object is returned, otherwise (stream or null object) the result itself.
// A reference that leads to another reference is not followed: the indirect object is returned together
// with an error. The parser's file offset captured before the lookup is passed back to SetFileOffset on
// return.
func (parser *PdfParser) Resolve(obj PdfObject) (PdfObject, error) {
	ref, isRef := obj.(*PdfObjectReference)
	if !isRef {
		// Direct object already.
		return obj, nil
	}

	bakOffset := parser.GetFileOffset()
	defer parser.SetFileOffset(bakOffset)

	target, err := parser.LookupByReference(*ref)
	if err != nil {
		return nil, err
	}

	indirect, isInd := target.(*PdfIndirectObject)
	if !isInd {
		// Not indirect (Stream or null object).
		return target, nil
	}

	inner := indirect.PdfObject
	if _, nested := inner.(*PdfObjectReference); nested {
		return indirect, errors.New("multi depth trace pointer to pointer")
	}
	return inner, nil
}
// printXrefTable writes every entry of the xref table to the debug log, numbering the entries from 1 in
// map iteration order (which is not stable between runs).
func printXrefTable(xrefTable XrefTable) {
	common.Log.Debug("=X=X=X=")
	common.Log.Debug("Xref table:")

	position := 1
	for _, entry := range xrefTable.ObjectMap {
		common.Log.Debug("i+1: %d (obj num: %d gen: %d) -> %d", position, entry.ObjectNumber, entry.Generation, entry.Offset)
		position++
	}
}