Skip to content

Commit

Permalink
Merge pull request #91 from adrg/xref-table-invalid-line
Browse files (browse the repository at this point in the history)
Attempt to parse invalid beginning lines of xref table subsections.
  • Loading branch information
gunnsth committed Jun 14, 2019
2 parents 756f5e2 + 9dec1cd commit 2daa144
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions core/parser.go
Expand Up @@ -768,6 +768,7 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
curObjNum := -1
secObjects := 0
insideSubsection := false
unmatchedContent := ""
for {
parser.skipSpaces()
_, err := parser.reader.Peek(1)
Expand All @@ -781,13 +782,24 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
}

result1 := reXrefSubsection.FindStringSubmatch(txt)
if len(result1) == 0 {
// Try to match invalid subsection beginning lines from previously
// read, unidentified lines. Covers cases in which the object number
// and the number of entries in the subsection are not on the same line.
tryMatch := len(unmatchedContent) > 0
unmatchedContent += txt + "\n"
if tryMatch {
result1 = reXrefSubsection.FindStringSubmatch(unmatchedContent)
}
}
if len(result1) == 3 {
// Match
first, _ := strconv.Atoi(result1[1])
second, _ := strconv.Atoi(result1[2])
curObjNum = first
secObjects = second
insideSubsection = true
unmatchedContent = ""
common.Log.Trace("xref subsection: first object: %d objects: %d", curObjNum, secObjects)
continue
}
Expand All @@ -801,6 +813,7 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
first, _ := strconv.ParseInt(result2[1], 10, 64)
gen, _ := strconv.Atoi(result2[2])
third := result2[3]
unmatchedContent = ""

if strings.ToLower(third) == "n" && first > 1 {
// Object in use in the file! Load it.
Expand Down Expand Up @@ -829,6 +842,7 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
curObjNum++
continue
}

if (len(txt) > 6) && (txt[:7] == "trailer") {
common.Log.Trace("Found trailer - %s", txt)
// Sometimes get "trailer << ...."
Expand Down

0 comments on commit 2daa144

Please sign in to comment.