Skip to content

Commit

Permalink
Insert a space when TJ string is offset by more than a threshold
Browse files Browse the repository at this point in the history
That threshold is hard-coded as -100 (is this font specific?)
  • Loading branch information
njwilson23 committed Oct 29, 2017
1 parent bdf676e commit 30ff60a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 2 deletions.
13 changes: 11 additions & 2 deletions pdf/contentstream/contentstream.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,17 @@ func (this *ContentStreamParser) ExtractText() (string, error) {
return "", fmt.Errorf("Invalid parameter type, no array (%T)", op.Params[0])
}
for _, obj := range *paramList {
if strObj, ok := obj.(*PdfObjectString); ok {
txt += string(*strObj)
switch v := obj.(type) {
case *PdfObjectString:
txt += string(*v)
case *PdfObjectFloat:
if *v < -100 {
txt += " "
}
case *PdfObjectInteger:
if *v < -100 {
txt += " "
}
}
}
} else if inText && op.Operand == "Tj" {
Expand Down
25 changes: 25 additions & 0 deletions pdf/contentstream/contentstream_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package contentstream

import (
"testing"
)

// TestOperandTJSpacing checks the text extracted from a TJ array that mixes
// strings with numeric entries. The expected output has a single space wherever
// the array holds a large negative number (-327 / -328 here) and no space for
// the small positive entry (5) between "h" and "ypothesized".
func TestOperandTJSpacing(t *testing.T) {
	// The raw literal keeps `\003` as a literal backslash sequence inside the
	// PDF string, whereas referenceText below uses Go's octal escape for byte
	// 0x03; the test expects extraction to yield that same byte.
	content := `BT
[(are)-328(h)5(ypothesized)-328(to)-327(in\003uence)-328(the)-328(stability)-328(of)-328(the)-328(upstream)-327(glaciers,)-328(and)-328(thus)-328(of)-328(the)-328(entire)-327(ice)-328(sheet)]TJ
ET`
	referenceText := "are hypothesized to in\003uence the stability of the upstream glaciers, and thus of the entire ice sheet"

	cStreamParser := NewContentStreamParser(content)

	text, err := cStreamParser.ExtractText()
	if err != nil {
		// Abort immediately: comparing text from a failed extraction would only
		// add a second, misleading failure.
		t.Fatalf("ExtractText returned an unexpected error: %v", err)
	}

	if text != referenceText {
		// %q makes whitespace and control bytes (such as \x03) visible.
		t.Errorf("extracted text mismatch:\n got: %q\nwant: %q", text, referenceText)
	}
}

0 comments on commit 30ff60a

Please sign in to comment.