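Simplify pdfParser.parse(): declare locals where they are first used, create the PDFTextStripper at its point of use, check for interruption once after parsing, and drop the inner catch-all around writeText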

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6463 6c8d7289-2bf4-0310-a012-ef5d649a1542
yacy · Nov 6, 2009 · 08f1cbb · 08f1cbb
1 parent 54c54fb
commit 08f1cbb
Showing 1 changed file with 10 additions and 20 deletions.
diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java
@@ -88,31 +88,22 @@ public Set<String> supportedExtensions() {
 
     public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
 
-        PDDocument theDocument = null;
-        Writer writer = null;
-        File writerFile = null;
-
-        String docTitle = null, docSubject = null, docAuthor = null, docKeywordStr = null;
-
-        // check for interruption
-        checkInterruption();
-
-        // creating a pdf parser
+        // create a pdf parser
+        final PDDocument theDocument;
         final PDFParser parser;
-        final PDFTextStripper stripper;
         try {
             Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
             parser = new PDFParser(source);
             parser.parse();
-            checkInterruption();
-            stripper = new PDFTextStripper();
             theDocument = parser.getPDDocument();
         } catch (IOException e) {
             Log.logException(e);
             throw new ParserException(e.getMessage(), location);
         } finally {
             Thread.currentThread().setPriority(Thread.NORM_PRIORITY);
         }
+
+        checkInterruption();
 
         if (theDocument.isEncrypted()) {
             try {
@@ -134,13 +125,16 @@ public Document parse(final DigestURI location, final String mimeType, final Str
 
         // extracting some metadata
         final PDDocumentInformation theDocInfo = theDocument.getDocumentInformation();            
+        String docTitle = null, docSubject = null, docAuthor = null, docKeywordStr = null;
         if (theDocInfo != null) {
             docTitle = theDocInfo.getTitle();
             docSubject = theDocInfo.getSubject();
             docAuthor = theDocInfo.getAuthor();
             docKeywordStr = theDocInfo.getKeywords();
         }            
 
+        Writer writer = null;
+        File writerFile = null;
         try {
             // creating a writer for output
             if ((this.contentLength == -1) || (this.contentLength > Idiom.MAX_KEEP_IN_MEMORY_SIZE)) {
@@ -149,13 +143,9 @@ public Document parse(final DigestURI location, final String mimeType, final Str
             } else {
                 writer = new CharBuffer(); 
             }
-            try {
-                stripper.writeText(theDocument, writer ); // may throw a NPE
-            } catch (Exception e) {
-                Log.logException(e);
-                Log.logWarning("pdfParser", e.getMessage());
-            }
-            theDocument.close(); theDocument = null;            
+            final PDFTextStripper stripper = new PDFTextStripper();
+            stripper.writeText(theDocument, writer); // may throw a NPE
+            theDocument.close();           
             writer.close();
         } catch (IOException e) {
             Log.logException(e);