some patches to get the torrent parser working

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6551 6c8d7289-2bf4-0310-a012-ef5d649a1542
yacy · Jan 7, 2010 · dff4f95 · dff4f95
1 parent 75dfe40
commit dff4f95
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 19 deletions.
diff --git a/defaults/yacy.init b/defaults/yacy.init
@@ -127,7 +127,7 @@ update.onlySignedFiles = 1
 # a peer can be re-started periodically
 # restart.process can be either 'off' (no automatic restart) or 'time' (time-rule-based, see below)
 restart.process = off
-# the restart.cycle is the number of hours that must pass bevore a restart is done
+# the restart.cycle is the number of hours that must pass before a restart is done
 restart.cycle = 20
 # the restart.hour is a pattern that must match with the hour string (two-digit, 24h)
 # when the restart should be performed

diff --git a/source/de/anomic/crawler/retrieval/Response.java b/source/de/anomic/crawler/retrieval/Response.java
@@ -654,7 +654,7 @@ public final String shallIndexCacheForCrawler() {
 
         // check profile
         if (!profile().indexText() && !profile().indexMedia()) {
-            return "indexing not allowed - indexText and indexMedia not set (for crawler = " + profile.name()+ ")";
+            return "indexing not allowed - indexText and indexMedia not set (for crawler = " + profile.name() + ")";
         }
 
         // -CGI access in request
@@ -670,17 +670,19 @@ public final String shallIndexCacheForCrawler() {
         // -ranges in request
         // we checked that in shallStoreCache
 
-        // check if pictures can be indexed
+        // check if document can be indexed
         if (responseHeader != null) {
             final String mimeType = responseHeader.mime();
             String parserError = TextParser.supportsMime(mimeType);
-            if (parserError != null) { return "Media_Content, parser error: " + parserError; }
+            if (parserError != null && TextParser.supportsExtension(url()) != null)  return "no parser available: " + parserError;
         }
+        /*
         if (Classification.isMediaExtension(url().getFileExtension()) &&
            !Classification.isImageExtension((url().getFileExtension()))) {
             return "Media_Content_(forbidden)";
         }
-
+         */
+
         // -if-modified-since in request
         // if the page is fresh at the very moment we can index it
         // -> this does not apply for the crawler

diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
@@ -1200,7 +1200,7 @@ public String toIndexer(final Response response) {
             if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason);
             addURLtoErrorDB(response.url(), (referrerURL == null) ? "" : referrerURL.hash(), response.initiator(), response.name(), noIndexReason);
             // finish this entry
-            return "not indexed any word in URL " + response.url() + "; cause: " + noIndexReason;
+            return "not allowed: " + noIndexReason;
         }
 
         // put document into the concurrent processing queue

diff --git a/source/net/yacy/ai/example/testorder.java b/source/net/yacy/ai/example/testorder.java
@@ -0,0 +1,36 @@
+package net.yacy.ai.example;
+
+import java.util.Random;
+import java.util.concurrent.PriorityBlockingQueue;
+
+/**
+ * Small demo class: shows that a {@link PriorityBlockingQueue} hands out its
+ * elements in the order defined by {@link #compareTo(testorder)}.
+ */
+public class testorder implements Comparable<testorder> {
+
+    /** the value that defines the natural order of this object */
+    public int x;
+
+    /**
+     * @param x the value used for ordering
+     */
+    public testorder(int x) {
+        this.x = x;
+    }
+
+    /**
+     * @return the decimal string form of {@link #x}
+     */
+    @Override
+    public String toString() {
+        return Integer.toString(this.x);
+    }
+
+    /**
+     * Orders instances by ascending {@link #x}.
+     * Explicit comparisons are used instead of a subtraction, so the result cannot overflow.
+     *
+     * @param o the object to compare with
+     * @return 1, -1 or 0 if this.x is greater than, less than or equal to o.x
+     */
+    public int compareTo(testorder o) {
+        if (this.x > o.x) return 1;
+        if (this.x < o.x) return -1;
+        return 0;
+    }
+
+    /**
+     * Fills a queue with ten random values from the range [0, 20) and prints
+     * them, one per line, in the order in which the queue returns them.
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+        PriorityBlockingQueue<testorder> q = new PriorityBlockingQueue<testorder>();
+        Random r = new Random();
+        for (int i = 0; i < 10; i++) {
+            q.add(new testorder(r.nextInt(20)));
+        }
+        while (!q.isEmpty()) {
+            try {
+                System.out.println(q.take().toString());
+            } catch (InterruptedException e) {
+                // do not swallow the interruption: restore the flag for the caller and stop draining
+                Thread.currentThread().interrupt();
+                break;
+            }
+        }
+    }
+}
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
@@ -391,13 +391,10 @@ private synchronized void resortLinks() {
                         else if (Classification.isAudioExtension(ext)) audiolinks.put(url, entry.getValue());
                         else if (Classification.isVideoExtension(ext)) videolinks.put(url, entry.getValue());
                         else if (Classification.isApplicationExtension(ext)) applinks.put(url, entry.getValue());
-                    } else {
-                        hyperlinks.put(url, entry.getValue());
                     }
-                } else {
-                    // a path to a directory
-                    hyperlinks.put(url, entry.getValue());
                 }
+                // in any case we consider this as a link and let the parser decide if that link can be followed
+                hyperlinks.put(url, entry.getValue());
             }
         }
 

diff --git a/source/net/yacy/document/parser/torrentParser.java b/source/net/yacy/document/parser/torrentParser.java
@@ -87,16 +87,28 @@ public Document parse(DigestURI location, String mimeType, String charset, Input
         if (bo == null) throw new ParserException("BDecoder.parse returned null", location);
         if (bo.getType() != BType.dictionary) throw new ParserException("BDecoder object is not a dictionary", location);
         Map<String, BObject> map = bo.getMap();
-        String comment = map.get("comment").getString();
+        BObject commento = map.get("comment");
+        String comment = (commento == null) ? "" : commento.getString();
         //Date creation = new Date(map.get("creation date").getInteger());
-        Map<String, BObject> info = map.get("info").getMap();
-        List<BObject> filelist = info.get("files").getList();
-        StringBuilder filenames = new StringBuilder(40 * filelist.size());
-        for (BObject fo: filelist) {
-            List<BObject> l = fo.getMap().get("path").getList(); // one file may have several names
-            for (BObject fl: l) filenames.append(fl.toString()).append(" ");
+        BObject infoo = map.get("info");
+        StringBuilder filenames = new StringBuilder();
+        String name = "";
+        if (infoo != null) {
+            Map<String, BObject> info = infoo.getMap();
+            BObject fileso = info.get("files");
+            if (fileso != null) {
+                List<BObject> filelist = fileso.getList();
+                for (BObject fo: filelist) {
+                    BObject patho = fo.getMap().get("path");
+                    if (patho != null) {
+                        List<BObject> l = patho.getList(); // one file may have several names
+                        for (BObject fl: l) filenames.append(fl.toString()).append(" ");
+                    }
+                }
+            }
+            BObject nameo = info.get("name");
+            if (nameo != null) name = nameo.getString();
         }
-        String name = info.get("name").getString();
         try {
             return new Document(
                     location,