Skip to content

Commit

Permalink
some patches to get the torrent parser working
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6551 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Jan 7, 2010
1 parent 75dfe40 commit dff4f95
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 19 deletions.
2 changes: 1 addition & 1 deletion defaults/yacy.init
Expand Up @@ -127,7 +127,7 @@ update.onlySignedFiles = 1
# a peer can be re-started periodically
# restart.process can be either 'off' (no automatic restart) or 'time' (time- rule-based, see below)
restart.process = off
# the restart.cycle is the number of hours that must pass bevore a restart is done
# the restart.cycle is the number of hours that must pass before a restart is done
restart.cycle = 20
# the restart.hour is a pattern that must match with the hour string (two-digit, 24h)
# when the restart should be performed
Expand Down
10 changes: 6 additions & 4 deletions source/de/anomic/crawler/retrieval/Response.java
Expand Up @@ -654,7 +654,7 @@ public final String shallIndexCacheForCrawler() {

// check profile
if (!profile().indexText() && !profile().indexMedia()) {
return "indexing not allowed - indexText and indexMedia not set (for crawler = " + profile.name()+ ")";
return "indexing not allowed - indexText and indexMedia not set (for crawler = " + profile.name() + ")";
}

// -CGI access in request
Expand All @@ -670,17 +670,19 @@ public final String shallIndexCacheForCrawler() {
// -ranges in request
// we checked that in shallStoreCache

// check if pictures can be indexed
// check if document can be indexed
if (responseHeader != null) {
final String mimeType = responseHeader.mime();
String parserError = TextParser.supportsMime(mimeType);
if (parserError != null) { return "Media_Content, parser error: " + parserError; }
if (parserError != null && TextParser.supportsExtension(url()) != null) return "no parser available: " + parserError;
}
/*
if (Classification.isMediaExtension(url().getFileExtension()) &&
!Classification.isImageExtension((url().getFileExtension()))) {
return "Media_Content_(forbidden)";
}

*/

// -if-modified-since in request
// if the page is fresh at the very moment we can index it
// -> this does not apply for the crawler
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/search/Switchboard.java
Expand Up @@ -1200,7 +1200,7 @@ public String toIndexer(final Response response) {
if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason);
addURLtoErrorDB(response.url(), (referrerURL == null) ? "" : referrerURL.hash(), response.initiator(), response.name(), noIndexReason);
// finish this entry
return "not indexed any word in URL " + response.url() + "; cause: " + noIndexReason;
return "not allowed: " + noIndexReason;
}

// put document into the concurrent processing queue
Expand Down
36 changes: 36 additions & 0 deletions source/net/yacy/ai/example/testorder.java
@@ -0,0 +1,36 @@
package net.yacy.ai.example;

import java.util.Random;
import java.util.concurrent.PriorityBlockingQueue;

/**
 * Small demonstration of how a {@link PriorityBlockingQueue} orders elements
 * by their natural ordering.
 *
 * <p>Instances wrap a single {@code int} and compare in ascending order of
 * that value. {@code equals}/{@code hashCode} are not overridden, so two
 * instances with the same value compare as 0 but are not {@code equals}.
 */
public class testorder implements Comparable<testorder> {

    /** The wrapped value; it is the only key used for ordering. */
    public int x;

    /**
     * Creates a new element.
     *
     * @param x the value this element carries and is ordered by
     */
    public testorder(int x) {
        this.x = x;
    }

    /**
     * @return the decimal string form of {@link #x}
     */
    @Override
    public String toString() {
        return Integer.toString(this.x);
    }

    /**
     * Orders elements ascending by {@link #x}.
     *
     * @param o the element to compare against
     * @return 1 if this value is greater, -1 if it is smaller, 0 if both are equal
     */
    @Override
    public int compareTo(testorder o) {
        // explicit comparisons instead of subtraction, so extreme values cannot overflow
        if (this.x > o.x) return 1;
        if (this.x < o.x) return -1;
        return 0;
    }

    /**
     * Fills a priority queue with ten random values from 0 (inclusive) to 20
     * (exclusive) and prints them in the order the queue hands them out.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        PriorityBlockingQueue<testorder> q = new PriorityBlockingQueue<testorder>();
        Random r = new Random();
        for (int i = 0; i < 10; i++) {
            q.add(new testorder(r.nextInt(20)));
        }
        while (!q.isEmpty()) {
            try {
                System.out.println(q.take().toString());
            } catch (InterruptedException e) {
                // do not swallow the interruption: restore the flag for the
                // caller and stop draining instead of looping on
                Thread.currentThread().interrupt();
                break;
            }
        }
    }
}
7 changes: 2 additions & 5 deletions source/net/yacy/document/Document.java
Expand Up @@ -391,13 +391,10 @@ private synchronized void resortLinks() {
else if (Classification.isAudioExtension(ext)) audiolinks.put(url, entry.getValue());
else if (Classification.isVideoExtension(ext)) videolinks.put(url, entry.getValue());
else if (Classification.isApplicationExtension(ext)) applinks.put(url, entry.getValue());
} else {
hyperlinks.put(url, entry.getValue());
}
} else {
// a path to a directory
hyperlinks.put(url, entry.getValue());
}
// in any case we consider this as a link and let the parser decide if that link can be followed
hyperlinks.put(url, entry.getValue());
}
}

Expand Down
28 changes: 20 additions & 8 deletions source/net/yacy/document/parser/torrentParser.java
Expand Up @@ -87,16 +87,28 @@ public Document parse(DigestURI location, String mimeType, String charset, Input
if (bo == null) throw new ParserException("BDecoder.parse returned null", location);
if (bo.getType() != BType.dictionary) throw new ParserException("BDecoder object is not a dictionary", location);
Map<String, BObject> map = bo.getMap();
String comment = map.get("comment").getString();
BObject commento = map.get("comment");
String comment = (commento == null) ? "" : commento.getString();
//Date creation = new Date(map.get("creation date").getInteger());
Map<String, BObject> info = map.get("info").getMap();
List<BObject> filelist = info.get("files").getList();
StringBuilder filenames = new StringBuilder(40 * filelist.size());
for (BObject fo: filelist) {
List<BObject> l = fo.getMap().get("path").getList(); // one file may have several names
for (BObject fl: l) filenames.append(fl.toString()).append(" ");
BObject infoo = map.get("info");
StringBuilder filenames = new StringBuilder();
String name = "";
if (infoo != null) {
Map<String, BObject> info = infoo.getMap();
BObject fileso = info.get("files");
if (fileso != null) {
List<BObject> filelist = fileso.getList();
for (BObject fo: filelist) {
BObject patho = fo.getMap().get("path");
if (patho != null) {
List<BObject> l = patho.getList(); // one file may have several names
for (BObject fl: l) filenames.append(fl.toString()).append(" ");
}
}
}
BObject nameo = info.get("name");
if (nameo != null) name = nameo.getString();
}
String name = info.get("name").getString();
try {
return new Document(
location,
Expand Down

0 comments on commit dff4f95

Please sign in to comment.