Skip to content

Commit

Permalink
some more fixes for new plasmaCrawlLURL.load behavior
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2511 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
hermens committed Sep 7, 2006
1 parent 7aeadbe commit ff4362b
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 22 deletions.
8 changes: 5 additions & 3 deletions htroot/Bookmarks.java
Expand Up @@ -138,9 +138,11 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
try {
plasmaCrawlLURL.Entry urlentry = switchboard.urlPool.loadedURL.load(urlHash, null);
prop.put("mode_edit", 0); // create mode
-prop.put("mode_title", urlentry.descr());
-prop.put("mode_description", urlentry.descr());
-prop.put("mode_url", urlentry.url());
+if (urlentry != null) {
+prop.put("mode_title", urlentry.descr());
+prop.put("mode_description", urlentry.descr());
+prop.put("mode_url", urlentry.url());
+}
prop.put("mode_tags", "");
prop.put("mode_public", 0);
} catch (IOException e) {
Expand Down
46 changes: 32 additions & 14 deletions htroot/IndexControl_p.java
Expand Up @@ -213,11 +213,15 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (post.containsKey("urlhashdelete")) {
try {
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
-URL url = entry.url();
-urlstring = url.toNormalform();
-prop.put("urlstring", "");
-switchboard.urlPool.loadedURL.remove(urlhash);
-prop.put("result", "Removed URL " + urlstring);
+if (entry != null) {
+URL url = entry.url();
+urlstring = url.toNormalform();
+prop.put("urlstring", "");
+switchboard.urlPool.loadedURL.remove(urlhash);
+prop.put("result", "Removed URL " + urlstring);
+} else {
+prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
+}
} catch (IOException e) {
prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
}
Expand Down Expand Up @@ -263,7 +267,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
iEntry = (indexEntry) urlIter.next();
try {
lurl = switchboard.urlPool.loadedURL.load(iEntry.urlHash(), null);
-if (lurl.toString() == null) {
+if ((lurl == null)||(lurl.toString() == null)) {
unknownURLEntries.add(iEntry.urlHash());
urlIter.remove();
} else {
Expand Down Expand Up @@ -325,10 +329,14 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (post.containsKey("urlhashsearch")) {
try {
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(urlhash, null);
-URL url = entry.url();
-urlstring = url.toString();
-prop.put("urlstring", urlstring);
-prop.put("result", genUrlProfile(switchboard, entry, urlhash));
+if (entry != null) {
+URL url = entry.url();
+urlstring = url.toString();
+prop.put("urlstring", urlstring);
+prop.put("result", genUrlProfile(switchboard, entry, urlhash));
+} else {
+prop.put("result", "No Entry for URL hash " + urlhash);
+}
} catch (IOException e) {
prop.put("result", "No Entry for URL hash " + urlhash);
}
Expand Down Expand Up @@ -387,7 +395,12 @@ public static String genUrlProfile(plasmaSwitchboard switchboard, plasmaCrawlLUR
URL url = entry.url();
String referrer = null;
try {
-referrer = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null).url().toString();
+plasmaCrawlLURL.Entry referrerEntry = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null);
+if (referrerEntry != null) {
+referrer = referrerEntry.url().toString();
+} else {
+referrer = "<unknown>";
+}
} catch (IOException e) {
referrer = "<unknown>";
}
Expand Down Expand Up @@ -444,8 +457,13 @@ public static String genUrlList(plasmaSwitchboard switchboard, String keyhash, S
xi = (indexEntry) en.next();
uh = new String[]{xi.urlHash(), Integer.toString(xi.posintext())};
try {
-us = switchboard.urlPool.loadedURL.load(uh[0], null).url().toString();
-tm.put(us, uh);
+plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(uh[0], null);
+if (entry != null) {
+us = entry.url().toString();
+tm.put(us, uh);
+} else {
+tm.put(uh[0], uh);
+}
} catch (IOException e) {
tm.put(uh[0], uh);
}
Expand Down Expand Up @@ -498,4 +516,4 @@ public static String genUrlList(plasmaSwitchboard switchboard, String keyhash, S
}
}

-}
+}
11 changes: 8 additions & 3 deletions htroot/yacy/crawlOrder.java
Expand Up @@ -251,9 +251,14 @@ private static Object[] stack(plasmaSwitchboard switchboard, String url, String
// send lurl-Entry as response
try {
plasmaCrawlLURL.Entry entry = switchboard.urlPool.loadedURL.load(indexURL.urlHash(url), null);
-response = "double";
-switchboard.urlPool.loadedURL.notifyGCrawl(entry.hash(), iam, youare);
-lurl = crypt.simpleEncode(entry.toString());
+if (entry != null) {
+response = "double";
+switchboard.urlPool.loadedURL.notifyGCrawl(entry.hash(), iam, youare);
+lurl = crypt.simpleEncode(entry.toString());
+} else {
+response = "rejected";
+lurl = "";
+}
} catch (IOException e) {
response = "rejected";
lurl = "";
Expand Down
2 changes: 2 additions & 0 deletions source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -2159,6 +2159,8 @@ public int removeAllUrlReferences(String urlhash, boolean fetchOnline) {
// determine the url string
try {
plasmaCrawlLURL.Entry entry = urlPool.loadedURL.load(urlhash, null);
+if (entry == null)
+return 0;
URL url = entry.url();
if (url == null)
return 0;
Expand Down
9 changes: 7 additions & 2 deletions source/de/anomic/plasma/plasmaWordIndex.java
Expand Up @@ -692,8 +692,13 @@ public void run() {
// System.out.println("Wordhash: "+wordHash+" UrlHash:
// "+entry.getUrlHash());
try {
-url = lurl.load(entry.urlHash(), null).url();
-if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url) == true)) {
+plasmaCrawlLURL.Entry lurlEntry = lurl.load(entry.urlHash(), null);
+if (lurlEntry != null) {
+url = lurlEntry.url();
+if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url) == true)) {
+urlHashs.add(entry.urlHash());
+}
+} else {
urlHashs.add(entry.urlHash());
}
} catch (IOException e) {
Expand Down

0 comments on commit ff4362b

Please sign in to comment.