Skip to content

Commit

Permalink
adapted isListed to the new behavior as discussed (url, getFile)
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1940 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Mar 20, 2006
1 parent 59fc55e commit 1f4412a
Show file tree
Hide file tree
Showing 8 changed files with 12 additions and 8 deletions.
2 changes: 1 addition & 1 deletion htroot/IndexControl_p.java
Expand Up @@ -460,7 +460,7 @@ public static String genUrlList(plasmaSwitchboard switchboard, String keyhash, S

} else {
url = new URL(us);
if (plasmaSwitchboard.urlBlacklist.isListed(url.getHost().toLowerCase(), url.getPath())) {
if (plasmaSwitchboard.urlBlacklist.isListed(url)) {
result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" checked value=\"").append(uh[0]).append("\" align=\"top\">");
} else {
result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" value=\"").append(uh[0]).append("\" align=\"top\">");
Expand Down
2 changes: 1 addition & 1 deletion htroot/yacy/transferURL.java
Expand Up @@ -96,7 +96,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
lEntry = sb.urlPool.loadedURL.newEntry(urls, true);
if ((lEntry != null) && (lEntry.url() != null)) {
if ((blockBlacklist) &&
(plasmaSwitchboard.urlBlacklist.isListed( lEntry.url().getHost().toLowerCase(), lEntry.url().getPath()))) {
(plasmaSwitchboard.urlBlacklist.isListed(lEntry.url()))) {
int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash());
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
lEntry = null;
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaCrawlLURL.java
Expand Up @@ -826,7 +826,7 @@ public void run() {

plasmaCrawlLURL.Entry entry = (plasmaCrawlLURL.Entry) eiter.next();
totalSearchedUrls++;
if (plasmaSwitchboard.urlBlacklist.isListed(entry.url().getHost().toLowerCase(),entry.url().getPath())==true) {
if (plasmaSwitchboard.urlBlacklist.isListed(entry.url())==true) {
lastBlacklistedUrl = entry.url().toString();
lastBlacklistedHash = entry.hash();
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + entry.hash() + " " + entry.url());
Expand Down
3 changes: 1 addition & 2 deletions source/de/anomic/plasma/plasmaCrawlStacker.java
Expand Up @@ -271,8 +271,7 @@ public String stackCrawl(String nexturlString, String referrerString, String ini
}

// check blacklist
String hostlow = nexturl.getHost().toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, nexturl.getPath())) {
if (plasmaSwitchboard.urlBlacklist.isListed(nexturl)) {
reason = "denied_(url_in_blacklist)";
this.log.logFine("URL '" + nexturlString + "' is in blacklist. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime));
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -187,7 +187,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public plasmaCrawlStacker sbStackCrawlThread;
public messageBoard messageDB;
public wikiBoard wikiDB;
public blogBoard blogDB;
public blogBoard blogDB;
public static plasmaCrawlRobotsTxt robots;
public plasmaCrawlProfile profiles;
public plasmaCrawlProfile.entry defaultProxyProfile;
Expand Down
5 changes: 5 additions & 0 deletions source/de/anomic/plasma/plasmaURLPattern.java
Expand Up @@ -42,6 +42,7 @@
package de.anomic.plasma;

import java.io.File;
import java.net.URL;
import java.util.HashMap;
import de.anomic.kelondro.kelondroMSetTools;

Expand Down Expand Up @@ -84,6 +85,10 @@ public void add(String host, String path) {
hostpaths.put(host.toLowerCase(), path);
}

/**
 * Convenience overload that checks a complete URL against this pattern list.
 *
 * The host is lower-cased (default locale) and passed on together with
 * {@code url.getFile()} -- i.e. the path plus the query string, if the URL
 * has one -- to {@link #isListed(String, String)}, which does the matching.
 *
 * @param url the URL to look up; must not be {@code null}
 * @return whatever {@code isListed(String, String)} returns for the
 *         lower-cased host and the file part of {@code url}
 */
public boolean isListed(URL url) {
    final String hostlow = url.getHost().toLowerCase();
    final String file = url.getFile();
    return isListed(hostlow, file);
}

public boolean isListed(String hostlow, String path) {
if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1);
String pp = ""; // path-pattern
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaWordIndex.java
Expand Up @@ -559,7 +559,7 @@ public void run() {
// "+entry.getUrlHash());
try {
url = lurl.getEntry(entry.getUrlHash(), null).url();
if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(url.getHost().toLowerCase(), url.getPath()) == true)) {
if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(url) == true)) {
urlHashs.add(entry.getUrlHash());
}
} catch (IOException e) {
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/yacy/yacyClient.java
Expand Up @@ -465,7 +465,7 @@ public static int search(
for (int n = 0; n < results; n++) {
// get one single search result
urlEntry = urlManager.newEntry((String) result.get("resource" + n), true);
if (urlEntry != null && blacklist.isListed(urlEntry.url().getHost().toLowerCase(), urlEntry.url().getPath())) { continue; } // block with blacklist
if (urlEntry != null && blacklist.isListed(urlEntry.url())) { continue; } // block with blacklist
urlEntry.store();
int urlLength = urlEntry.url().toString().length();
int urlComps = htmlFilterContentScraper.urlComps(urlEntry.url().toString()).length;
Expand Down

0 comments on commit 1f4412a

Please sign in to comment.