Skip to content

Commit

Permalink
*) adding an interface for customized blacklist classes
Browse files Browse the repository at this point in the history
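   - It is now possible to use a customized blacklist engine
     instead of the default one.
   - To do so, set the property BlackLists.class to the fully qualified
     name of the engine class; the class must provide a constructor
     that takes the lists directory as a java.io.File.
   See: http://www.yacy-forum.de/viewtopic.php?t=2108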

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2397 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
theli committed Aug 12, 2006
1 parent 6d2f159 commit eee44be
Show file tree
Hide file tree
Showing 15 changed files with 45 additions and 18 deletions.
2 changes: 2 additions & 0 deletions htroot/Blacklist_p.html
Expand Up @@ -8,6 +8,8 @@
#%env/templates/header.template%#
<br><br>
<h2>Blacklist</h2>
<p>Used Blacklist engine: <span class="settingsValue">#[blacklistEngine]#</span></p>

<p>This function provides a URL filter to the proxy; any blacklisted URL is blocked
from being loaded. You can define several blacklists and activate them separately.
You may also provide your blacklist to other peers by sharing them; in return you may
Expand Down
5 changes: 3 additions & 2 deletions htroot/Blacklist_p.java
Expand Up @@ -79,6 +79,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

String blacklistToUse = null;
serverObjects prop = new serverObjects();
prop.put("blacklistEngine", plasmaSwitchboard.urlBlacklist.getEngineInfo());

// do all post operations
if (post != null) {
Expand Down Expand Up @@ -334,7 +335,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (dirlist != null) {
for (int i = 0; i <= dirlist.length - 1; i++) {
prop.put(BLACKLIST + blacklistCount + "_name", dirlist[i]);
prop.put(BLACKLIST + blacklistCount + "_shared", 0);
prop.put(BLACKLIST + blacklistCount + "_selected", 0);

if (dirlist[i].equals(blacklistToUse)) { //current List
prop.put(BLACKLIST + blacklistCount + "_selected", 1);
Expand All @@ -351,7 +352,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (listManager.ListInListslist(BLACKLIST_SHARED, dirlist[i])) {
prop.put(BLACKLIST + blacklistCount + "_shared", 1);
} else {
prop.put(BLACKLIST + blacklistCount + "_selected", 0);
prop.put(BLACKLIST + blacklistCount + "_shared", 0);
}

int activeCount = 0;
Expand Down
2 changes: 1 addition & 1 deletion htroot/IndexControl_p.java
Expand Up @@ -63,8 +63,8 @@
import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyClient;
Expand Down
4 changes: 2 additions & 2 deletions htroot/yacy/transferRWI.java
Expand Up @@ -54,7 +54,7 @@
import de.anomic.index.indexEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
Expand Down Expand Up @@ -99,7 +99,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
shortCacheFlush = true;
break;
}
try {Thread.sleep(100);} catch (InterruptedException e) {}
try {Thread.sleep(100);} catch (InterruptedException e) {/* */}
}
}

Expand Down
2 changes: 1 addition & 1 deletion htroot/yacy/transferURL.java
Expand Up @@ -48,7 +48,7 @@
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/http/httpdProxyHandler.java
Expand Up @@ -96,7 +96,7 @@
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/plasma/plasmaCrawlLURL.java
Expand Up @@ -73,6 +73,7 @@
import de.anomic.kelondro.kelondroTree;
import de.anomic.kelondro.kelondroRow;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/plasma/plasmaCrawlStacker.java
Expand Up @@ -66,6 +66,7 @@
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverSemaphore;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.bitfield;
Expand Down
2 changes: 2 additions & 0 deletions source/de/anomic/plasma/plasmaCrawlWorker.java
Expand Up @@ -51,6 +51,8 @@
import java.net.NoRouteToHostException;
import java.net.SocketException;
import de.anomic.net.URL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;

import java.net.UnknownHostException;
import java.util.Date;
import de.anomic.http.httpHeader;
Expand Down
22 changes: 21 additions & 1 deletion source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -106,6 +106,7 @@ this class is also the core of the http crawling.
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.net.InetAddress;
import java.net.MalformedURLException;

Expand Down Expand Up @@ -133,6 +134,7 @@ this class is also the core of the http crawling.
import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
import de.anomic.http.httpdHandler;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
Expand All @@ -145,6 +147,7 @@ this class is also the core of the http crawling.
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroMapTable;
import de.anomic.plasma.dbImport.dbImportManager;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverAbstractSwitch;
import de.anomic.server.serverCodings;
import de.anomic.server.serverDate;
Expand Down Expand Up @@ -303,7 +306,24 @@ public plasmaSwitchboard(String rootPath, String initPath, String configPath) {

// load the black-list / inspired by [AS]
File ulrBlackListFile = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS"));
urlBlacklist = new plasmaURLPattern(ulrBlackListFile);
String blacklistClassName = getConfig("BlackLists.class", "de.anomic.plasma.urlPattern.defaultURLPattern");

this.log.logConfig("Starting blacklist engine ...");
try {
Class blacklistClass = Class.forName(blacklistClassName);
Constructor blacklistClassConstr = blacklistClass.getConstructor( new Class[] { File.class } );
urlBlacklist = (plasmaURLPattern) blacklistClassConstr.newInstance(new Object[] { ulrBlackListFile });
this.log.logFine("Used blacklist engine class: " + blacklistClassName);
this.log.logConfig("Using blacklist engine: " + urlBlacklist.getEngineInfo());
} catch (Exception e) {
this.log.logSevere("Unable to load the blacklist engine",e);
System.exit(-1);
} catch (Error e) {
this.log.logSevere("Unable to load the blacklist engine",e);
System.exit(-1);
}

this.log.logConfig("Loading backlist data ...");
listManager.switchboard = this;
listManager.listsPath = ulrBlackListFile;
listManager.reloadBlacklists();
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/plasma/plasmaWordIndex.java
Expand Up @@ -56,6 +56,7 @@
import java.util.Date;
import java.util.TreeSet;
import de.anomic.net.URL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;

import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexCollectionRI;
Expand Down
10 changes: 4 additions & 6 deletions source/de/anomic/yacy/yacyClient.java
Expand Up @@ -45,7 +45,6 @@

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import de.anomic.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
Expand All @@ -59,19 +58,18 @@
import de.anomic.index.indexRowSetContainer;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverCodings;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyVersion;

public final class yacyClient {

Expand Down
6 changes: 3 additions & 3 deletions source/de/anomic/yacy/yacySearch.java
Expand Up @@ -44,17 +44,17 @@
package de.anomic.yacy;

import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.HashMap;

import de.anomic.index.indexContainer;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.logging.serverLog;

public class yacySearch extends Thread {
Expand Down
2 changes: 1 addition & 1 deletion source/migration.java
Expand Up @@ -247,7 +247,7 @@ public static void migrateSwitchConfigSettings(plasmaSwitchboard sb) {
}

// migration for blacklists
if ((value = sb.getConfig("proxyBlackLists","")).length() > 0) {
if ((value = sb.getConfig("proxyBlackListsActive","")).length() > 0) {
sb.setConfig("proxy.BlackLists", value);
sb.setConfig("crawler.BlackLists", value);
sb.setConfig("dht.BlackLists", value);
Expand Down
1 change: 1 addition & 0 deletions yacy.init
Expand Up @@ -205,6 +205,7 @@ proxyYellowList=yacy.yellow
# the black-list; URLs appearing in this list will not be loaded;
# instead, a 404 is always returned
# all these files will be placed in the listsPath
BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern
BlackLists.types=proxy,crawler,dht,search
BlackLists.Shared=url.default.black

Expand Down

0 comments on commit eee44be

Please sign in to comment.