Skip to content

Commit

Permalink
* add failsafe mechanism to domainlist retrieval
Browse files Browse the repository at this point in the history
  The domainlist is now saved locally; if none of the URLs given in network.unit.domainlist
  can be retrieved, the file saved at the last boot is used instead.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7289 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
f1ori committed Nov 2, 2010
1 parent 70c9560 commit acd93b1
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 18 deletions.
35 changes: 23 additions & 12 deletions source/de/anomic/search/Switchboard.java
Expand Up @@ -345,6 +345,20 @@ public Switchboard(final File dataPath, final File appPath, final String initPat
// set the default segment names
setDefaultSegments();

// load domainList
try {
this.domainList = null;
if(!getConfig("network.unit.domainlist", "").equals("")) {
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt"));
this.domainList = new FilterEngine();
this.domainList.loadList(new BufferedReader(r), null);
}
} catch (FileNotFoundException e) {
log.logSevere("CONFIG: domainlist not found: " + e.getMessage());
} catch (IOException e) {
log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage());
}

// create a crawler
crawler = new CrawlSwitchboard(
networkName,
Expand Down Expand Up @@ -824,15 +838,7 @@ public void overwriteNetworkDefinition() {
}
*/
MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig("network.unit.domain", "global"));

try {
this.domainList = null;
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath());
this.domainList = new FilterEngine();
this.domainList.loadList(new BufferedReader(r), null);
} catch (FileNotFoundException e) {
} catch (IOException e) {
}

}

public void switchNetwork(final String networkDefinition) {
Expand Down Expand Up @@ -917,13 +923,18 @@ public void switchNetwork(final String networkDefinition) {
this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map"));


// load domainList
try {
this.domainList = null;
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainList", ""), getAppPath().getAbsolutePath());
this.domainList = new FilterEngine();
this.domainList.loadList(new BufferedReader(r), null);
if(!getConfig("network.unit.domainlist", "").equals("")) {
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt"));
this.domainList = new FilterEngine();
this.domainList.loadList(new BufferedReader(r), null);
}
} catch (FileNotFoundException e) {
log.logSevere("CONFIG: domainlist not found: " + e.getMessage());
} catch (IOException e) {
log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage());
}

this.crawlStacker = new CrawlStacker(
Expand Down
19 changes: 14 additions & 5 deletions source/de/anomic/server/serverSwitch.java
Expand Up @@ -22,10 +22,10 @@
package de.anomic.server;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
Expand All @@ -43,13 +43,11 @@
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.kelondro.workflow.WorkflowThread;

import de.anomic.search.Switchboard;
import de.anomic.server.serverAccessTracker.Track;
import de.anomic.server.serverCore.Session;

Expand Down Expand Up @@ -573,8 +571,9 @@ public Iterator<String> accessHosts() {
* uri may be a URL or a filename with a path relative to the rootPath parameter
* @param uri url or filename
* @param rootPath searchpath for file
* @param file file to use when remote fetching fails (null if unused)
*/
public Reader getConfigFileFromWebOrLocally(String uri, String rootPath) throws IOException, FileNotFoundException {
public Reader getConfigFileFromWebOrLocally(String uri, String rootPath, File file) throws IOException, FileNotFoundException {
if(uri.startsWith("http://") || uri.startsWith("https://")) {
String[] uris = uri.split(",");
for (String netdef: uris) {
Expand All @@ -586,12 +585,22 @@ public Reader getConfigFileFromWebOrLocally(String uri, String rootPath) throws
client.setHeader(reqHeader.entrySet());
byte[] data = client.GETbytes(uri);
if (data == null || data.length == 0) continue;
// save locally in case next fetch fails
if (file != null) {
FileOutputStream f = new FileOutputStream(file);
f.write(data);
f.close();
}
return new InputStreamReader(new BufferedInputStream(new ByteArrayInputStream(data)));
} catch (final Exception e) {
continue;
}
}
throw new FileNotFoundException();
if (file != null && file.exists()) {
return new FileReader(file);
} else {
throw new FileNotFoundException();
}
} else {
final File f = (uri.length() > 0 && uri.charAt(0) == '/') ? new File(uri) : new File(rootPath, uri);
if (f.exists()) {
Expand Down
2 changes: 1 addition & 1 deletion source/net/yacy/cora/protocol/http/HTTPClient.java
Expand Up @@ -447,7 +447,7 @@ private byte[] getContentBytes(final HttpUriRequest httpUriRequest, final long m
byte[] content = null;
try {
execute(httpUriRequest);
if (httpResponse == null) return null;
if (httpResponse == null || httpResponse.getStatusLine().getStatusCode() != 200) return null;
// get the response body
final HttpEntity httpEntity = httpResponse.getEntity();
if (httpEntity != null) {
Expand Down

0 comments on commit acd93b1

Please sign in to comment.