Skip to content

Commit

Permalink
*) httpdFileHandler.java:
Browse files Browse the repository at this point in the history
   No stack trace will be printed into the log file for "Connection timed out" errors anymore.
   See: http://www.yacy-forum.de/viewtopic.php?p=6381

*) plasmaCrawlWorker.java:
   If a "Read timed out" error occurs while crawling a site, the failed crawl
   will now be retried.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@493 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
theli committed Aug 4, 2005
1 parent bae369a commit 1d83d7e
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 29 deletions.
18 changes: 12 additions & 6 deletions source/de/anomic/http/httpdFileHandler.java
Expand Up @@ -505,29 +505,35 @@ public void doResponse(Properties conProp, httpHeader requestHeader, OutputStrea
StringBuffer errorMessage = new StringBuffer();
Exception errorExc = null;

if (e instanceof InterruptedException) {
String errorMsg = e.getMessage();
if (
(e instanceof InterruptedException) ||
((errorMsg != null) && (errorMsg.startsWith("Socket closed")) && (Thread.currentThread().isInterrupted()))
) {
errorMessage.append("Interruption detected while processing query.");
httpStatusCode = 503;
} else {
String errorMsg = e.getMessage();
if ((errorMsg != null) &&
(
errorMsg.startsWith("Broken pipe") ||
errorMsg.startsWith("Connection reset") ||
errorMsg.startsWith("Software caused connection abort")
errorMsg.startsWith("Software caused connection abort")
)) {
// client closed the connection, so we just end silently
errorMessage.append("Client unexpectedly closed connection while processing query.");
} else if ((errorMsg != null) && (errorMsg.startsWith("Connection timed out"))) {
errorMessage.append("Connection timed out.");
} else {
errorMessage.append("Unexpected error while processing query.");
httpStatusCode = 500;
errorExc = e;
}
}

errorMessage.append("\nQuery: ").append(path)
.append("\nClient: ").append(conProp.getProperty(httpd.CONNECTION_PROP_CLIENTIP,"unknown"))
.append("\nReason: ").append(e.toString());
errorMessage.append("\nSession: ").append(Thread.currentThread().getName())
.append("\nQuery: ").append(path)
.append("\nClient: ").append(conProp.getProperty(httpd.CONNECTION_PROP_CLIENTIP,"unknown"))
.append("\nReason: ").append(e.toString());

if (!conProp.containsKey(httpd.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
// sending back an error message to the client
Expand Down
63 changes: 40 additions & 23 deletions source/de/anomic/plasma/plasmaCrawlWorker.java
Expand Up @@ -60,7 +60,8 @@
import de.anomic.server.logging.serverMiniLogFormatter;

public final class plasmaCrawlWorker extends Thread {


private static final int DEFAULT_CRAWLING_RETRY_COUNT = 5;
private static final String threadBaseName = "CrawlerWorker";

private final CrawlerPool myPool;
Expand Down Expand Up @@ -260,7 +261,7 @@ public static void load(
remoteProxyUse,
cacheManager,
log,
0,
DEFAULT_CRAWLING_RETRY_COUNT,
true
);
}
Expand All @@ -278,10 +279,12 @@ private static void load(
boolean remoteProxyUse,
plasmaHTCache cacheManager,
serverLog log,
int redirectionCount,
int crawlingRetryCount,
boolean useContentEncodingGzip
) throws IOException {
if (url == null) return;
if (crawlingRetryCount < 0) return;

Date requestDate = new Date(); // remember the time...
String host = url.getHost();
String path = url.getPath();
Expand Down Expand Up @@ -358,7 +361,7 @@ private static void load(
log.logError("CRAWLER LOADER ERROR1: with URL=" + url.toString() + ": " + e.toString());
}
} else if (res.status.startsWith("30")) {
if (redirectionCount < 5) {
if (crawlingRetryCount < 0) {
if (res.responseHeader.containsKey(httpHeader.LOCATION)) {
// generating the new url
URL redirectionUrl = new URL(url, (String) res.responseHeader.get(httpHeader.LOCATION));
Expand All @@ -382,7 +385,7 @@ private static void load(
remoteProxyUse,
cacheManager,
log,
++redirectionCount,
--crawlingRetryCount,
useContentEncodingGzip
);
}
Expand All @@ -396,24 +399,38 @@ private static void load(
}
if (remote != null) remote.close();
} catch (Exception e) {
if ((e.getMessage() != null) && (e.getMessage().indexOf("Corrupt GZIP trailer") >= 0)) {
log.logWarning("Problems detected while receiving gzip encoded content from '" + url.toString() +
"'. Retrying request without using gzip content encoding.");
load(url,
name,
referer,
initiator,
depth,
profile,
socketTimeout,
remoteProxyHost,
remoteProxyPort,
remoteProxyUse,
cacheManager,
log,
0,
false
);
boolean retryCrawling = false;
String errorMsg = e.getMessage();
if (errorMsg != null) {
if (errorMsg.indexOf("Corrupt GZIP trailer") >= 0) {
log.logWarning("Problems detected while receiving gzip encoded content from '" + url.toString() +
"'. Retrying request without using gzip content encoding.");
retryCrawling = true;
} else if (errorMsg.indexOf("Socket time-out: Read timed out") >= 0) {
log.logWarning("Read timeout while receiving content from '" + url.toString() +
"'. Retrying request.");
retryCrawling = true;
}

if (retryCrawling) {
load(url,
name,
referer,
initiator,
depth,
profile,
socketTimeout,
remoteProxyHost,
remoteProxyPort,
remoteProxyUse,
cacheManager,
log,
0,
false
);
} else {
log.logError("CRAWLER LOADER ERROR2 with URL=" + url.toString() + ": " + e.toString(),e);
}
} else {
// this may happen if the targeted host does not exist or anything with the
// remote server was wrong.
Expand Down

0 comments on commit 1d83d7e

Please sign in to comment.