Skip to content

Commit

Permalink
*) next step of restructuring for new crawlers
Browse files Browse the repository at this point in the history
   - renaming of HTTP-specific crawler settings

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2480 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
theli committed Sep 4, 2006
1 parent e3f0136 commit fce9e77
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 12 deletions.
5 changes: 5 additions & 0 deletions source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
Expand Up @@ -99,6 +99,11 @@ public plasmaCrawlLoaderMessage getMessage() {

public abstract void close();

/**
 * Reports how long ago the recorded start date of this worker was.
 *
 * @return the difference in milliseconds between the current system time
 *         and the stored start date, or {@code 0} if the stored start
 *         date is {@code 0}
 */
public long getDuration() {
    // Read the field exactly once so the zero check and the subtraction
    // operate on the same value.
    final long begin = this.startdate;
    if (begin == 0) {
        return 0;
    }
    return System.currentTimeMillis() - begin;
}

public void run() {
this.running = true;

Expand Down
11 changes: 3 additions & 8 deletions source/de/anomic/plasma/crawler/http/CrawlWorker.java
Expand Up @@ -108,11 +108,6 @@ public CrawlWorker(
this.protocol = "http";
}

/**
 * Computes the time elapsed since the start date stored in this worker.
 *
 * @return elapsed milliseconds relative to the current system time;
 *         {@code 0} when the stored start date is {@code 0}
 */
public long getDuration() {
    // Snapshot of the field; the same value is used for the check and the math.
    final long startedAt = this.startdate;
    final long elapsed;
    if (startedAt != 0) {
        elapsed = System.currentTimeMillis() - startedAt;
    } else {
        elapsed = 0;
    }
    return elapsed;
}

public void init() {
// refreshing timeout value
if (this.theMsg.timeout < 0) {
Expand All @@ -122,9 +117,9 @@ public void init() {
}

// some http header values
this.acceptEncoding = this.sb.getConfig("crawler.acceptEncoding", "gzip,deflate");
this.acceptLanguage = this.sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5");
this.acceptCharset = this.sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
this.acceptEncoding = this.sb.getConfig("crawler.http.acceptEncoding", "gzip,deflate");
this.acceptLanguage = this.sb.getConfig("crawler.http.acceptLanguage","en-us,en;q=0.5");
this.acceptCharset = this.sb.getConfig("crawler.http.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");

// getting the http proxy config
this.remoteProxyConfig = this.sb.remoteProxyConfig;
Expand Down
8 changes: 7 additions & 1 deletion source/migration.java
Expand Up @@ -255,6 +255,12 @@ public static void migrateSwitchConfigSettings(plasmaSwitchboard sb) {

sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
}

// migration of HTTP-specific crawler settings (runs only when the legacy "crawler.acceptLanguage" key is non-empty)
if ((value = sb.getConfig("crawler.acceptLanguage","")).length() > 0) {
sb.setConfig("crawler.http.acceptEncoding", sb.getConfig("crawler.acceptEncoding","gzip,deflate"));
sb.setConfig("crawler.http.acceptLanguage", sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5"));
sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
}
}

}
8 changes: 5 additions & 3 deletions yacy.init
Expand Up @@ -631,11 +631,13 @@ msgForwardingTo=root@localhost
onlineCautionDelay=30000

# Some configuration values for the crawler
crawler.acceptEncoding=gzip,deflate
crawler.acceptLanguage=en-us,en;q=0.5
crawler.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
crawler.clientTimeout=9000

# HTTP-crawler-specific settings
crawler.http.acceptEncoding=gzip,deflate
crawler.http.acceptLanguage=en-us,en;q=0.5
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7

# maximum number of crawler threads
crawler.MaxActiveThreads = 10
crawler.MaxIdleThreads = 7
Expand Down

0 comments on commit fce9e77

Please sign in to comment.