Skip to content

Commit

Permalink
fixed a bug in snippet fetch strategy: cache only does not help if re…
Browse files Browse the repository at this point in the history
…source can only be found in web

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6930 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Jun 18, 2010
1 parent fbf021b commit 73f03e0
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 6 deletions.
3 changes: 3 additions & 0 deletions source/de/anomic/crawler/CrawlProfile.java
Expand Up @@ -396,6 +396,9 @@ public CacheStrategy cacheStrategy() {
return CacheStrategy.IFFRESH;
}
}
/**
 * Stores the given cache strategy.
 * The strategy is written to {@code mem} in its string form under the
 * {@code CACHE_STRAGEGY} key, overwriting any value previously put there.
 *
 * @param newStrategy the strategy to store; its {@code toString()} is invoked,
 *                    so passing {@code null} raises a NullPointerException
 */
public void setCacheStrategy(CacheStrategy newStrategy) {
    // serialize first, then write the textual form into the backing map
    final String strategyName = newStrategy.toString();
    mem.put(CACHE_STRAGEGY, strategyName);
}
public long recrawlIfOlder() {
// returns a long (millis) that is the minimum age that
// an entry must have to be re-crawled
Expand Down
5 changes: 4 additions & 1 deletion source/de/anomic/crawler/CrawlSwitchboard.java
Expand Up @@ -30,6 +30,8 @@
import java.io.IOException;
import java.util.Iterator;

import de.anomic.crawler.CrawlProfile.CacheStrategy;

import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.kelondroException;
Expand Down Expand Up @@ -183,8 +185,9 @@ private void initActiveCrawlProfiles() {
if (this.defaultTextSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, -1, true, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.CACHEONLY);
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, -1, true, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
}
this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST);
if (this.defaultMediaSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
Expand Down
5 changes: 1 addition & 4 deletions source/de/anomic/search/TextSnippet.java
Expand Up @@ -352,10 +352,7 @@ public static TextSnippet retrieveTextSnippet(final LoaderDispatcher loader, fin
// trying to load the resource from the cache
resContent = Cache.getContent(url);
responseHeader = Cache.getResponseHeader(url);
if (resContent != null && !fetchOnline && resContent.length > maxDocLen) {
// content may be too large to be parsed here. To be fast, we omit calculation of snippet here
return new TextSnippet(url, null, ERROR_SOURCE_LOADING, queryhashes, "resource available, but too large: " + resContent.length + " bytes");
} else if (fetchOnline) {
if ((resContent == null || responseHeader == null) && fetchOnline) {
// if not found try to download it

// download resource using the crawler and keep resource in memory if possible
Expand Down
2 changes: 1 addition & 1 deletion source/net/yacy/repository/LoaderDispatcher.java
Expand Up @@ -233,7 +233,7 @@ public Response load(final Request request, CrawlProfile.CacheStrategy cacheStra
// check case where we want results from the cache exclusively, and never from the internet (offline mode)
if (cacheStrategy == CrawlProfile.CacheStrategy.CACHEONLY) {
// we had a chance to get the content from the cache .. its over. We don't have it.
return null;
throw new IOException("cache only strategy");
}

// now forget about the cache, nothing there. Try to load the content from the internet
Expand Down

0 comments on commit 73f03e0

Please sign in to comment.