Skip to content

Commit

Permalink
more generic cache methods
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2721 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Oct 9, 2006
1 parent 72482b1 commit 0f10bdd
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 29 deletions.
1 change: 0 additions & 1 deletion htroot/CacheAdmin_p.java
Expand Up @@ -57,7 +57,6 @@
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.cache.IResourceInfo;
Expand Down
58 changes: 43 additions & 15 deletions source/de/anomic/plasma/plasmaHTCache.java
Expand Up @@ -103,17 +103,19 @@ public final class plasmaHTCache {
public final File cachePath;
public final serverLog log;
public static final HashSet filesInUse = new HashSet(); // can we delete this file
public final boolean useTreeStorage;
public String cacheLayout;
public boolean cacheMigration;

private ResourceInfoFactory objFactory;
private serverThread cacheScanThread;

public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb, long preloadTime, boolean useTreeStorage) {
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb, long preloadTime, String cacheLayout, boolean cacheMigration) {
// this.switchboard = switchboard;

this.log = new serverLog("HTCACHE");
this.cachePath = htCachePath;
this.useTreeStorage = useTreeStorage;
this.cacheLayout = cacheLayout;
this.cacheMigration = cacheMigration;

// create the object factory
this.objFactory = new ResourceInfoFactory();
Expand Down Expand Up @@ -661,23 +663,41 @@ public File getCachePath(final URL url) {
if (port >= 0) {
fileName.append('!').append(port);
}
File FileTree = new File(this.cachePath, fileName.toString() + path);

// generate cache path according to storage method
if (cacheLayout.equals("tree")) {
File FileTree = treeFile(fileName, path);
if (cacheMigration) {
moveCachedObject(hashFile(fileName, extention, url), FileTree);
}
return FileTree;
}
if (cacheLayout.equals("hash")) {
File FileFlat = hashFile(fileName, extention, url);
if (cacheMigration) {
moveCachedObject(treeFile(fileName, path), FileFlat);
}
return FileFlat;
}
return null;
}

/**
 * Builds the cache file location used by the "tree" layout: the given
 * path is appended to the already assembled file name prefix and the
 * result is resolved below the cache root directory.
 *
 * @param fileName prefix of the relative file name (not modified here)
 * @param path     path portion that is appended to the prefix
 * @return the file below {@code cachePath} for the tree layout
 */
private File treeFile(StringBuffer fileName, String path) {
    final String relativeName = fileName.toString() + path;
    return new File(this.cachePath, relativeName);
}

/**
 * Builds the cache file location used by the "hash" layout:
 * {@code <cachePath>/<fileName>/<hh>/<hh>/<hexHash>[extention]}, where
 * {@code hexHash} is the result of {@code serverCodings.encodeHex} applied
 * to the decoded URL hash and the two {@code <hh>} directories are its
 * first and second pair of characters.
 *
 * The relative path is assembled in a private buffer. The caller's
 * {@code fileName} buffer is left untouched, because getCachePath hands
 * the same buffer to treeFile afterwards when cache migration is enabled.
 *
 * @param fileName  prefix of the relative file name (not modified here)
 * @param extention file extension appended after the hash, or null for none
 * @param url       the URL whose hash names the cache file
 * @return the file below {@code cachePath} for the hash layout
 */
private File hashFile(StringBuffer fileName, String extention, URL url) {
    String urlHash = indexURL.urlHash(url);
    String hexHash = serverCodings.encodeHex(kelondroBase64Order.enhancedCoder.decode(urlHash));

    // work on a copy so that the shared fileName buffer keeps its value
    StringBuffer f = new StringBuffer(fileName.length() + hexHash.length() + 16);
    f.append(fileName.toString());
    f.append('/').append(hexHash.substring(0,2)).append('/').append(hexHash.substring(2,4)).append('/').append(hexHash);

    // the extension belongs to the file itself, i.e. it follows the hash
    if (extention != null) {
        f.append(extention);
    }
    return new File(this.cachePath, f.toString());
}



/**
* This is a helper function that extracts the hash from the filename
*/
Expand Down Expand Up @@ -922,7 +942,7 @@ public final class Entry {
private String name; // the name of the link, read as anchor from an <a>-tag
private String nomalizedURLHash;
private String nomalizedURLString;
private int status; // cache load/hit/stale etc status
//private int status; // cache load/hit/stale etc status
private Date lastModified;
private char doctype;
private String language;
Expand Down Expand Up @@ -1013,6 +1033,14 @@ public String urlHash() {
return this.nomalizedURLHash;
}

/**
 * Gives access to the last-modified date stored in this entry.
 *
 * @return the {@code lastModified} field itself (no copy is made)
 */
public Date lastModified() {
    return lastModified;
}

/**
 * Gives access to the language value stored in this entry.
 *
 * @return the content of the {@code language} field
 */
public String language() {
    return language;
}

/**
 * Gives access to the crawl profile entry held by this entry.
 *
 * @return the content of the {@code profile} field
 */
public plasmaCrawlProfile.entry profile() {
    return profile;
}
Expand Down
5 changes: 3 additions & 2 deletions source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -449,8 +449,9 @@ ramEURL, getConfigBool("useFlexTableForEURL", true),
}
this.log.logInfo("HTCACHE Path = " + htCachePath.getAbsolutePath());
long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig("proxyCacheSize", "2")); // this is megabyte
boolean useTreeStorage = getConfigBool("proxyCacheTree", true);
this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP, ramHTTP_time, useTreeStorage);
String cacheLayout = getConfig("proxyCacheLayout", "tree");
boolean cacheMigration = getConfigBool("proxyCacheMigration", true);
this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP, ramHTTP_time, cacheLayout, cacheMigration);

// make parser
log.logConfig("Starting Parser");
Expand Down
19 changes: 8 additions & 11 deletions yacy.init
Expand Up @@ -145,19 +145,11 @@ messConfig = httpd.messages
# to enable that function, set proxy=true
proxy=true

# a path to the proxy's file cache.
# a path to the file cache, used for the internal proxy and as crawl buffer
# This will be used if the server is addressed as a proxy
proxyCache = DATA/HTCACHE

# the proxy's maximum disc cache size in megabytes
# there should be enough space for the browsing load of an internet caffee
# running at 56kbit/s modem speed (this time not unusual)
# during 3 days, 8 hours a day
# necessary space = 3 * 8 * 60 * 60 * 56 / 8 = 604800 KB = ca. 590 MB
# since 600 MB is not much these days (it's below one GB!)
# we recommend using that space
#proxyCacheSize = 600
#for testing:
# the maximum disc cache size for files in proxyCache in megabytes
proxyCacheSize = 200

# use the mostly direct mapping of URLs to Filenames
Expand All @@ -173,7 +165,12 @@ proxyCacheSize = 200
# files that are present under the previously used layout will be renamed
# to the new location and thus be accessible immediately. so an accumulated
# cache is still usable after the switch.
proxyCacheTree = true
# possible values are {tree, hash}
proxyCacheLayout = tree

# the migration flag determines whether files that are stored under the other
# layout are moved to the location of the currently selected layout
proxyCacheMigration = true


# the following mime-types are the whitelist for indexing
#
Expand Down

0 comments on commit 0f10bdd

Please sign in to comment.