Skip to content

Commit

Permalink
performance update
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5653 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Feb 25, 2009
1 parent d884c47 commit 5462536
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 37 deletions.
7 changes: 3 additions & 4 deletions source/de/anomic/data/URLAnalysis.java
Expand Up @@ -57,9 +57,7 @@ public class URLAnalysis {
/**
* processes to analyse URL lists
*/

private static final long cleanuplimit = 50 * 1024 * 1024;


public static yacyURL poison = null;
static {
try {
Expand Down Expand Up @@ -132,7 +130,8 @@ public static void genstat(String urlfile) {

boolean gz = urlfile.endsWith(".gz");
String analysis = (gz) ? urlfile.substring(0, urlfile.length() - 3) + ".stats.gz" : urlfile + ".stats";

long cleanuplimit = Math.max(50 * 1024 * 1024, MemoryControl.available() / 8);

// start threads
ArrayBlockingQueue<yacyURL> in = new ArrayBlockingQueue<yacyURL>(1000);
ConcurrentHashMap<String, Integer> out = new ConcurrentHashMap<String, Integer>();
Expand Down
70 changes: 37 additions & 33 deletions source/de/anomic/yacy/yacyURL.java
Expand Up @@ -48,6 +48,9 @@ public class yacyURL implements Serializable {
private static final long serialVersionUID = -1173233022912141884L;
public static final int TLD_any_zone_filter = 255; // from TLD zones can be filtered during search; this is the catch-all filter
private static final Pattern backPathPattern = Pattern.compile("(/[^/]+(?<!/\\.{1,2})/)[.]{2}(?=/|$)|/\\.(?=/)|/(?=/)");
private static final Pattern patternDot = Pattern.compile("\\.");
private static final Pattern patternSlash = Pattern.compile("/");
private static final Pattern patternAmp = Pattern.compile("&");

// class variables
private String protocol, host, userInfo, path, quest, ref, hash;
Expand Down Expand Up @@ -125,7 +128,7 @@ private void parseURLString(String url) throws MalformedURLException {

// handle international domains
if (!Punycode.isBasic(host)) try {
final String[] domainParts = host.split("\\.");
final String[] domainParts = patternDot.split(host, 0);
StringBuilder buffer = new StringBuilder();
// encode each domainpart separately
for(int i=0; i<domainParts.length; i++) {
Expand Down Expand Up @@ -273,8 +276,8 @@ private void escape() {
}

private void escapePath() {
final String[] pathp = path.split("/", -1);
StringBuilder ptmp = new StringBuilder(pathp.length + 10);
final String[] pathp = patternSlash.split(path, 0);
StringBuilder ptmp = new StringBuilder(path.length() + 10);
for (int i = 0; i < pathp.length; i++) {
ptmp.append('/');
ptmp.append(escape(pathp[i]));
Expand All @@ -287,8 +290,8 @@ private void escapeRef() {
}

private void escapeQuest() {
final String[] questp = quest.split("&", -1);
StringBuilder qtmp = new StringBuilder(questp.length + 10);
final String[] questp = patternAmp.split(quest, 0);
StringBuilder qtmp = new StringBuilder(quest.length() + 10);
for (int i = 0; i < questp.length; i++) {
if (questp[i].indexOf('=') != -1) {
qtmp.append('&');
Expand Down Expand Up @@ -362,10 +365,9 @@ private void escapeQuest() {
* @return The encoded string
*/
// from: http://www.w3.org/International/URLUTF8Encoder.java
public static StringBuilder escape(final String s)
{
final StringBuilder sbuf = new StringBuilder(s.length() + 10);
public static StringBuilder escape(final String s) {
final int len = s.length();
final StringBuilder sbuf = new StringBuilder(len + 10);
for (int i = 0; i < len; i++) {
final int ch = s.charAt(i);
if ('A' <= ch && ch <= 'Z') { // 'A'..'Z'
Expand Down Expand Up @@ -704,7 +706,7 @@ private final String urlHashComputation() {
// find rootpath
int rootpathStart = 0;
int rootpathEnd = this.path.length() - 1;
if (this.path.startsWith("/"))
if (this.path.length() > 0 && this.path.charAt(0) == '/')
rootpathStart = 1;
if (this.path.endsWith("/"))
rootpathEnd = this.path.length() - 2;
Expand Down Expand Up @@ -869,31 +871,33 @@ public String language() {

public static void main(final String[] args) {
final String[][] test = new String[][]{
new String[]{null, "http://www.anomic.de/home/test?x=1#home"},
new String[]{null, "http://www.anomic.de"},
new String[]{null, "http://www.anomic.de/"},
new String[]{null, "http://www.anomic.de/home/test?x=1#home"},
new String[]{null, "http://www.anomic.de/home/test?x=1"},
new String[]{null, "http://www.anomic.de/home/test#home"},
new String[]{null, "ftp://ftp.anomic.de/home/test#home"},
new String[]{null, "http://www.anomic.de/home/../abc/"},
new String[]{null, "mailto:abcdefg@nomailnomail.com"},
new String[]{"http://www.anomic.de/home", "test"},
new String[]{"http://www.anomic.de/home", "test/"},
new String[]{"http://www.anomic.de/home/", "test"},
new String[]{"http://www.anomic.de/home/", "test/"},
new String[]{"http://www.anomic.de/home/index.html", "test.htm"},
new String[]{"http://www.anomic.de/home/index.html", "http://www.yacy.net/test"},
new String[]{"http://www.anomic.de/home/index.html", "ftp://ftp.yacy.net/test"},
new String[]{"http://www.anomic.de/home/index.html", "../test"},
new String[]{"http://www.anomic.de/home/index.html", "mailto:abcdefg@nomailnomail.com"},
new String[]{null, "news:de.test"},
new String[]{"http://www.anomic.de/home", "news:de.test"},
new String[]{"http://www.anomic.de/home", "ftp://ftp.anomic.de/src"},
new String[]{null, "ftp://ftp.delegate.org/"},
new String[]{"http://www.anomic.de/home", "ftp://ftp.delegate.org/"},
new String[]{"http://www.anomic.de","mailto:yacy@weltherrschaft.org"},
new String[]{"http://www.anomic.de","javascipt:temp"},
new String[]{null,"http://yacy-websuche.de/wiki/index.php?title=De:IntroInformationFreedom&action=history"},
new String[]{null, "http://diskusjion.no/index.php?s=5bad5f431a106d9a8355429b81bb0ca5&showuser=23585"},
new String[]{null, "http://diskusjion.no/index.php?s=5bad5f431a106d9a8355429b81bb0ca5&amp;showuser=23585"}
new String[]{null, "http://www.anomic.de/home/test#home"},
new String[]{null, "ftp://ftp.anomic.de/home/test#home"},
new String[]{null, "http://www.anomic.de/home/../abc/"},
new String[]{null, "mailto:abcdefg@nomailnomail.com"},
new String[]{"http://www.anomic.de/home", "test"},
new String[]{"http://www.anomic.de/home", "test/"},
new String[]{"http://www.anomic.de/home/", "test"},
new String[]{"http://www.anomic.de/home/", "test/"},
new String[]{"http://www.anomic.de/home/index.html", "test.htm"},
new String[]{"http://www.anomic.de/home/index.html", "http://www.yacy.net/test"},
new String[]{"http://www.anomic.de/home/index.html", "ftp://ftp.yacy.net/test"},
new String[]{"http://www.anomic.de/home/index.html", "../test"},
new String[]{"http://www.anomic.de/home/index.html", "mailto:abcdefg@nomailnomail.com"},
new String[]{null, "news:de.test"},
new String[]{"http://www.anomic.de/home", "news:de.test"},
new String[]{"http://www.anomic.de/home", "ftp://ftp.anomic.de/src"},
new String[]{null, "ftp://ftp.delegate.org/"},
new String[]{"http://www.anomic.de/home", "ftp://ftp.delegate.org/"},
new String[]{"http://www.anomic.de","mailto:yacy@weltherrschaft.org"},
new String[]{"http://www.anomic.de","javascipt:temp"},
new String[]{null,"http://yacy-websuche.de/wiki/index.php?title=De:IntroInformationFreedom&action=history"},
new String[]{null, "http://diskusjion.no/index.php?s=5bad5f431a106d9a8355429b81bb0ca5&showuser=23585"},
new String[]{null, "http://diskusjion.no/index.php?s=5bad5f431a106d9a8355429b81bb0ca5&amp;showuser=23585"}
};
String environment, url;
yacyURL aURL, aURL1;
Expand Down

0 comments on commit 5462536

Please sign in to comment.