Skip to content

Commit

Permalink
enhanced computation speed of many replaceAll string operations
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7107 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Sep 5, 2010
1 parent e8228fb commit 22047ff
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 22 deletions.
11 changes: 5 additions & 6 deletions source/de/anomic/crawler/RobotsEntry.java
Expand Up @@ -40,7 +40,6 @@

public class RobotsEntry {

public static final String ROBOTS_DB_PATH_SEPARATOR = ";";
public static final String HOST_NAME = "hostname";
public static final String ALLOW_PATH_LIST = "allow";
public static final String DISALLOW_PATH_LIST = "disallow";
Expand All @@ -64,7 +63,7 @@ public RobotsEntry(final String hostName, final Map<String, byte[]> mem) {
this.denyPathList = new LinkedList<String>();
final String csPl = new String(this.mem.get(DISALLOW_PATH_LIST));
if (csPl.length() > 0){
final String[] pathArray = csPl.split(ROBOTS_DB_PATH_SEPARATOR);
final String[] pathArray = csPl.split(RobotsTxt.ROBOTS_DB_PATH_SEPARATOR);
if ((pathArray != null)&&(pathArray.length > 0)) {
this.denyPathList.addAll(Arrays.asList(pathArray));
}
Expand All @@ -76,7 +75,7 @@ public RobotsEntry(final String hostName, final Map<String, byte[]> mem) {
this.allowPathList = new LinkedList<String>();
final String csPl = new String(this.mem.get(ALLOW_PATH_LIST));
if (csPl.length() > 0){
final String[] pathArray = csPl.split(ROBOTS_DB_PATH_SEPARATOR);
final String[] pathArray = csPl.split(RobotsTxt.ROBOTS_DB_PATH_SEPARATOR);
if ((pathArray != null)&&(pathArray.length > 0)) {
this.allowPathList.addAll(Arrays.asList(pathArray));
}
Expand Down Expand Up @@ -116,7 +115,7 @@ public RobotsEntry(
final StringBuilder pathListStr = new StringBuilder(allowPathList.size() * 30);
for (String element : allowPathList) {
pathListStr.append(element)
.append(ROBOTS_DB_PATH_SEPARATOR);
.append(RobotsTxt.ROBOTS_DB_PATH_SEPARATOR);
}
this.mem.put(ALLOW_PATH_LIST, pathListStr.substring(0,pathListStr.length()-1).getBytes());
}
Expand All @@ -127,7 +126,7 @@ public RobotsEntry(
final StringBuilder pathListStr = new StringBuilder(disallowPathList.size() * 30);
for (String element : disallowPathList) {
pathListStr.append(element)
.append(ROBOTS_DB_PATH_SEPARATOR);
.append(RobotsTxt.ROBOTS_DB_PATH_SEPARATOR);
}
this.mem.put(DISALLOW_PATH_LIST,pathListStr.substring(0, pathListStr.length()-1).getBytes());
}
Expand Down Expand Up @@ -197,7 +196,7 @@ public boolean isDisallowed(String path) {
// if the path is null or empty we set it to /
if ((path == null) || (path.length() == 0)) path = "/";
// escape all occurrences of ';' because this character is used as the path separator in the Robots DB
else path = path.replaceAll(ROBOTS_DB_PATH_SEPARATOR,"%3B");
else path = RobotsTxt.ROBOTS_DB_PATH_SEPARATOR_MATCHER.matcher(path).replaceAll("%3B");

for (String element : this.denyPathList) {

Expand Down
2 changes: 2 additions & 0 deletions source/de/anomic/crawler/RobotsTxt.java
Expand Up @@ -32,6 +32,7 @@
import java.util.Date;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

Expand All @@ -50,6 +51,7 @@ public class RobotsTxt {
private static Logger log = Logger.getLogger(RobotsTxt.class);

public static final String ROBOTS_DB_PATH_SEPARATOR = ";";
public static final Pattern ROBOTS_DB_PATH_SEPARATOR_MATCHER = Pattern.compile(ROBOTS_DB_PATH_SEPARATOR);

BEncodedHeap robotsTable;
private final ConcurrentHashMap<String, DomSync> syncObjects;
Expand Down
7 changes: 5 additions & 2 deletions source/de/anomic/crawler/robotsParser.java
Expand Up @@ -33,6 +33,7 @@
import java.io.InputStreamReader;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.regex.Pattern;

/*
* A class for Parsing robots.txt files.
Expand All @@ -55,6 +56,8 @@

public final class robotsParser {

private static final Pattern patternTab = Pattern.compile("\t");

public static final String ROBOTS_USER_AGENT = "User-agent:".toUpperCase();
public static final String ROBOTS_DISALLOW = "Disallow:".toUpperCase();
public static final String ROBOTS_ALLOW = "Allow:".toUpperCase();
Expand Down Expand Up @@ -109,7 +112,7 @@ private void parse(final BufferedReader reader) {
try {
lineparser: while ((line = reader.readLine()) != null) {
// replacing all tabs with spaces
line = line.replaceAll("\t"," ").trim();
line = patternTab.matcher(line).replaceAll(" ").trim();
lineUpper = line.toUpperCase();

// parse empty line
Expand Down Expand Up @@ -218,7 +221,7 @@ private void parse(final BufferedReader reader) {
}

// escape all occurrences of ';' because this character is used as the path separator in the Robots DB
path = path.replaceAll(RobotsTxt.ROBOTS_DB_PATH_SEPARATOR,"%3B");
path = RobotsTxt.ROBOTS_DB_PATH_SEPARATOR_MATCHER.matcher(path).replaceAll("%3B");

// adding it to the pathlist
if (isDisallowRule) {
Expand Down
4 changes: 3 additions & 1 deletion source/de/anomic/data/URLAnalysis.java
Expand Up @@ -67,6 +67,8 @@

public class URLAnalysis {

private static final Pattern patternMinus = Pattern.compile("-");

/**
* processes to analyse URL lists
*/
Expand Down Expand Up @@ -99,7 +101,7 @@ public void run() {
try {
url = in.take();
if (url == poison) break;
update(url.getHost().replaceAll("-", "\\.").split("\\."));
update(patternMinus.matcher(url.getHost()).replaceAll("\\.").split("\\."));
update(p.matcher(url.getPath()).replaceAll("/").split("/"));
} catch (InterruptedException e) {
Log.logException(e);
Expand Down
29 changes: 19 additions & 10 deletions source/de/anomic/server/serverObjects.java
Expand Up @@ -51,6 +51,7 @@ This shall speed up usage when a slow internet connection is used (dial-up)
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.document.parser.html.CharacterCoding;
Expand All @@ -62,6 +63,14 @@ This shall speed up usage when a slow internet connection is used (dial-up)

public class serverObjects extends HashMap<String, String> implements Cloneable {

private final static Pattern patternNewline = Pattern.compile("\n");
private final static Pattern patternDoublequote = Pattern.compile("\"");
private final static Pattern patternSlash = Pattern.compile("/");
private final static Pattern patternB = Pattern.compile("\b");
private final static Pattern patternF = Pattern.compile("\f");
private final static Pattern patternR = Pattern.compile("\r");
private final static Pattern patternT = Pattern.compile("\t");

private static final long serialVersionUID = 1L;
private boolean localized = true;

Expand Down Expand Up @@ -164,15 +173,15 @@ public String put(final String key, final InetAddress value) {
* @param value a String that will be reencoded for JSON output.
* @return the modified String that was added to the map.
*/
public String putJSON(final String key, String value) {
value = value.replaceAll("\"", "'");
value = value.replaceAll("/", "\\/");
// value = value.replaceAll("\\", "\\\\");
value = value.replaceAll("\b", "\\b");
value = value.replaceAll("\f", "\\f");
value = value.replaceAll("\n", "\\r");
value = value.replaceAll("\r", "\\r");
value = value.replaceAll("\t", "\\t");
public String putJSON(final String key, String value) {
    // Matcher.replaceAll() interprets a backslash in the replacement string as an escape
    // for the following character. To emit a literal backslash the replacement must
    // therefore contain two backslashes, i.e. "\\\\" in Java source. With only "\\b" etc.
    // the control characters were replaced by the plain letters b, f, r and t.

    // backslashes themselves are not escaped; the original statement is kept disabled:
    // value = value.replaceAll("\\", "\\\\");

    // double quotes are turned into single quotes instead of being escaped
    value = patternDoublequote.matcher(value).replaceAll("'");
    value = patternSlash.matcher(value).replaceAll("\\\\/");
    value = patternB.matcher(value).replaceAll("\\\\b");
    value = patternF.matcher(value).replaceAll("\\\\f");
    // a newline is encoded as \n (it was previously mapped to the \r escape)
    value = patternNewline.matcher(value).replaceAll("\\\\n");
    value = patternR.matcher(value).replaceAll("\\\\r");
    value = patternT.matcher(value).replaceAll("\\\\t");
    return put(key, value);
}
public String putJSON(final String key, final byte[] value) {
Expand Down Expand Up @@ -333,7 +342,7 @@ public void store(final File f) throws IOException {
String key, value;
for (Map.Entry<String, String> entry: entrySet()) {
key = entry.getKey();
value = entry.getValue().replaceAll("\n", "\\\\n");
value = patternNewline.matcher(entry.getValue()).replaceAll("\\\\n");
fos.write((key + "=" + value + "\r\n").getBytes());
}
} finally {
Expand Down
209 changes: 209 additions & 0 deletions source/net/yacy/ai/example/SchwarzerPeter.java
@@ -0,0 +1,209 @@
package net.yacy.ai.example;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import net.yacy.ai.greedy.AbstractFinding;
import net.yacy.ai.greedy.AbstractModel;
import net.yacy.ai.greedy.Finding;
import net.yacy.ai.greedy.Model;
import net.yacy.ai.greedy.Role;

public class SchwarzerPeter {

/**
 * The card types of the game. Two cards of the same type form a pair
 * (see {@code Karte.istPaar}); {@code P} marks the "Schwarzer Peter" card
 * (see {@code Karte.istSchwarzerPeter}).
 */
public static enum Kartentyp {
    A,
    B,
    C,
    D,
    E,
    F,
    G,
    H,
    P
}

/**
 * Counter that tells apart the individual cards of one {@code Kartentyp}:
 * the deck is built with a {@code p} and a {@code q} card per type.
 */
public static enum Kartenzaehler {
    p,
    q
}

/**
 * A single playing card, identified by its type and by its counter within that type.
 * Instances are immutable.
 */
public static class Karte {
    private final Kartentyp kartentyp;
    private final Kartenzaehler kartenzaehler;

    /**
     * @param kartentyp the type of the card
     * @param kartenzaehler the counter that distinguishes cards of the same type
     */
    public Karte(Kartentyp kartentyp, Kartenzaehler kartenzaehler) {
        this.kartentyp = kartentyp;
        this.kartenzaehler = kartenzaehler;
    }

    /**
     * Two cards are equal if both type and counter are identical.
     *
     * @param obj the object to compare with; may be null or of a foreign type
     * @return true only if obj is a Karte with the same type and counter
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        // guard against null and foreign types instead of failing with an exception on the cast
        if (!(obj instanceof Karte)) return false;
        final Karte other = (Karte) obj;
        return this.kartentyp == other.kartentyp && this.kartenzaehler == other.kartenzaehler;
    }

    /**
     * @return a hash derived from type and counter, consistent with {@link #equals(Object)}
     */
    @Override
    public int hashCode() {
        return this.kartentyp.hashCode() + 16 + this.kartenzaehler.hashCode();
    }

    /**
     * @return true if this card has the type {@code Kartentyp.P}
     */
    public boolean istSchwarzerPeter() {
        return this.kartentyp == Kartentyp.P;
    }

    /**
     * @param k1 first card, must not be null
     * @param k2 second card, must not be null
     * @return true if both cards have the same type; the counter is ignored
     */
    public static boolean istPaar(Karte k1, Karte k2) {
        return k1.kartentyp == k2.kartentyp;
    }
}

/**
 * The complete deck in a fixed order: for every {@code Kartentyp} one card per
 * {@code Kartenzaehler}, followed by one additional {@code P}/{@code p} card.
 * Because the loop also covers {@code Kartentyp.P}, the list holds three P cards
 * and 19 cards in total.
 */
public static final List<Karte> alleKarten;
static {
    final List<Karte> karten = new ArrayList<Karte>(33);
    for (final Kartentyp typ : Kartentyp.values()) {
        // Kartenzaehler declares exactly p and q, in that order
        for (final Kartenzaehler zaehler : Kartenzaehler.values()) {
            karten.add(new Karte(typ, zaehler));
        }
    }
    karten.add(new Karte(Kartentyp.P, Kartenzaehler.p));
    alleKarten = karten;
}

/**
 * Creates a new shuffled pile containing every card of {@code alleKarten}.
 * The shuffle repeatedly draws a random position from the remaining cards
 * and moves that card to the end of the result.
 *
 * @param r the random source that determines the order
 * @return a new, mutable list with all cards in random order
 */
public static final List<Karte> neuerStapel(Random r) {
    final List<Karte> rest = new ArrayList<Karte>(alleKarten);
    final List<Karte> gemischt = new ArrayList<Karte>();
    while (!rest.isEmpty()) {
        final int position = r.nextInt(rest.size());
        gemischt.add(rest.remove(position));
    }
    return gemischt;
}

/**
 * A player at the table, identified by a number in a round of {@code spieleranzahl} players.
 * Instances are immutable; neighbours are returned as new objects.
 */
public static class Spieler implements Role {

    private final int spielernummer;
    private final int spieleranzahl;

    /**
     * @param spielernummer the number of this player
     * @param spieleranzahl the total number of players in the round
     */
    public Spieler(int spielernummer, int spieleranzahl) {
        this.spielernummer = spielernummer;
        this.spieleranzahl = spieleranzahl;
    }

    /**
     * @return the player with the next higher number; the last player wraps around to player 0
     */
    @Override
    public Spieler nextRole() {
        int n = (this.spielernummer == this.spieleranzahl - 1) ? 0 : this.spielernummer + 1;
        return new Spieler(n, this.spieleranzahl);
    }

    /**
     * @return the player with the next lower number; player 0 wraps around to the last player
     */
    public Spieler linkerNachbar() {
        int n = (this.spielernummer == 0) ? this.spieleranzahl - 1 : this.spielernummer - 1;
        return new Spieler(n, this.spieleranzahl);
    }

    /**
     * Players are compared by their number only; the number of players is not considered.
     *
     * @param obj the object to compare with; may be null or of a foreign type
     * @return true only if obj is a Spieler with the same number
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        // guard against null and foreign types instead of failing with an exception on the cast
        if (!(obj instanceof Spieler)) return false;
        return this.spielernummer == ((Spieler) obj).spielernummer;
    }

    /**
     * @return the player number, consistent with {@link #equals(Object)}
     */
    @Override
    public int hashCode() {
        return this.spielernummer;
    }
}

/**
 * How a {@code Hand} takes and gives cards. The first name part controls taking:
 * with "nichtsortieren" a received card is placed at a random position, otherwise it is appended.
 * The second part controls giving: with "linksziehen" the card at index 0 is given away,
 * otherwise a card at a random position.
 */
public static enum Strategy {
    nichtsortieren_linksziehen,
    nichtsortieren_zufallsziehen,
    sortieren_linksziehen,
    sortieren_zufallsziehen
}

/**
 * The cards held by one player. The strategy decides where a received card is
 * placed and which card is handed over when another player draws.
 */
public static class Hand extends ArrayList<Karte> {
    private static final long serialVersionUID = -5274023237476645059L;
    private final Strategy strategy;

    /**
     * @param strategy the take/give behaviour of this hand
     */
    public Hand(Strategy strategy) {
        this.strategy = strategy;
    }

    /**
     * Adds a card to this hand. With a "nichtsortieren" strategy the new card takes a
     * random position and the card that was there moves to the end; otherwise the new
     * card is appended.
     *
     * @param r the random source used to pick the position
     * @param karte the card to take
     */
    public void annehmen(Random r, Karte karte) {
        final boolean zufaelligePosition =
                this.strategy == Strategy.nichtsortieren_linksziehen
                || this.strategy == Strategy.nichtsortieren_zufallsziehen;
        // an empty hand has no position to swap with; Random.nextInt(0) would throw
        // IllegalArgumentException, so the first card is simply appended
        if (zufaelligePosition && !this.isEmpty()) {
            this.add(this.set(r.nextInt(this.size()), karte));
        } else {
            this.add(karte);
        }
    }

    /**
     * Removes one card from this hand and returns it: the card at index 0 for a
     * "linksziehen" strategy, a card at a random position otherwise.
     *
     * @param r the random source used to pick the position
     * @return the removed card
     * @throws IndexOutOfBoundsException if the hand is empty and the first card is requested
     * @throws IllegalArgumentException if the hand is empty and a random card is requested
     */
    public Karte abgeben(Random r) {
        if (this.strategy == Strategy.nichtsortieren_linksziehen || this.strategy == Strategy.sortieren_linksziehen) {
            return this.remove(0);
        } else {
            return this.remove(r.nextInt(this.size()));
        }
    }

    /**
     * Placeholder for discarding pairs; currently does nothing.
     *
     * @return always true
     */
    public boolean paerchenAblegen() {
        return true;
    }
}

/**
 * A move in the game. It carries no state of its own beyond the player and
 * priority handed to {@code AbstractFinding}.
 */
public static class Zug extends AbstractFinding<Spieler> implements Finding<Spieler> {

    /**
     * @param spieler the player the move belongs to
     * @param priority the priority passed on to the super class
     */
    public Zug(Spieler spieler, int priority) {
        super(spieler, priority);
    }

    /**
     * @return this very instance; no copy is created
     */
    @Override
    public Object clone() {
        return this;
    }

    /**
     * @return always true, regardless of the argument
     */
    @Override
    public boolean equals(Object other) {
        return true;
    }

    /**
     * @return always 0
     */
    @Override
    public int hashCode() {
        return 0;
    }

}

/**
 * The game model: one {@code Hand} per player and the random source used for
 * dealing and drawing. Ranking, termination, clone, equals and hashCode are
 * still placeholders.
 */
public static class Spiel extends AbstractModel<Spieler, Zug> implements Model<Spieler, Zug>, Cloneable {

    private Hand[] haende;
    private Random random;

    /**
     * Creates the hands and deals a freshly shuffled pile card by card,
     * starting with the given player and continuing with {@code nextRole()}.
     *
     * @param spieler the player that receives the first card
     * @param r the random source for shuffling, dealing and later draws
     */
    public Spiel(Spieler spieler, Random r) {
        super(spieler);
        this.random = r;
        this.haende = new Hand[spieler.spieleranzahl];
        for (int i = 0; i < spieler.spieleranzahl; i++) {
            this.haende[i] = new Hand(Strategy.nichtsortieren_linksziehen);
        }
        Spieler empfaenger = spieler;
        for (final Karte karte : neuerStapel(r)) {
            this.haende[empfaenger.spielernummer].annehmen(r, karte);
            empfaenger = empfaenger.nextRole();
        }
    }

    /**
     * @return a new empty list; no moves are generated yet
     */
    @Override
    public List<Zug> explore() {
        return new ArrayList<Zug>(0);
    }

    /**
     * Lets the current player take one card from the left neighbour's hand.
     * The finding itself is not inspected.
     *
     * @param finding the move to apply (unused)
     */
    @Override
    public void applyFinding(Zug finding) {
        final Hand nehmer = this.haende[this.currentRole().spielernummer];
        final Hand geber = this.haende[this.currentRole().linkerNachbar().spielernummer];
        final Karte gezogen = geber.abgeben(this.random);
        nehmer.annehmen(this.random, gezogen);
    }

    /**
     * @return always 0 (not implemented yet)
     */
    @Override
    public int getRanking(int findings, Spieler role) {
        // TODO Auto-generated method stub
        return 0;
    }

    /**
     * @return always false (not implemented yet)
     */
    @Override
    public boolean isTermination(Spieler role) {
        // TODO Auto-generated method stub
        return false;
    }

    /**
     * @return always null (not implemented yet)
     */
    @Override
    public Spieler isTermination() {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * @return always null (not implemented yet)
     */
    @Override
    public Object clone() {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * @return always false (not implemented yet)
     */
    @Override
    public boolean equals(Object other) {
        // TODO Auto-generated method stub
        return false;
    }

    /**
     * @return always 0 (not implemented yet)
     */
    @Override
    public int hashCode() {
        // TODO Auto-generated method stub
        return 0;
    }
}
}

0 comments on commit 22047ff

Please sign in to comment.