Skip to content

Commit

Permalink
enhanced tagging preparation speed which reduces initialization time for
Browse files Browse the repository at this point in the history
very large vocabularies
  • Loading branch information
Orbiter committed Dec 13, 2014
1 parent 6a1865f commit aa80cb1
Showing 1 changed file with 15 additions and 11 deletions.
26 changes: 15 additions & 11 deletions source/net/yacy/cora/lod/vocabulary/Tagging.java
Expand Up @@ -480,14 +480,21 @@ public String getNamespace() {
/**
 * Returns the value of the {@code objectspace} field of this instance.
 *
 * @return the stored objectspace string
 */
public String getObjectspace() {
    return objectspace;
}


// Precompiled patterns applied by normalizeKey(). Compiling them once avoids
// re-compiling the same regular expressions for every key that is normalized.
private static final Pattern PATTERN_SPACEPLUS = Pattern.compile(" \\+");   // a space followed by '+'
private static final Pattern PATTERN_SPACESLASH = Pattern.compile(" /");    // a space followed by '/'
private static final Pattern PATTERN_PLUS = Pattern.compile("\\+");         // a bare '+'
private static final Pattern PATTERN_SLASH = Pattern.compile("/");          // a bare '/'
// Exactly two consecutive spaces. Written with an explicit quantifier so the
// intent survives whitespace-collapsing tools; a single-space pattern replaced
// by a single space would be a no-op.
private static final Pattern PATTERN_SPACESPACE = Pattern.compile(" {2}");

/**
 * Normalizes a key by removing symbols that are bad in a query attribute.
 *
 * <p>The key is trimmed first. A {@code +} or {@code /} that directly follows a
 * space is replaced, together with that space, by {@code ", "}; every remaining
 * {@code +} or {@code /} is replaced by {@code ","}. Finally every match of
 * {@code PATTERN_SPACESPACE} is replaced by a single space.
 *
 * @param k the raw key; must not be {@code null} (it is dereferenced immediately)
 * @return the normalized key
 */
private final String normalizeKey(String k) {
    k = k.trim();
    // Only the precompiled patterns are applied. Running the same replacements
    // through String.replaceAll beforehand would compile each regex again on
    // every call and leave nothing for these passes to match.
    k = PATTERN_SPACEPLUS.matcher(k).replaceAll(", ");
    k = PATTERN_SPACESLASH.matcher(k).replaceAll(", ");
    k = PATTERN_PLUS.matcher(k).replaceAll(",");
    k = PATTERN_SLASH.matcher(k).replaceAll(",");
    k = PATTERN_SPACESPACE.matcher(k).replaceAll(" ");
    return k;
}

Expand Down Expand Up @@ -537,18 +544,15 @@ public String toString() {
// Precompiled single-character patterns referenced by normalizeTerm():
// o-umlaut, u-umlaut, sharp s and the comma.
private static final Pattern PATTERN_OE = Pattern.compile("\u00F6");
private static final Pattern PATTERN_UE = Pattern.compile("\u00FC");
private static final Pattern PATTERN_SZ = Pattern.compile("\u00DF");
private static final Pattern PATTERN_COMMA = Pattern.compile(",");

/**
 * Normalizes a term: trims it, lower-cases it, transliterates the characters
 * matched by the umlaut / sharp-s patterns to ASCII digraphs, and removes commas.
 *
 * NOTE(review): this block appears to contain two alternative comma treatments
 * (a reordering loop followed by a plain pattern replacement); see the inline
 * notes below and confirm against the repository which one is intended.
 *
 * @param term the raw term; must not be null (it is dereferenced immediately)
 * @return the normalized term
 */
public static final String normalizeTerm(String term) {
// NOTE(review): toLowerCase() without an explicit Locale uses the JVM default
// locale; confirm whether a fixed locale is wanted here.
term = term.trim().toLowerCase();
// PATTERN_AE is defined outside this view; presumably it matches a-umlaut - verify.
term = PATTERN_AE.matcher(term).replaceAll("ae");
term = PATTERN_OE.matcher(term).replaceAll("oe"); // o-umlaut -> "oe"
term = PATTERN_UE.matcher(term).replaceAll("ue"); // u-umlaut -> "ue"
term = PATTERN_SZ.matcher(term).replaceAll("ss"); // sharp s -> "ss"
// Remove commas by reordering: while a comma is present, the (trimmed) text after
// the first comma is moved in front of the text before it, joined by one space,
// e.g. "doe, john" becomes "john doe".
int p;
while ((p = term.indexOf(',')) >= 0) {
term = term.substring(p + 1).trim() + " " + term.substring(0, p);
}
// NOTE(review): the loop above only exits once no comma is left, so this
// replacement cannot match anything when both statements are present.
term = PATTERN_COMMA.matcher(term).replaceAll(" ");
return term;
}

Expand Down

0 comments on commit aa80cb1

Please sign in to comment.