Skip to content

Commit

Permalink
*) Better snippets: words like GNU/Linux will not prevent Linux or GN…
Browse files Browse the repository at this point in the history
…U from being marked if they are search words (see http://www.yacy-forum.de/viewtopic.php?t=2891)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3068 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
low012 committed Dec 10, 2006
1 parent 644d8b8 commit 586add4
Showing 1 changed file with 22 additions and 3 deletions.
25 changes: 22 additions & 3 deletions source/de/anomic/plasma/plasmaSnippetCache.java
Expand Up @@ -52,6 +52,9 @@
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.regex.PatternSyntaxException;
import java.util.Set;
import java.util.TreeMap;

Expand Down Expand Up @@ -148,19 +151,35 @@ public String getLineMarked(Set queryHashes) {
prefix = "";
postfix = "";

while((w[j].matches("\\A[^\\p{L}\\p{N}].+"))) {
//cut off leading characters that are neither letters nor numbers and collect them in prefix
while(w[j].matches("\\A[^\\p{L}\\p{N}].+")) {
prefix = w[j].substring(0,1) + prefix;
w[j] = w[j].substring(1);
}

while((w[j].matches(".+[^\\p{L}\\p{N}]\\Z"))) {
//cut off trailing characters that are neither letters nor numbers and collect them in postfix
while(w[j].matches(".+[^\\p{L}\\p{N}]\\Z")) {
len = w[j].length();
postfix = w[j].substring(len-1,len) + postfix;
w[j] = w[j].substring(0,len-1);
}

//recursion: if a single character that is neither a letter nor a number splits the word (e.g. GNU/Linux), mark the left and right part separately
Pattern p = Pattern.compile("\\A([\\p{L}\\p{N}]+)([^\\p{L}\\p{N}])([\\p{L}\\p{N}]+)\\Z");
Matcher m = p.matcher(w[j]);
if(m.find()) {
String left = m.group(1);
String pattern = m.group(2);
String right = m.group(3);
Snippet snip = new Snippet(left,-1,null);
w[j] = snip.getLineMarked(queryHashes);
w[j] = w[j] + pattern;
snip = new Snippet(right,-1,null);
w[j] = w[j] + snip.getLineMarked(queryHashes);
}

//end contrib [MN]
if (plasmaCondenser.word2hash(w[j]).equals(h)) w[j] = "<b>" + w[j] + "</b>";
else if (plasmaCondenser.word2hash(w[j]).equals(h)) w[j] = "<b>" + w[j] + "</b>";
w[j] = prefix + w[j] + postfix;
}
}
Expand Down

0 comments on commit 586add4

Please sign in to comment.