Skip to content

Commit

Permalink
- added a media search for images, audio, video and applications
Browse files Browse the repository at this point in the history
- new search options on search page
- new option in ViewFile to display all links of a file
- enhanced collection data structure

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3054 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Dec 7, 2006
1 parent 25a64fe commit 10d888e
Show file tree
Hide file tree
Showing 19 changed files with 246 additions and 119 deletions.
2 changes: 1 addition & 1 deletion build.properties
Expand Up @@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4

# Release Configuration
releaseVersion=0.491
releaseVersion=0.492
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
Expand Down
16 changes: 15 additions & 1 deletion htroot/ViewFile.html
Expand Up @@ -42,7 +42,8 @@ <h2>View URL Content</h2>
<a href="?urlHash=#[hash]#&viewMode=iframe&words=#[words]#">Original</a> |
<a href="?urlHash=#[hash]#&viewMode=plain&words=#[words]#">Plain Text</a> |
<a href="?urlHash=#[hash]#&viewMode=parsed&words=#[words]#">Parsed Text</a> |
<a href="?urlHash=#[hash]#&viewMode=sentences&words=#[words]#">Parsed Sentences</a>
<a href="?urlHash=#[hash]#&viewMode=sentences&words=#[words]#">Parsed Sentences</a> |
<a href="?urlHash=#[hash]#&viewMode=links&words=#[words]#">Link List</a>
</td>
</tr>
</table>
Expand Down Expand Up @@ -85,6 +86,19 @@ <h3>Parsed Resource Sentences</h3><br>
<h3>Original Resource Content</h3><br>
<iframe src="#[url]#" width="800" height="400">
</iframe>
:: <!-- 5 -->
<h3>Link List</h3><br>
<table border="0" cellpadding="2" cellspacing="1">
#{links}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td>#[nr]#</td>
<td><tt>#[type]#</tt></td>
<td><tt>#[text]#</tt></td>
<td><tt>#[link]#</tt></td>
<td><tt>#[attr]#</tt></td>
</tr>
#{/links}#
</table>
#(/viewMode)#
</p>

Expand Down
108 changes: 81 additions & 27 deletions htroot/ViewFile.java
Expand Up @@ -51,8 +51,12 @@
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;

import de.anomic.data.wikiCode;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.index.indexURLEntry;
Expand All @@ -75,6 +79,7 @@ public class ViewFile {
public static final int VIEW_MODE_AS_PARSED_TEXT = 2;
public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3;
public static final int VIEW_MODE_AS_IFRAME = 4;
public static final int VIEW_MODE_AS_LINKLIST = 5;

public static final String[] highlightingColors = new String[] {
"255,255,100",
Expand Down Expand Up @@ -271,7 +276,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
} else if (viewMode.equals("iframe")) {
prop.put("viewMode", VIEW_MODE_AS_IFRAME);
prop.put("viewMode_url", url.toNormalform());
} else if (viewMode.equals("parsed") || viewMode.equals("sentences")) {
} else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("links")) {
// parsing the resource content
plasmaParserDocument document = null;
try {
Expand Down Expand Up @@ -305,45 +310,52 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

prop.put("viewMode", VIEW_MODE_AS_PARSED_TEXT);
prop.put("viewMode_parsedText", content);
} else {
} else if (viewMode.equals("sentences")) {
prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES);
final Enumeration sentences = document.getSentences(pre);

boolean dark = true;
int i = 0;
if (sentences != null)
if (sentences != null) {
String[] wordArray = wordArray(post.get("words", null));

// Search word highlighting
while (sentences.hasMoreElements()) {
String currentSentence = wikiCode.replaceHTML((String) sentences.nextElement());

// Search word highlighting
String words = post.get("words", null);
if (words != null) {
try {
words = URLDecoder.decode(words, "UTF-8");
} catch (UnsupportedEncodingException e) {
}

String[] wordArray = words.substring(1,
words.length() - 1).split(",");
for (int j = 0; j < wordArray.length; j++) {
String currentWord = wordArray[j].trim();
currentSentence = currentSentence.replaceAll(
currentWord,
"<b style=\"color: black; background-color: rgb("
+ highlightingColors[j % 6]
+ ");\">" + currentWord
+ "</b>");
}
}

prop.put("viewMode_sentences_" + i + "_nr", Integer.toString(i + 1));
prop.put("viewMode_sentences_" + i + "_text", currentSentence);
prop.put("viewMode_sentences_" + i + "_text", markup(wordArray, (String) sentences.nextElement()));
prop.put("viewMode_sentences_" + i + "_dark", ((dark) ? 1 : 0));
dark = !dark;
i++;
}
}
prop.put("viewMode_sentences", i);

} else if (viewMode.equals("links")) {
prop.put("viewMode", VIEW_MODE_AS_LINKLIST);
String[] wordArray = wordArray(post.get("words", null));
boolean dark = true;
int i = 0;
i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0));
dark = (i % 2 == 0);

TreeSet ts = document.getImages();
Iterator tsi = ts.iterator();
htmlFilterImageEntry entry;
while (tsi.hasNext()) {
entry = (htmlFilterImageEntry) tsi.next();
prop.put("viewMode_links_" + i + "_nr", i);
prop.put("viewMode_links_" + i + "_dark", ((dark) ? 1 : 0));
prop.put("viewMode_links_" + i + "_type", "image");
prop.put("viewMode_links_" + i + "_text", markup(wordArray, entry.alt()));
prop.put("viewMode_links_" + i + "_link", "<a href=\"" + (String) entry.url().toNormalform() + "\">" + markup(wordArray, (String) entry.url().toNormalform()) + "</a>");
prop.put("viewMode_links_" + i + "_attr", entry.width() + "&nbsp;x&nbsp;" + entry.height());
dark = !dark;
i++;
}
prop.put("viewMode_links", i);

}
if (document != null) document.close();
}
Expand All @@ -358,4 +370,46 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
return prop;
}

/**
 * Turns the raw <code>words</code> request parameter into the list of
 * search words to highlight.
 * <p>
 * The value is URL-decoded as UTF-8, its first and last character are
 * removed (presumably the enclosing brackets of a value such as
 * <code>[foo, bar]</code> -- confirm against the page that builds the
 * link) and the remainder is split at commas. Every word is trimmed and
 * empty words are dropped, because an empty highlight word would match at
 * every position of the text.
 *
 * @param words the raw parameter value, may be <code>null</code>
 * @return the trimmed, non-empty words, or <code>null</code> if the
 *         parameter is missing, cannot be decoded, is too short to carry
 *         the two enclosing characters, or contains no word at all
 */
private static final String[] wordArray(String words) {
    if (words == null) return null;

    final String decoded;
    try {
        decoded = URLDecoder.decode(words, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // cannot happen: UTF-8 support is mandatory for every Java platform
        return null;
    } catch (IllegalArgumentException e) {
        // malformed %-escape in the parameter; highlighting is optional,
        // so render the page without it instead of failing the request
        return null;
    }

    // the two enclosing characters must be present before they can be cut off
    if (decoded.length() < 2) return null;

    final String[] parts = decoded.substring(1, decoded.length() - 1).split(",");
    final String[] cleaned = new String[parts.length];
    int n = 0;
    for (int j = 0; j < parts.length; j++) {
        final String word = parts[j].trim();
        if (word.length() > 0) cleaned[n++] = word;
    }
    if (n == 0) return null;

    final String[] result = new String[n];
    System.arraycopy(cleaned, 0, result, 0, n);
    return result;
}

/**
 * Prepares a text for display and highlights the given search words in it.
 * <p>
 * The text is first passed through <code>wikiCode.replaceHTML</code>
 * (presumably HTML escaping -- confirm in wikiCode). Afterwards every
 * literal, case-sensitive occurrence of a search word is wrapped in a
 * <code>&lt;b&gt;</code> element whose background colour is taken from
 * <code>highlightingColors</code>, cycling through the array by word index.
 * <p>
 * The words are matched as plain text in one left-to-right pass. They are
 * deliberately not handed to <code>String.replaceAll</code>: that would
 * interpret them as regular expressions (a word such as <code>c++</code>
 * fails to compile, <code>.</code> matches everything), would interpret
 * <code>$</code> and <code>\</code> in the replacement, and would let a
 * later word match inside the markup inserted for an earlier one.
 * NOTE(review): a word can still match inside text produced by
 * <code>replaceHTML</code> itself; whether that matters depends on what
 * that method emits.
 *
 * @param wordArray the words to highlight; <code>null</code> disables
 *                  highlighting, empty or whitespace-only entries are ignored
 * @param message   the text to display
 * @return the processed text including the highlighting markup
 */
private static final String markup(String[] wordArray, String message) {
    message = wikiCode.replaceHTML(message);
    if (wordArray == null || message == null) return message;

    // trim once up front; entries that end up empty are skipped while scanning
    final String[] words = new String[wordArray.length];
    boolean anyWord = false;
    for (int j = 0; j < wordArray.length; j++) {
        words[j] = (wordArray[j] == null) ? "" : wordArray[j].trim();
        if (words[j].length() > 0) anyWord = true;
    }
    if (!anyWord) return message;

    final StringBuffer out = new StringBuffer(message.length() + 64);
    int pos = 0;
    while (pos < message.length()) {
        // the first word (in array order) that starts at this position wins
        int hit = -1;
        for (int j = 0; j < words.length; j++) {
            if (words[j].length() > 0 && message.startsWith(words[j], pos)) {
                hit = j;
                break;
            }
        }
        if (hit < 0) {
            out.append(message.charAt(pos));
            pos++;
        } else {
            out.append("<b style=\"color: black; background-color: rgb(");
            out.append(highlightingColors[hit % highlightingColors.length]);
            out.append(");\">");
            out.append(words[hit]);
            out.append("</b>");
            // continue behind the match so the inserted markup is never rescanned
            pos += words[hit].length();
        }
    }
    return out.toString();
}

/**
 * Writes one row of the link list for every entry of a media map.
 * <p>
 * For each entry the properties <code>viewMode_links_&lt;row&gt;_nr</code>,
 * <code>_dark</code>, <code>_type</code>, <code>_text</code>,
 * <code>_link</code> and <code>_attr</code> are set. The entry key is cast
 * to <code>String</code> and used as link target and link label, the entry
 * value is cast to <code>String</code> and used as descriptive text; label
 * and text are run through <code>markup</code> for search word highlighting.
 * The <code>_nr</code> value is the row index itself, matching the counter
 * the caller keeps using for the rows it appends afterwards.
 *
 * @param prop      the property container to fill
 * @param wordArray the search words to highlight, may be <code>null</code>
 * @param c         index of the first row to write
 * @param media     the links to list; <code>null</code> is treated as empty
 * @param name      the value written to the <code>_type</code> column
 * @param dark      whether the first row gets the dark style; the style
 *                  alternates with every following row
 * @return the number of rows written
 */
private static int putMediaInfo(serverObjects prop, String[] wordArray, int c, Map media, String name, boolean dark) {
    if (media == null) return 0;

    final Iterator mi = media.entrySet().iterator();
    int written = 0;
    while (mi.hasNext()) {
        final Map.Entry entry = (Map.Entry) mi.next();
        final String url = (String) entry.getKey();
        // the link target comes from a parsed document and ends up inside a
        // double-quoted attribute: a raw '"' would terminate the attribute
        final String href = (url == null) ? "" : url.replaceAll("\"", "&quot;");
        final String row = "viewMode_links_" + c + "_";

        prop.put(row + "nr", c);
        prop.put(row + "dark", ((dark) ? 1 : 0));
        prop.put(row + "type", name);
        prop.put(row + "text", markup(wordArray, (String) entry.getValue()));
        prop.put(row + "link", "<a href=\"" + href + "\">" + markup(wordArray, url) + "</a>");
        prop.put(row + "attr", "");

        dark = !dark;
        c++;
        written++;
    }
    return written;
}

}
9 changes: 7 additions & 2 deletions htroot/index.html
Expand Up @@ -21,8 +21,13 @@ <h2 class="yacy">
<input type="hidden" name="display" value="#[display]#" />
<input name="search" type="text" size="52" maxlength="80" value="#[former]#" />
<input type="submit" name="Enter" value="Search" />
<input type="hidden" name="former" value="#[former]#" />
#(searchoptions)#
<input type="hidden" name="former" value="#[former]#" /><br />
<input type="radio" name="contentdom" value="text" #(contentdomCheckText)#::checked="checked"#(/contentdomCheckText)# />Text&nbsp;&nbsp;
<input type="radio" name="contentdom" value="image" #(contentdomCheckImage)#::checked="checked"#(/contentdomCheckImage)# />Images&nbsp;&nbsp;
<input type="radio" name="contentdom" value="audio" #(contentdomCheckAudio)#::checked="checked"#(/contentdomCheckAudio)# />Audio&nbsp;&nbsp;
<input type="radio" name="contentdom" value="video" #(contentdomCheckVideo)#::checked="checked"#(/contentdomCheckVideo)# />Video&nbsp;&nbsp;
<input type="radio" name="contentdom" value="app" #(contentdomCheckApp)#::checked="checked"#(/contentdomCheckApp)# />Applications&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
#(searchoptions)#
<input type="hidden" name="count" value="10" />
<input type="hidden" name="order" value="Date-YBR-Quality" />
<input type="hidden" name="resource" value="global" />
Expand Down
17 changes: 15 additions & 2 deletions htroot/index.java
Expand Up @@ -83,6 +83,15 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
}
}

// search domain
int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
String cds = (post == null) ? "text" : post.get("contentdom", "text");
if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO;
if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO;
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;

// we create empty entries for template strings
String promoteSearchPageGreeting = env.getConfig("promoteSearchPageGreeting", "");
if (promoteSearchPageGreeting.length() == 0) promoteSearchPageGreeting = "P2P WEB SEARCH";
Expand Down Expand Up @@ -123,8 +132,12 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("display", display);
prop.put("constraint", constraint);
prop.put("searchoptions_display", display);


prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);

return prop;
}

Expand Down
20 changes: 14 additions & 6 deletions htroot/yacysearch.html
Expand Up @@ -22,12 +22,20 @@
<p class="yacylogo"><a href="http://yacy.net/yacy/" class="yacylogo"><img src="/env/grafics/yacy.png" alt="yacy" /></a></p>
<h2>#[promoteSearchPageGreeting]#</h2>
<fieldset class="maininput">
<table width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td width="80%">
<input name="search" type="text" size="50" maxlength="80" value="#[former]#" onClick='document.searchform.Enter.value = "Search"' />
<input type="submit" name="Enter" value="Search" />
</td><td width="20%">
<a href="index.html?display=#[display]#&searchoptions=1&count=#[count]#&order=#[order]#&resource=#[resource]#&time=#[time]#&urlmaskfilter=#[urlmaskfilter]#&prefermaskfilter=#[prefermaskfilter]#&cat=#[cat]#&type=#[type]#&constraint=#[constraint]#&former=#[former]#">more options<a>
</td></tr>
<table width="100%" border="0" cellspacing="0" cellpadding="0">
<tr><td width="80%">
<input name="search" type="text" size="50" maxlength="80" value="#[former]#" onClick='document.searchform.Enter.value = "Search"' />
<input type="submit" name="Enter" value="Search" />
</td><td width="20%">
<a href="index.html?display=#[display]#&searchoptions=1&count=#[count]#&order=#[order]#&resource=#[resource]#&time=#[time]#&urlmaskfilter=#[urlmaskfilter]#&prefermaskfilter=#[prefermaskfilter]#&cat=#[cat]#&type=#[type]#&constraint=#[constraint]#&contentdom=#[contentdom]#&former=#[former]#">more options</a>
</td></tr>
<tr><td width="100%" colspan="2">
<input type="radio" name="contentdom" value="text" #(contentdomCheckText)#::checked="checked"#(/contentdomCheckText)# />Text&nbsp;&nbsp;
<input type="radio" name="contentdom" value="image" #(contentdomCheckImage)#::checked="checked"#(/contentdomCheckImage)# />Images&nbsp;&nbsp;
<input type="radio" name="contentdom" value="audio" #(contentdomCheckAudio)#::checked="checked"#(/contentdomCheckAudio)# />Audio&nbsp;&nbsp;
<input type="radio" name="contentdom" value="video" #(contentdomCheckVideo)#::checked="checked"#(/contentdomCheckVideo)# />Video&nbsp;&nbsp;
<input type="radio" name="contentdom" value="app" #(contentdomCheckApp)#::checked="checked"#(/contentdomCheckApp)# />Applications
</td></tr>
</table>
<input type="hidden" name="former" value="#[former]#" />
<input type="hidden" name="count" value="#[count]#" />
Expand Down
26 changes: 23 additions & 3 deletions htroot/yacysearch.java
Expand Up @@ -126,6 +126,12 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("type_resultbottomline", 0);
prop.put("type_results", "");
prop.put("display", display);
prop.put("contentdom", "text");
prop.put("contentdomCheckText", 1);
prop.put("contentdomCheckAudio", 0);
prop.put("contentdomCheckVideo", 0);
prop.put("contentdomCheckImage", 0);
prop.put("contentdomCheckApp", 0);
return prop;
}

Expand Down Expand Up @@ -163,8 +169,16 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true");
if (!indexDistributeGranted || !indexReceiveGranted) { global = false; }

// find search domain
int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
String cds = post.get("contentdom", "text");
if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO;
if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO;
if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;

serverObjects prop = new serverObjects();

if (post.get("cat", "href").equals("href")) {

final TreeSet query = plasmaSearchQuery.cleanQuery(querystring);
Expand Down Expand Up @@ -234,13 +248,13 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (order.endsWith("YBR")) order3 = plasmaSearchRankingProfile.ORDER_YBR;
if (order.endsWith("Date")) order3 = plasmaSearchRankingProfile.ORDER_DATE;
if (order.endsWith("Quality")) order3 = plasmaSearchRankingProfile.ORDER_QUALITY;

// do the search
plasmaSearchQuery thisSearch = new plasmaSearchQuery(
query,
maxDistance,
prefermask,
plasmaSearchQuery.CONTENTDOM_TEXT,
contentdom,
count,
searchtime,
urlmask,
Expand Down Expand Up @@ -408,6 +422,12 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("display", display);
prop.put("indexof", (indexof) ? "on" : "off");
prop.put("constraint", constraint.exportB64());
prop.put("contentdom", cds);
prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);

// return rewrite properties
return prop;
Expand Down
8 changes: 4 additions & 4 deletions source/de/anomic/index/indexCachedRI.java
Expand Up @@ -145,14 +145,14 @@ public indexContainer getContainer(String wordHash, Set urlselection, long maxTi
if (container == null) {
container = riIntern.getContainer(wordHash, urlselection, maxTime);
} else {
container.add(riIntern.getContainer(wordHash, urlselection, maxTime), maxTime);
container.addAllUnique(riIntern.getContainer(wordHash, urlselection, maxTime));
}

// get from collection index
if (container == null) {
container = backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
} else {
container.add(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), maxTime);
container.addAllUnique(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime));
}
return container;
}
Expand Down Expand Up @@ -208,8 +208,8 @@ public void close() {

public indexContainer deleteContainer(String wordHash) {
indexContainer c = riIntern.deleteContainer(wordHash);
if (c == null) c = riExtern.deleteContainer(wordHash); else c.add(riExtern.deleteContainer(wordHash), -1);
if (c == null) c = backend.deleteContainer(wordHash); else c.add(backend.deleteContainer(wordHash), -1);
if (c == null) c = riExtern.deleteContainer(wordHash); else c.addAllUnique(riExtern.deleteContainer(wordHash));
if (c == null) c = backend.deleteContainer(wordHash); else c.addAllUnique(backend.deleteContainer(wordHash));
return c;
}

Expand Down

0 comments on commit 10d888e

Please sign in to comment.