Skip to content

Commit

Permalink
*) Snippet fetching:
Browse files Browse the repository at this point in the history
   Snippets are now fetched synchronously if the query parameter "fetchSnippet="
   is appended to the query string on the YaCy search page. This is required
   for the RSS feed.
   See: http://www.yacy-forum.de/viewtopic.php?t=4051
*) Small changes in the XSLT stylesheet that is used to generate an HTML page from
   the RSS feed.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3787 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
theli committed Jun 4, 2007
1 parent e1a5bab commit 051a65f
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 20 deletions.
4 changes: 2 additions & 2 deletions htroot/xml/snippet.java
Expand Up @@ -25,8 +25,8 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
//get the timeout for snippet-fetching
int mediasnippet_timeout = 15000;
int textsnippet_timeout = 10000;
mediasnippet_timeout = Integer.parseInt((env.getConfig("timeout_text", "15000")));
textsnippet_timeout = Integer.parseInt((env.getConfig("timeout_media", "10000")));
mediasnippet_timeout = Integer.parseInt(env.getConfig("timeout_text", "15000"));
textsnippet_timeout = Integer.parseInt(env.getConfig("timeout_media", "10000"));

// getting url
String urlString = post.get("url", "");
Expand Down
49 changes: 45 additions & 4 deletions htroot/yacysearch.java
Expand Up @@ -52,6 +52,7 @@
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.PatternSyntaxException;
import java.util.TreeSet;

Expand All @@ -69,6 +70,7 @@
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaSearchResults;
Expand Down Expand Up @@ -266,9 +268,10 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
final boolean globalsearch = (global) && (yacyonline) && (!samesearch);

// do the search
TreeSet queryHashes = plasmaCondenser.words2hashes(query[0]);
plasmaSearchQuery thisSearch = new plasmaSearchQuery(
querystring,
plasmaCondenser.words2hashes(query[0]),
queryHashes,
plasmaCondenser.words2hashes(query[1]),
maxDistance,
prefermask,
Expand Down Expand Up @@ -338,9 +341,47 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (result.hasSnippet()) {
prop.put("type_results_" + i + "_snippet", 1);
prop.putASIS("type_results_" + i + "_snippet_text", result.getSnippet().getLineMarked(results.getQuery().queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java
} else {
prop.put("type_results_" + i + "_snippet", 0);
prop.put("type_results_" + i + "_snippet_text", "");
} else {
if (post.containsKey("fetchSnippet")) {
/* fetch the snippet now */
try {
// snippet fetch timeout
int textsnippet_timeout = Integer.parseInt(env.getConfig("timeout_media", "10000"));

// boolean line_end_with_punctuation
boolean pre = post.get("pre", "false").equals("true");

// if 'remove' is set to true, then RWI references to URLs that do not have the snippet are removed
boolean remove = post.get("remove", "false").equals("true");

URL resultURL = new URL(result.getUrl());
plasmaSnippetCache.TextSnippet snippet = sb.snippetCache.retrieveTextSnippet(
resultURL,
queryHashes,
true,
pre,
260,
textsnippet_timeout
);

if (snippet.getErrorCode() < 11) {
// no problems occurred
//prop.put("text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown");
prop.putASIS("type_results_" + i + "_snippet_text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown");
} else {
// problems with snippet fetch
prop.put("type_results_" + i + "_snippet_text", (remove) ? sb.snippetCache.failConsequences(snippet, queryHashes) : snippet.getError());
}
prop.put("type_results_" + i + "_snippet", 1);
} catch (MalformedURLException e) {
prop.put("type_results_" + i + "_snippet", 0);
prop.put("type_results_" + i + "_snippet_text", "");
}
} else {
/* no snippet available (will be fetched later via ajax) */
prop.put("type_results_" + i + "_snippet", 0);
prop.put("type_results_" + i + "_snippet_text", "");
}
}
prop.put("type_results", results.numResults());
prop.put("references", results.getReferences());
Expand Down
22 changes: 13 additions & 9 deletions htroot/yacysearch.rss
Expand Up @@ -3,6 +3,10 @@
<rss version="2.0"
xmlns:yacyTopwords="http://www.yacy.net/yacy/topwords"
xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
<!--
YACY P2P WEB SEARCH - Results
Hint: append the query-parameter "fetchSnippet=" to embed snippets
-->
<channel>
<title>YaCy P2P-Search for #[former]#</title>
<description>Search for #[former]#</description>
Expand All @@ -11,21 +15,21 @@
<title>Search for #[former]#</title>
</image>
<opensearch:totalResults>#[type_results]#</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>#[type_results]#</opensearch:itemsPerPage>
<opensearch:link rel="search" href="opensearchdescription.xml" type="application/opensearchdescription+xml"/>
<opensearch:Query role="request" searchTerms="#[former]#" />
<opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>#[type_results]#</opensearch:itemsPerPage>
<opensearch:link rel="search" href="opensearchdescription.xml" type="application/opensearchdescription+xml"/>
<opensearch:Query role="request" searchTerms="#[former]#" />

#(type)#
#{results}#
<item>
#{results}#<item>
<title><![CDATA[#[description]#]]></title>
<link>#[url]#</link>
#(snippet)#::<description><![CDATA[#[text]#]]></description>#(/snippet)#
<pubDate>#[date]#</pubDate>
<guid>#[urlhash]#</guid>
</item>
#{/results}#
<yacyTopwords:topwords>

#{/results}#<yacyTopwords:topwords>
#(combine)#
::
#{words}#
Expand All @@ -34,7 +38,7 @@
</yacyTopwords:item>
#{/words}#
#(/combine)#
</yacyTopwords:topwords>
</yacyTopwords:topwords>
::
#(/type)#
</channel>
Expand Down
13 changes: 8 additions & 5 deletions htroot/yacysearch.xsl
@@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8" ?>
<?xml-stylesheet type='text/xsl' href='/rss.xsl' version='1.0'?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.0">
<xsl:output method="html"/>
<xsl:template match='/rss'>
<html>
<head>
Expand All @@ -9,6 +10,7 @@
<link rel="shortcut icon" href="favicon.ico" />
<style type="text/css">
@import "/env/style.css";
@import "/env/base.css";
</style>
</head>
<body>
Expand All @@ -24,11 +26,12 @@
</xsl:template>

<xsl:template match='item'>
<p>
<b><xsl:value-of select='title'/></b><br/>
<a href="{link}" ><xsl:value-of select='link' /></a><br/>
<xsl:value-of select='pubDate' /><br/>
</p>
<div class="searchresults">
<h4 class="linktitle"><a href="{link}" ><xsl:value-of select='title'/></a></h4>
<p class="snippet"><span class="snippetLoaded"><xsl:value-of select='description'/></span></p>
<p class="url"><a href="{link}" ><xsl:value-of select='link' /></a></p>
<p class="urlinfo"><xsl:value-of select='pubDate' /></p>
</div>
</xsl:template>

</xsl:stylesheet>

0 comments on commit 051a65f

Please sign in to comment.