Skip to content

Commit

Permalink
- some corrections in usage of getFile() and getFileName()
Browse files Browse the repository at this point in the history
- added more attributes in json response writer according to yacy
servlet
  • Loading branch information
Orbiter committed Sep 11, 2012
1 parent 62add1d commit e54ac38
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 29 deletions.
2 changes: 1 addition & 1 deletion htroot/yacysearchitem.java
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ public static serverObjects respond(final RequestHeader header, final serverObje
prop.putHTML("content_sizename", sizename(result.filesize()));
prop.putHTML("content_showSize_sizename", sizename(result.filesize()));
prop.putHTML("content_host", resultURL.getHost() == null ? "" : resultURL.getHost());
prop.putHTML("content_file", resultURL.getFile());
prop.putHTML("content_file", resultURL.getFileName());
prop.putHTML("content_path", resultURL.getPath());
prop.put("content_nl", (item == theQuery.offset) ? 0 : 1);
prop.putHTML("content_publisher", result.publisher());
Expand Down
3 changes: 1 addition & 2 deletions source/de/anomic/crawler/ResultImages.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,12 @@ public static void registerImages(final DigestURI source, final Document documen
if (doubleCheck.contains(url)) continue;
doubleCheck.add(url);

final String name = image.url().getFile();
boolean good = false;
if (image.width() > 120 &&
image.height() > 100 &&
image.width() < 1200 &&
image.height() < 1000 &&
name.lastIndexOf(".gif") == -1) {
!"gif".equals(image.url().getFileExtension())) {
// && ((urlString.lastIndexOf(".jpg") != -1)) ||
// ((urlString.lastIndexOf(".png") != -1)){

Expand Down
27 changes: 20 additions & 7 deletions source/net/yacy/cora/document/MultiProtocolURI.java
Original file line number Diff line number Diff line change
Expand Up @@ -632,16 +632,29 @@ private void identSearchpart() {
}
}

/**
* Get the hpath plus search field plus anchor of this URL.
* See http://www.ietf.org/rfc/rfc1738.txt for naming.
* If there is no search part and no anchor, the result is identical to getPath.
* This is defined according to http://docs.oracle.com/javase/1.4.2/docs/api/java/net/URL.html#getFile()
* <p>
* Convenience variant: delegates to {@link #getFile(boolean, boolean)} with both
* flags set to false, so an existing anchor is kept in the result.
* @return the path, followed by '?' and the search part and by '#' and the anchor where these are present
*/
public String getFile() {
return getFile(false, false);
}

public String getFile(final boolean excludeReference, final boolean removeSessionID) {
// this is the path plus quest plus ref
// if there is no quest and no ref the result is identical to getPath
// this is defined according to http://java.sun.com/j2se/1.4.2/docs/api/java/net/URL.html#getFile()
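/**
* Get the hpath plus search field plus anchor (if wanted).
* See http://www.ietf.org/rfc/rfc1738.txt for naming.
* If there is no search part and no anchor, the result is identical to getPath.
* This is defined according to http://docs.oracle.com/javase/1.4.2/docs/api/java/net/URL.html#getFile()
* @param excludeAnchor if true, the anchor ('#' part) is not appended to the result
* @param removeSessionID presumably requests stripping of a session-id parameter from the search part (TODO confirm)
* @return the path, optionally followed by the search part and the anchor
*/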
public String getFile(final boolean excludeAnchor, final boolean removeSessionID) {
if (this.searchpart == null) {
if (excludeReference || this.anchor == null) return this.path;
if (excludeAnchor || this.anchor == null) return this.path;
final StringBuilder sb = new StringBuilder(120);
sb.append(this.path);
sb.append('#');
Expand All @@ -654,7 +667,7 @@ public String getFile(final boolean excludeReference, final boolean removeSessio
if (q.toLowerCase().startsWith(sid.toLowerCase() + "=")) {
final int p = q.indexOf('&');
if (p < 0) {
if (excludeReference || this.anchor == null) return this.path;
if (excludeAnchor || this.anchor == null) return this.path;
final StringBuilder sb = new StringBuilder(120);
sb.append(this.path);
sb.append('#');
Expand All @@ -678,7 +691,7 @@ public String getFile(final boolean excludeReference, final boolean removeSessio
sb.append(this.path);
sb.append('?');
sb.append(q);
if (excludeReference || this.anchor == null) return sb.toString();
if (excludeAnchor || this.anchor == null) return sb.toString();
sb.append('#');
sb.append(this.anchor);
return sb.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@

import java.io.IOException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.services.federated.solr.OpensearchResponseWriter.ResHead;
import net.yacy.search.index.YaCySchema;
Expand All @@ -50,6 +53,14 @@
*/
public class JsonResponseWriter implements QueryResponseWriter {

// define a list of simple YaCySchema -> json Token matchings
private static final Map<String, String> field2tag = new HashMap<String, String>();
static {
field2tag.put(YaCySchema.url_protocol_s.name(), "protocol");
field2tag.put(YaCySchema.host_s.name(), "host");
field2tag.put(YaCySchema.url_file_ext_s.name(), "ext");
}

private String title;

public JsonResponseWriter() {
Expand Down Expand Up @@ -109,18 +120,33 @@ public void write(final Writer writer, final SolrQueryRequest request, final Sol
List<Fieldable> fields = doc.getFields();
int fieldc = fields.size();
List<String> texts = new ArrayList<String>();
MultiProtocolURI url = null;
String description = "", title = "";
StringBuilder path = new StringBuilder(80);
for (int j = 0; j < fieldc; j++) {
Fieldable value = fields.get(j);
String fieldName = value.name();
if (YaCySchema.title.name().equals(fieldName)) {
title = value.stringValue();
texts.add(title);

// apply generic matching rule
String stag = field2tag.get(fieldName);
if (stag != null) {
solitaireTag(writer, stag, value.stringValue());
continue;
}

// some special handling here
if (YaCySchema.sku.name().equals(fieldName)) {
solitaireTag(writer, "link", value.stringValue());
String u = value.stringValue();
try {
url = new MultiProtocolURI(u);
solitaireTag(writer, "link", u);
solitaireTag(writer, "file", url.getFileName());
} catch (MalformedURLException e) {}
continue;
}
if (YaCySchema.title.name().equals(fieldName)) {
title = value.stringValue();
texts.add(title);
continue;
}
if (YaCySchema.description.name().equals(fieldName)) {
Expand All @@ -133,18 +159,10 @@ public void write(final Writer writer, final SolrQueryRequest request, final Sol
solitaireTag(writer, "guid", urlhash);
continue;
}
if (YaCySchema.host_s.name().equals(fieldName)) {
solitaireTag(writer, "host", value.stringValue());
continue;
}
if (YaCySchema.url_paths_sxt.name().equals(fieldName)) {
path.append('/').append(value.stringValue());
continue;
}
if (YaCySchema.url_file_ext_s.name().equals(fieldName)) {
solitaireTag(writer, "ext", value.stringValue());
continue;
}
if (YaCySchema.last_modified.name().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d));
Expand All @@ -169,9 +187,11 @@ public void write(final Writer writer, final SolrQueryRequest request, final Sol
texts.add(value.stringValue());
continue;
}
}
// compute snippet from texts

//missing: "code","faviconCode"
}

// compute snippet from texts
solitaireTag(writer, "path", path.toString());
solitaireTag(writer, "title", title.length() == 0 ? (texts.size() == 0 ? path.toString() : texts.get(0)) : title);
List<String> snippet = urlhash == null ? null : snippets.get(urlhash);
Expand Down
2 changes: 1 addition & 1 deletion source/net/yacy/document/parser/html/ContentScraper.java
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ public void scrapeTag1(final String tagname, final Properties tagopts, char[] te
final String href = tagopts.getProperty("href", EMPTY_STRING);
MultiProtocolURI url;
if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
final String f = url.getFile();
final String f = url.getFileName();
final int p = f.lastIndexOf('.');
final String type = (p < 0) ? EMPTY_STRING : f.substring(p + 1);
if (type.equals("png") || type.equals("gif") || type.equals("jpg") || type.equals("jpeg") || type.equals("tiff") || type.equals("tif")) {
Expand Down
8 changes: 4 additions & 4 deletions source/net/yacy/search/snippet/ResultEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,15 @@ public ResultEntry(final URIMetadata urlentry,
int p = host.indexOf('.');
final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6));
final Seed seed = peers.getConnected(hash);
final String filename = urlentry.url().getFile();
final String path = urlentry.url().getFile();
String address = null;
if ((seed == null) || ((address = seed.getPublicAddress()) == null)) {
// seed is not known from here
try {
indexSegment.termIndex().remove(
Word.words2hashesHandles(Condenser.getWords(
("yacyshare " +
filename.replace('?', ' ') +
path.replace('?', ' ') +
" " +
urlentry.dc_title()), null).keySet()),
urlentry.hash());
Expand All @@ -100,8 +100,8 @@ public ResultEntry(final URIMetadata urlentry,
indexSegment.fulltext().remove(urlentry.hash()); // clean up
throw new RuntimeException("index void");
}
this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
this.alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename;
this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + path;
this.alternative_urlname = "http://share." + seed.getName() + ".yacy" + path;
if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p);
}
}
Expand Down

0 comments on commit e54ac38

Please sign in to comment.