Skip to content

Commit

Permalink
added refresh parser to htmlFilterContentScraper
Browse files Browse the repository at this point in the history
* getRefreshSeconds() - number of seconds until refresh
* getRefreshPath() - url path
See also: http://www.yacy-forum.de/viewtopic.php?p=16851#16851

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1657 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Feb 15, 2006
1 parent ba5fe0b commit 56516fd
Showing 1 changed file with 34 additions and 1 deletion.
35 changes: 34 additions & 1 deletion source/de/anomic/htmlFilter/htmlFilterContentScraper.java
Expand Up @@ -214,7 +214,18 @@ public void scrapeTag0(String tagname, Properties tagopts) {
if (tagname.equalsIgnoreCase("img")) images.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("alt",""));
if (tagname.equalsIgnoreCase("base")) try {root = new URL(tagopts.getProperty("href", ""));} catch (MalformedURLException e) {}
if (tagname.equalsIgnoreCase("frame")) anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name",""));
if (tagname.equalsIgnoreCase("meta")) metas.put((tagopts.getProperty("name", "")).toLowerCase(), tagopts.getProperty("content",""));
if (tagname.equalsIgnoreCase("meta")) {
String name = tagopts.getProperty("name", "");
if (name.length() > 0) {
metas.put(name.toLowerCase(), tagopts.getProperty("content",""));
return;
}
name = tagopts.getProperty("http-equiv", "");
if (name.length() > 0) {
metas.put(name.toLowerCase(), tagopts.getProperty("content",""));
return;
}
}
}

public void scrapeTag1(String tagname, Properties tagopts, byte[] text) {
Expand Down Expand Up @@ -330,6 +341,28 @@ public String[] getKeywords() {
}
}

/**
 * Returns the delay, in seconds, taken from the scraped "refresh" meta
 * entry (content of the form "N; url=target").
 *
 * @return the number in front of the first ';' of the refresh content,
 *         or 9999 if there is no "refresh" entry, the content contains
 *         no ';', or the part before the ';' is not a valid integer
 */
public int getRefreshSeconds() {
    final String content = (String) metas.get("refresh");
    if (content == null) return 9999;

    // only the "seconds;target" form is evaluated; anything else counts as "no refresh"
    final int pos = content.indexOf(';');
    if (pos < 0) return 9999;

    try {
        // Integer.parseInt rejects surrounding whitespace, so " 5" or "5 "
        // must be trimmed or they would be reported as 9999
        return Integer.parseInt(content.substring(0, pos).trim());
    } catch (NumberFormatException e) {
        // not a number: treat like a missing refresh directive
        return 9999;
    }
}

/**
 * Returns the target taken from the scraped "refresh" meta entry
 * (content of the form "N; url=target").
 *
 * @return the trimmed text following "url=" (matched case-insensitively)
 *         after the first ';' of the refresh content, or an empty string
 *         if there is no "refresh" entry, no ';', or no "url=" prefix
 */
public String getRefreshPath() {
    final String content = (String) metas.get("refresh");
    if (content == null) return "";

    final int pos = content.indexOf(';');
    if (pos < 0) return "";

    // The usual notation is "0; url=..." with a blank after the semicolon;
    // trim first, otherwise the prefix test below never matches that form.
    final String target = content.substring(pos + 1).trim();

    // case-insensitive and locale-independent check for the "url=" prefix
    if (!target.regionMatches(true, 0, "url=", 0, 4)) return "";
    return target.substring(4).trim();
}

/*
* (non-Javadoc)
* @see de.anomic.htmlFilter.htmlFilterScraper#close()
Expand Down

0 comments on commit 56516fd

Please sign in to comment.