Skip to content

Commit

Permalink
prepared generic text parser environment
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@15 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Apr 12, 2005
1 parent e374aca commit a87a17a
Show file tree
Hide file tree
Showing 4 changed files with 360 additions and 19 deletions.
12 changes: 7 additions & 5 deletions htroot/CacheAdmin_p.java
Expand Up @@ -149,13 +149,15 @@ private static String formatHeader(httpHeader header) {
return out;
}

private static String formatAnchor(Properties a) {
private static String formatAnchor(Map a) {
String out = "<table border=\"0\" cellspacing=\"0\" cellpadding=\"0\">";
Enumeration e = a.keys();
Iterator i = a.entrySet().iterator();
String url, descr;
while (e.hasMoreElements()) {
url = (String) e.nextElement();
descr = a.getProperty(url).trim();
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
url = (String) entry.getKey();
descr = ((String) entry.getValue()).trim();
if (descr.length() == 0) descr = "-";
out += "<tr valign=\"top\"><td><span class=\"small\">" + descr + "&nbsp;</span></td><td class=\"tt\">" + url + "</td></tr>";
}
Expand Down
61 changes: 53 additions & 8 deletions source/de/anomic/htmlFilter/htmlFilterContentScraper.java
Expand Up @@ -161,25 +161,70 @@ public Properties getImage() {
return image;
}

/**
 * Returns the {@code hyperlinks} field. If the field is still {@code null},
 * {@code resortLinks()} is invoked first to build it.
 */
public Properties getHyperlinks() {
public Map getHyperlinks() {
if (hyperlinks == null) resortLinks();
return hyperlinks;
}

/**
 * Returns the {@code medialinks} field. If the field is still {@code null},
 * {@code resortLinks()} is invoked first to build it.
 */
public Properties getMedialinks() {
public Map getMedialinks() {
if (medialinks == null) resortLinks();
return medialinks;
}

/**
 * Returns the {@code emaillinks} field. If the field is still {@code null},
 * {@code resortLinks()} is invoked first to build it.
 */
public Properties getEmaillinks() {
public Map getEmaillinks() {
if (emaillinks == null) resortLinks();
return emaillinks;
}

Properties hyperlinks = null;
Properties medialinks = null;
Properties emaillinks = null;

HashMap hyperlinks = null;
HashMap medialinks = null;
HashMap emaillinks = null;

/**
 * Rebuilds the {@code hyperlinks}, {@code medialinks} and {@code emaillinks}
 * maps from the entries of {@code anchor} and {@code image}.
 *
 * <p>Each anchor entry is sorted by its key (the URL string):
 * <ul>
 *   <li>keys starting with {@code "mailto:"} go to {@code emaillinks}, with
 *       that prefix stripped;</li>
 *   <li>keys containing a {@code '.'} at an index greater than zero are
 *       normalized with {@code urlNormalform}; if the normal form is not
 *       {@code null}, the entry goes to {@code medialinks} when the lower-cased
 *       text after the last dot is found in {@code mediaExt}, otherwise to
 *       {@code hyperlinks};</li>
 *   <li>all other keys (no dot, or a dot only at index 0) are dropped.</li>
 * </ul>
 * Afterwards every image entry whose normalized key is not {@code null} is
 * added to {@code medialinks}, and {@code expandHyperlinks()} is called.
 *
 * <p>A {@code null} anchor key is skipped. The previous code checked for
 * {@code null} only in the mailto test and then dereferenced the same key in
 * the else branch, which raised a {@code NullPointerException}.
 */
private synchronized void resortLinks() {
    Iterator i = anchor.entrySet().iterator();
    hyperlinks = new HashMap();
    medialinks = new HashMap();
    emaillinks = new HashMap();

    Map.Entry entry;
    String url;
    String normal;
    int extpos;
    String ext;
    while (i.hasNext()) {
        entry = (Map.Entry) i.next();
        url = (String) entry.getKey();
        if (url == null) {
            // a HashMap may carry a null key; there is nothing to classify
            continue;
        }
        if (url.startsWith("mailto:")) {
            // store the bare address, without the "mailto:" scheme prefix
            emaillinks.put(url.substring(7), entry.getValue());
            continue;
        }
        extpos = url.lastIndexOf(".");
        if (extpos <= 0) {
            // no extension to inspect; such anchors are not collected
            continue;
        }
        // extension without the leading dot, lower-cased for the lookup
        ext = url.substring(extpos + 1).toLowerCase();
        normal = urlNormalform(url);
        if (normal == null) {
            continue;
        }
        if (mediaExt.indexOf(ext) >= 0) {
            // this is not a normal anchor, it's a media link
            medialinks.put(normal, entry.getValue());
        } else {
            hyperlinks.put(normal, entry.getValue());
        }
    }

    // finally add the images to the medialinks
    i = image.entrySet().iterator();
    while (i.hasNext()) {
        entry = (Map.Entry) i.next();
        url = (String) entry.getKey();
        normal = urlNormalform(url);
        if (normal != null) medialinks.put(normal, entry.getValue()); // avoid NullPointerException
    }
    expandHyperlinks();
}

/*
private synchronized void resortLinks() {
Enumeration e;
String url;
Expand Down Expand Up @@ -219,7 +264,7 @@ private synchronized void resortLinks() {
if (normal != null) medialinks.setProperty(normal, image.getProperty(url)); // avoid NullPointerException
}
}

*/

public synchronized void expandHyperlinks() {
// we add artificial hyperlinks to the hyperlink set that can be calculated from
Expand Down

0 comments on commit a87a17a

Please sign in to comment.