Permalink
Browse files

Remove mailto link collection from the internal parsed document

As earlier plans to use mailto links as a separate webgraph entity didn't
materialize (see http://forum.yacy-websuche.de/viewtopic.php?f=8&t=5726&p=32493&hilit=mailto#p32493),
remove the unused handling and free the associated resources.
  • Loading branch information...
reger24 committed Apr 19, 2017
1 parent 335868e commit c77e43a39134de4b89b094da9bdbe1a79621e744
Showing with 2 additions and 35 deletions.
  1. +0 −17 htroot/ViewFile.java
  2. +2 −18 source/net/yacy/document/Document.java
@@ -424,23 +424,6 @@ private static void putLinks(final serverObjects prop, final String[] wordArray,
boolean dark = true;
int i = 0;
if (document.getEmaillinks() != null) {
Iterator<AnchorURL> emailit = document.getEmaillinks().iterator();
while (emailit.hasNext()) {
AnchorURL eentry = emailit.next();
prop.put("viewMode_links_" + i + "_nr", i);
prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
prop.put("viewMode_links_" + i + "_type", "email");
prop.put("viewMode_links_" + i + "_text", (eentry.getTextProperty().isEmpty()) ? "&nbsp;" : eentry.getTextProperty());
prop.put("viewMode_links_" + i + "_url", "#");
prop.put("viewMode_links_" + i + "_link", eentry.toNormalform(true));
prop.put("viewMode_links_" + i + "_rel", "");
prop.put("viewMode_links_" + i + "_name", eentry.getNameProperty());
dark = !dark;
i++;
}
}
i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
dark = (i % 2 == 0);
@@ -86,7 +86,6 @@
// text in image tags.
private LinkedHashMap<AnchorURL, String> audiolinks, videolinks, applinks, hyperlinks; // TODO: check if redundant value (set to key.getNameProperty()) is needed
private LinkedHashMap<DigestURL, String> inboundlinks, outboundlinks;
private Set<AnchorURL> emaillinks; // mailto: links
/** links to icons that belong to the document (mapped by absolute URL) */
private Map<DigestURL, IconEntry> icons;
private boolean resorted;
@@ -141,7 +140,6 @@ public Document(final DigestURL location, final String mimeType, final String ch
this.audiolinks = null;
this.videolinks = null;
this.applinks = null;
this.emaillinks = null;
this.icons = new HashMap<>();
this.resorted = false;
this.inboundlinks = null;
@@ -520,17 +518,9 @@ public long getTextLength() {
return this.applinks;
}
/**
 * Returns the mailto: links of this document.
 * These are the anchors that point to email addresses. If the links
 * have not been resorted yet, {@code resortLinks()} is run first.
 *
 * @return mailto links
 */
public Set<AnchorURL> getEmaillinks() {
    // make sure the anchors were sorted into their link categories before handing out the email subset
    if (!this.resorted) {
        resortLinks();
    }
    return this.emaillinks;
}
/**
* @return last modification date of the source document
* @return last modification date of the source document. (The date is initialized with last modification date or received date)
*/
public Date getLastModified() {
return this.lastModified;
@@ -551,7 +541,7 @@ private void resortLinks() {
if (this.resorted) return;
synchronized (this) {
if (this.resorted) return;
// extract hyperlinks, medialinks and emaillinks from anchorlinks
// extract hyperlinks, medialinks from anchorlinks
String u;
int extpos, qpos;
String ext = null;
@@ -562,7 +552,6 @@ private void resortLinks() {
this.videolinks = new LinkedHashMap<AnchorURL, String>();
this.audiolinks = new LinkedHashMap<AnchorURL, String>();
this.applinks = new LinkedHashMap<AnchorURL, String>();
this.emaillinks = new LinkedHashSet<AnchorURL>();
final Map<AnchorURL, ImageEntry> collectedImages = new HashMap<AnchorURL, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
for (final Map.Entry<DigestURL, ImageEntry> entry: this.images.entrySet()) {
if (entry.getKey() != null && entry.getKey().getHost() != null && entry.getKey().getHost().equals(thishost)) this.inboundlinks.put(entry.getKey(), "image"); else this.outboundlinks.put(entry.getKey(), "image");
@@ -571,11 +560,6 @@ private void resortLinks() {
if (url == null) continue;
u = url.toNormalform(true);
final String name = url.getNameProperty();
// check mailto scheme first (not supposed to get into in/outboundlinks or hyperlinks -> crawler can't process them)
if (url.getProtocol().equals("mailto")) {
this.emaillinks.add(url);
continue;
}
final boolean noindex = url.getRelProperty().toLowerCase().indexOf("noindex",0) >= 0;
final boolean nofollow = url.getRelProperty().toLowerCase().indexOf("nofollow",0) >= 0;

0 comments on commit c77e43a

Please sign in to comment.