Permalink
Browse files

Detailed some Javadoc related to /api/webstructure.xml usage.

  • Loading branch information...
luccioman committed Jan 12, 2017
1 parent 007e2af commit 9cea7cbb10449f0b1b4a3b871424298836bbe633
Showing with 68 additions and 3 deletions.
  1. +43 −0 htroot/api/webstructure.java
  2. +25 −3 source/net/yacy/peers/graphics/WebStructureGraph.java
@@ -45,8 +45,51 @@
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
* Retrieval of a web links structure.
*/
public class webstructure {
/**
* Retrieve the locally known web links structure of a specified resource ("about" parameter supplied) or
* the whole computed links structure since install (no parameter supplied)
* or since last start or last call ("latest" parameter supplied).
* Returned object contains the following information :
* <ul>
* <li>in all cases :
* <ul>
* <li>accumulated list of outgoing links to other domains (per host accumulated anchors)</li>
* </ul>
* </li>
* <li>when the "about" parameter is supplied :
* <ul>
* <li>accumulated list of incoming links from other domains (per host accumulated references)</li>
* <li>detailed list of outgoing links (anchors) from documents to references</li>
* <li>detailed list of incoming links (citations) from other documents (their references) - reverse link structure</li>
* </ul>
* </li>
* </ul>
* Information detail is limited by {@link WebStructureGraph#maxhosts} and {@link WebStructureGraph#maxref} constants.
*
* @param header
* servlet request header
* @param post
* request parameters. Supported keys :
* <ul>
* <li>about : get only links structure about the resource
* specified as value. Supported values : host hash, URL hash,
* host name or URL</li>
* <li>latest (ignored when the about parameter is set): get the structure that has been computed during
* the current run-time of YaCy, and, with each subsequent call, only an
* update to the next list of references.</li>
* <li>agentName : name of the user agent string used to load the
* "about" resource</li>
* </ul>
* @param env
* server environment
* @return the servlet answer object
* @see WebStructureGraph
*/
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env;
@@ -1,4 +1,4 @@
// plasmaWebStructure.java
// WebStructureGraph.java
// -----------------------------
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 15.05.2007 on http://yacy.net
@@ -65,19 +65,34 @@
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.Switchboard;
/**
* Holds lists of links per host names to allow reconstructing a web graph structure of links.
*/
public class WebStructureGraph {
public static int maxref = 200; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia)
public static int maxhosts = 10000; // maximum number of hosts in web structure map
/** Maximum number of references per host, to avoid overflow when a large link farm occurs (e.g. Wikipedia) */
public static int maxref = 200;
/** Maximum number of hosts in web structure map */
public static int maxhosts = 10000;
private final static ConcurrentLog log = new ConcurrentLog("WebStructureGraph");
/** Backup file */
private final File structureFile;
/** Older structure entries (notably loaded from the backup file) */
private final TreeMap<String, byte[]> structure_old; // <b64hash(6)>','<host> to <date-yyyymmdd(8)>{<target-b64hash(6)><target-count-hex(4)>}*
/** Recently computed structure entries */
private final TreeMap<String, byte[]> structure_new;
/** Queue used to receive new entries to store */
private final BlockingQueue<LearnObject> publicRefDNSResolvingQueue;
/** Worker thread consuming the publicRefDNSResolvingQueue */
private final PublicRefDNSResolvingProcess publicRefDNSResolvingWorker;
/** Entry used to terminate the worker thread */
private final static LearnObject leanrefObjectPOISON = new LearnObject(null, null);
private static class LearnObject {
@@ -90,6 +105,13 @@ private LearnObject(final DigestURL url, final Set<DigestURL> globalRefURLs) {
}
}
/**
* Constructs an instance, loads entries from the supplied backup
* structureFile if it exists, and starts the worker thread.
*
* @param structureFile
* backup file
*/
public WebStructureGraph(final File structureFile) {
this.structure_old = new TreeMap<String, byte[]>();
this.structure_new = new TreeMap<String, byte[]>();

0 comments on commit 9cea7cb

Please sign in to comment.