Skip to content
Permalink
Browse files

Improved logging for federated search

- Do not use spaces in the logger identifier name, so that the log level
can be configured in yacy.logging
- Hold the logger instance so that the logging system does not have to
look it up by name each time a log message is appended
  • Loading branch information...
luccioman committed Feb 2, 2019
1 parent 36c4083 commit 5e9a08355a18989e456a5e58127e8eeb32196246
@@ -58,11 +58,21 @@
* Subclasses should/need to override query() and maybe toYaCySchema() if more
* is needed than a basic field mapping
*/
abstract public class AbstractFederateSearchConnector implements FederateSearchConnector {
public abstract class AbstractFederateSearchConnector implements FederateSearchConnector {

/** Logger for this class */
private static final ConcurrentLog LOG = new ConcurrentLog(AbstractFederateSearchConnector.class.getName());

public String instancename; // just a identifying name
protected SchemaConfiguration localcfg; // the schema conversion cfg for each fieldname, yacyname = remote fieldname
public long lastaccesstime = -1; // last time accessed, used for search delay calculation
/** Just an identifying name */
public String instancename;

/** The schema conversion cfg for each fieldname, yacyname = remote fieldname */
protected SchemaConfiguration localcfg;

/** Last time accessed, used for search delay calculation */
public long lastaccesstime = -1;

/** The search URL template */
protected String baseurl;

/**
@@ -84,14 +94,14 @@ public boolean init(String instance, String cfgFileName) {
try {
this.localcfg = new SchemaConfiguration(instanceCfgFile);
} catch (IOException ex) {
ConcurrentLog.config(this.instancename, "error reading schema " + cfgFileName);
LOG.config("Error reading schema " + cfgFileName + " for connector " + this.instancename);
return false;
}
// mandatory to contain a mapping for "sku" or alternatively "cfg_skufieldname" for a conversion to a final url
if (this.localcfg.contains(CollectionSchema.sku) || this.localcfg.contains("_skufieldname")) {
return true;
}
ConcurrentLog.config(this.instancename, "mandatory mapping for sku or _skufieldname missing in " + cfgFileName);
LOG.config("Mandatory mapping for sku or _skufieldname missing in " + cfgFileName + " for connector " + this.instancename);
return false;
}
this.localcfg = null;
@@ -111,26 +121,26 @@ public void search(final SearchEvent theSearch) {
@Override
public void run() {
Thread.currentThread().setName("heuristic:" + instancename);
ConcurrentLog.info("YACY SEARCH (federated)", "Send search query to " + instancename);
LOG.info("Send search query to " + instancename);
theSearch.oneFeederStarted();
List<URIMetadataNode> doclist = query(theSearch.getQuery());
if (doclist != null) {
ConcurrentLog.info("YACY SEARCH (federated)", "Got " + doclist.size() + " documents from " + instancename);
Map<String, LinkedHashSet<String>> snippets = new HashMap<String, LinkedHashSet<String>>(); // add nodes doesn't allow null
LOG.info("Got " + doclist.size() + " documents from " + instancename);
Map<String, LinkedHashSet<String>> snippets = new HashMap<>(); // add nodes doesn't allow null
theSearch.addNodes(doclist, null, snippets, false, instancename, doclist.size(), true);

for (URIMetadataNode doc : doclist) {
theSearch.addHeuristic(doc.hash(), instancename, false);
}
} else {
ConcurrentLog.info("YACY SEARCH (federated)", "Got no results from " + instancename);
LOG.info("Got no results from " + instancename);
}
// that's all we need to display search results
theSearch.oneFeederTerminated();

// optional: add to crawler to get the full resource (later)
if (doclist != null && !doclist.isEmpty() && theSearch.addResultsToLocalIndex) {
Collection<DigestURL> urls = new ArrayList<DigestURL>();
Collection<DigestURL> urls = new ArrayList<>();
for (URIMetadataNode doc : doclist) {
urls.add(doc.url());
}
@@ -59,6 +59,9 @@
* Handling of queries to configured remote OpenSearch systems.
*/
public class FederateSearchManager {

/** Logger for this class */
private static final ConcurrentLog LOG = new ConcurrentLog(FederateSearchManager.class.getName());

/** Delay between connects (in ms) */
private final int accessDelay = 15000;
@@ -121,7 +124,7 @@ public FederateSearchManager(Switchboard sb) {
conlist.add(sfc);
}
} else {
ConcurrentLog.config("FederateSearchManager", "Error in configuration of: " + url);
LOG.config("Error in configuration of: " + url);
}
} else { // handle opensearch url template
OpenSearchConnector osc = new OpenSearchConnector(url);
@@ -131,8 +134,8 @@ public FederateSearchManager(Switchboard sb) {
}
}
}
} catch (IOException ex) {
ConcurrentLog.logException(ex);
} catch (final IOException ex) {
LOG.config("Unexpected error when reading configuration file : " + this.confFile, ex);
}
}
manager = this; // reference for static access via .getManager()
@@ -257,12 +260,12 @@ public boolean addOpenSearchTarget(String name, String urlTemplate, boolean acti
}
}
} catch (final IOException ex) {
ConcurrentLog.warn("FederateSearchManager", "config file write error");
LOG.warn("config file write error");
}
return true;
}
} catch (final IOException e1) {
ConcurrentLog.logException(e1);
LOG.severe("Unexpected error when writing configuration file : " + confFile, e1);
return false;
}
return false;
@@ -288,7 +291,7 @@ public int getSize() {
try {
connectorURL = new MultiProtocolURL(fsc.baseurl);
} catch (MalformedURLException e) {
ConcurrentLog.warn("FederateSearchManager", "Malformed connector URL : " + fsc.baseurl);
LOG.warn("Malformed connector URL : " + fsc.baseurl);
continue;
}
RobotsTxtEntry robotsEntry = null;
@@ -310,8 +313,7 @@ public int getSize() {
// also check robots.txt exclusion
retset.add(fsc);
} else {
ConcurrentLog.warn("FederateSearchManager",
"Connector URL is disallowed by robots.txt : " + fsc.baseurl);
LOG.warn("Connector URL is disallowed by robots.txt : " + fsc.baseurl);
}
}

@@ -333,15 +335,15 @@ public boolean discoverFromSolrIndex(final Switchboard sb) {
}
// check if needed Solr fields are available (selected)
if (!sb.index.fulltext().useWebgraph()) {
ConcurrentLog.severe("FederateSearchManager", "Error on connecting to embedded Solr webgraph index");
LOG.severe("Error on connecting to embedded Solr webgraph index");
return false;
}
final SolrConnector connector = sb.index.fulltext().getWebgraphConnector();
final boolean metafieldavailable = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_rel_s.name())
&& (sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_protocol_s.name()) && sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.target_urlstub_s.name()))
&& sb.getConfigBool(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, false);
if (!metafieldavailable) {
ConcurrentLog.warn("FederateSearchManager", "webgraph option and webgraph Schema fields target_rel_s, target_protocol_s and target_urlstub_s must be switched on");
LOG.warn("webgraph option and webgraph Schema fields target_rel_s, target_protocol_s and target_urlstub_s must be switched on");
return false;
}
// the solr search
@@ -354,12 +356,12 @@ public boolean discoverFromSolrIndex(final Switchboard sb) {
SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, null, 0, 1, webgraphqueryfields);
numfound = docList.getNumFound();
if (numfound == 0) {
ConcurrentLog.info("FederateSearchManager", "no results found, abort discover job");
LOG.info("no results found, abort discover job");
return true;
}
ConcurrentLog.info("FederateSearchManager", "start checking " + Long.toString(numfound) + " found index results");
LOG.info("start checking " + Long.toString(numfound) + " found index results");
} catch (final IOException ex) {
ConcurrentLog.logException(ex);
LOG.severe("Error on Solr webgraph core query", ex);
return false;
}

@@ -375,12 +377,12 @@ public void run() {
int loopnr = 0;
Set<String> dblmem = new HashSet<String>(); // temp memory for already checked url
while (doloop) {
ConcurrentLog.info("FederateSearchManager", "start Solr query loop at " + Integer.toString(loopnr * 20) + " of " + Long.toString(numfound));
LOG.info("start Solr query loop at " + Integer.toString(loopnr * 20) + " of " + Long.toString(numfound));
SolrDocumentList docList = connector.getDocumentListByQuery(webgraphquerystr, null, loopnr * 20, 20, webgraphqueryfields); // check chunk of 20 result documents
loopnr++;
if (stoptime < System.currentTimeMillis()) {// stop after max 1h
doloop = false;
ConcurrentLog.info("FederateSearchManager", "long running discover task aborted");
LOG.info("long running discover task aborted");
}
if (docList != null && docList.size() > 0) {
Iterator<SolrDocument> docidx = docList.iterator();
@@ -392,7 +394,7 @@ public void run() {
try {
url = new URL(hrefurltxt);
} catch (final MalformedURLException ex) {
ConcurrentLog.warn("FederateSearchManager", "OpenSearch description URL is malformed : " + hrefurltxt);
LOG.warn("OpenSearch description URL is malformed : " + hrefurltxt);
continue;
}
//TODO: check Blacklist
@@ -405,32 +407,32 @@ public void run() {
try {
templateURL = new MultiProtocolURL(os.getRSSorAtomUrl());
} catch (final MalformedURLException ex) {
ConcurrentLog.warn("FederateSearchManager", "OpenSearch description URL is malformed : " + hrefurltxt);
LOG.warn("OpenSearch description URL is malformed : " + hrefurltxt);
continue;
}
if(sb.robots != null) {
robotsEntry = sb.robots.getEntry(templateURL, ClientIdentification.yacyInternetCrawlerAgent);
}

if(robotsEntry != null && robotsEntry.isDisallowed(templateURL)) {
ConcurrentLog.info("FederateSearchManager", "OpenSearch description template URL is disallowed by robots.xt");
LOG.info("OpenSearch description template URL is disallowed by robots.xt");
} else {
// add found system to config file
addOpenSearchTarget(os.getShortName(), os.getRSSorAtomUrl(), false, os.getItem("LongName"));
ConcurrentLog.info("FederateSearchManager", "added " + os.getShortName() + " " + hrefurltxt);
LOG.info("added " + os.getShortName() + " " + hrefurltxt);
}
} else {
ConcurrentLog.info("FederateSearchManager", "osd.xml check failed (no RSS or Atom support) for " + hrefurltxt);
LOG.info("osd.xml check failed (no RSS or Atom support) for " + hrefurltxt);
}
}
}
} else {
doloop = false;
}
}
ConcurrentLog.info("FederateSearchManager", "finisched Solr query (checked " + Integer.toString(dblmem.size()) + " unique opensearchdescription links found in " + Long.toString(numfound) + " results)");
LOG.info("finisched Solr query (checked " + Integer.toString(dblmem.size()) + " unique opensearchdescription links found in " + Long.toString(numfound) + " results)");
} catch (final IOException ex) {
ConcurrentLog.logException(ex);
LOG.severe("Unexpected error", ex);
}
}
};
@@ -467,7 +469,7 @@ public boolean init(String cfgFileName) {
conlist.add(sfc);
}
} else {
ConcurrentLog.config("FederateSearchManager", "Init error in configuration of: " + url);
LOG.config("Init error in configuration of: " + url);
}
} else { // handle opensearch url template
OpenSearchConnector osd = new OpenSearchConnector(url);
@@ -478,8 +480,8 @@ public boolean init(String cfgFileName) {
}
}
}
} catch (IOException ex) {
ConcurrentLog.logException(ex);
} catch (final IOException ex) {
LOG.config("Unexpected error when reading configuration file : " + cfgFileName);
}
}
return true;

0 comments on commit 5e9a083

Please sign in to comment.
You can’t perform that action at this time.