Permalink
Browse files

Automatically refresh running recrawl report when JavaScript is enabled.

For users who would prefer to keep JavaScript disabled, a manual Refresh
button is still available.
  • Loading branch information...
luccioman committed Jan 19, 2018
1 parent 19903a9 commit 5e2812c0608288e0d4700909a46adddeb4c26d05
@@ -3,7 +3,8 @@
<head>
<title>YaCy '#[clientname]#': Field Re-Indexing</title>
#%env/templates/metas.template%#
</head>
<script type="text/javascript" src="js/IndexReIndexMonitor.js"></script>
</head>
<body id="IndexReindexMonitor">
#%env/templates/header.template%#
#%env/templates/submenuIndexControl.template%#
@@ -53,7 +54,7 @@ <h2>Re-Crawl Index Documents</h2>
<input type="hidden" name="transactionToken" value="#[transactionToken]#" />
#(recrawljobrunning)#
<fieldset>
<fieldset id="newJobFieldset">
#(error)#::
<div class="alert alert-danger" role="alert">Re-crawl works only with an embedded local Solr index!</div>
#(/error)#
@@ -75,14 +76,12 @@ <h2>Re-Crawl Index Documents</h2>
<input type="submit" name="recrawlDefaults" value="Set defaults" class="btn btn-default" title="Reset to default values"/>
<input type="submit" name="recrawlnow" value="start recrawl job now" class="btn btn-primary"/>
to re-crawl documents selected with the given query.
</fieldset>::
#(/recrawljobrunning)#
#(recrawljobrunning)#::
<fieldset><legend>Re-Crawl Query Details</legend>
</fieldset>
::
<fieldset id="updateJobFieldset"><legend>Re-Crawl Query Details</legend>
<dl>
<dt>Documents to process</dt>
<dd>#[docCount]#</dd>
<dd id="urlsToRecrawlCount">#[docCount]#</dd>
<dt>Current Query</dt>
<dd>#[recrawlquerytext]#</dd>
</dl>
@@ -101,46 +100,53 @@ <h2>Re-Crawl Index Documents</h2>
<div class="col-md-10 col-lg-6">
<div class="panel panel-info">
<div class="panel-heading">
<h3 class="panel-title">#(jobStatus)#::::Last #(/jobStatus)#Re-Crawl job report</h3>
<h3 class="panel-title">#(jobStatus)#::::Last #(/jobStatus)#Re-Crawl job report
<span id="refreshingIcon" class="glyphicon glyphicon-transfer hidden" title="Automatically refreshing"></span>
<span id="refreshFailureIcon" class="glyphicon glyphicon-warning-sign warning hidden" title="An error occurred while trying to refresh automatically"></span>
</h3>
</div>
<div class="panel-body">
#(error)#::<div class="alert alert-danger" role="alert">The job terminated early due to an error when requesting the Solr index.</div>#(/error)#
<div id="earlyRecrawlTermination" class="alert alert-danger #(error)#hidden::#(/error)#" role="alert">The job terminated early due to an error when requesting the Solr index.</div>
<table class="table">
<tbody>
<tr>
<th scope="row">Status</th>
<td>#(jobStatus)#Running::Shutdown in progress::Terminated#(/jobStatus)#</td>
</tr>
<td id="recrawlStatus"
aria-live="polite"
data-status="#(jobStatus)#0::1::2#(/jobStatus)#"
data-status0="Running" data-status1="Shutdown in progress"
data-status2="Terminated">#(jobStatus)#Running::Shutdown in progress::Terminated#(/jobStatus)#</td>
</tr>
<tr>
<th scope="row">Query</th>
<td>#[recrawlquerytext]#</td>
<td id="recrawlQueryText">#[recrawlquerytext]#</td>
</tr>
<tr>
<th scope="row">Start time</th>
<td>#[startTime]#</td>
<td id="recrawlStartTime">#[startTime]#</td>
</tr>
<tr>
<th scope="row">End time</th>
<td>#[endTime]#</td>
<td id="recrawlEndTime">#[endTime]#</td>
</tr>
<tr>
<th scope="row" title="URLs added to the crawler queue for recrawl">Recrawled URLs</th>
<td>#[recrawledUrlsCount]#</td>
<td id="recrawledUrlsCount" aria-live="polite">#[recrawledUrlsCount]#</td>
</tr>
<tr>
<th scope="row" title="URLs rejected for some reason by the crawl stacker or the crawler queue. Please check the logs for more details.">Rejected URLs</th>
<td>#[rejectedUrlsCount]#</td>
<td id="rejectedUrlsCount">#[rejectedUrlsCount]#</td>
</tr>
<tr>
<th scope="row">Malformed URLs</th>
<td title="#[malformedUrlsDeletedCount]# deleted from the index">#[malformedUrlsCount]#</td>
<td id="malformedUrlsCount" title="#[malformedUrlsDeletedCount]# deleted from the index">#[malformedUrlsCount]#</td>
</tr>
</tbody>
</table>
#(jobStatus)#
<a class="btn btn-default" href="IndexReIndexMonitor_p.html" role="button"><span class="glyphicon glyphicon-refresh"></span> Refresh</a>
<a id="recrawlRefreshBtn" class="btn btn-default" href="IndexReIndexMonitor_p.html" role="button"><span class="glyphicon glyphicon-refresh"></span> Refresh</a>
::
<a class="btn btn-default" href="IndexReIndexMonitor_p.html" role="button"><span class="glyphicon glyphicon-refresh"></span> Refresh</a>
<a id="recrawlRefreshBtn" class="btn btn-default" href="IndexReIndexMonitor_p.html" role="button"><span class="glyphicon glyphicon-refresh"></span> Refresh</a>
::
#(/jobStatus)#
</div>
@@ -84,7 +84,7 @@ public static serverObjects respond(final RequestHeader header, final serverObje
final OrderedScoreMap<String> querylist = ((ReindexSolrBusyThread) reidxbt).getQueryList();
if (querylist != null) {
int i = 0;
for (String oneqs : querylist) { // just use fieldname from query (fieldname:[* TO *])
for (final String oneqs : querylist) { // just use fieldname from query (fieldname:[* TO *])
prop.put("reindexjobrunning_fieldlist_"+i+"_fieldname", oneqs.substring(0, oneqs.indexOf(':')));
prop.put("reindexjobrunning_fieldlist_"+i+"_fieldscore", querylist.get(oneqs));
i++;
@@ -115,6 +115,8 @@ public static serverObjects respond(final RequestHeader header, final serverObje
prop.putHTML("infomessage", "! reindex works only with embedded Solr index !");
}
}
processReindexReport(header, sb, prop, reidxbt instanceof ReindexSolrBusyThread ? (ReindexSolrBusyThread)reidxbt : null);
// recrawl job handling
BusyThread recrawlbt = sb.getThread(RecrawlBusyThread.THREAD_NAME);
@@ -228,6 +230,34 @@ public static serverObjects respond(final RequestHeader header, final serverObje
// return rewrite properties
return prop;
}
/**
 * Write the reindex job report entries to the template properties.
 * <p>
 * When a reindex thread is provided, "reindexReport" is set to 1 and the
 * current query, the size of the query list (0 when the list is null) and the
 * processed count are written under the "reindexReport_" prefix. Otherwise
 * only "reindexReport" is written, with the value 0.
 * </p>
 *
 * @param header
 *            current request header. Must not be null. Currently not read by
 *            this method.
 * @param sb
 *            Switchboard instance holding server environment. Currently not
 *            read by this method.
 * @param prop
 *            this template result to write on. Must not be null.
 * @param reindexbt
 *            the reindex thread, or null when there is none
 */
private static void processReindexReport(final RequestHeader header, final Switchboard sb, final serverObjects prop,
        final ReindexSolrBusyThread reindexbt) {
    if (reindexbt == null) {
        prop.put("reindexReport", 0);
        return;
    }

    prop.put("reindexReport", 1);
    prop.put("reindexReport_currentQuery", reindexbt.getCurrentQuery());

    /*
     * Read the query list only once, so that the null check and the size are
     * guaranteed to apply to the same object
     */
    final OrderedScoreMap<String> queryList = reindexbt.getQueryList();
    prop.put("reindexReport_currentQuerySize", queryList != null ? queryList.size() : 0);

    prop.put("reindexReport_processedCount", reindexbt.getProcessed());
}
/**
* @param query
@@ -292,6 +322,7 @@ private static void processRecrawlReport(final RequestHeader header, final Switc
.withLocale(formatLocale);
prop.put("recrawlReport_startTime", formatDateTime(formatter, recrawlbt.getStartTime()));
prop.put("recrawlReport_endTime", formatDateTime(formatter, recrawlbt.getEndTime()));
prop.put("recrawlReport_urlsToRecrawlCount", recrawlbt.getUrlsToRecrawl());
prop.put("recrawlReport_recrawledUrlsCount", recrawlbt.getRecrawledUrlsCount());
prop.put("recrawlReport_rejectedUrlsCount", recrawlbt.getRejectedUrlsCount());
prop.put("recrawlReport_malformedUrlsCount", recrawlbt.getMalformedUrlsCount());
@@ -0,0 +1,22 @@
{
#(reindexReport)#::"reindexJob": {
"currentQuery": "#[currentQuery]#",
"currentQuerySize": #[currentQuerySize]#,
"processedCount": #[processedCount]#
}#(/reindexReport)#
#(recrawlReport)#::"recrawlJob": {
"status": #(jobStatus)#0::1::2#(/jobStatus)#,
"statusLabel": "#(jobStatus)#Running::Shutdown in progress::Terminated#(/jobStatus)#",
"earlyTerminated": #(error)#false::true#(/error)#,
"query": "#[recrawlquerytext]#",
"startTime": "#[startTime]#",
"endTime": "#[endTime]#",
"urlCounts": {
"toRecrawl": #[urlsToRecrawlCount]#,
"recrawled": #[recrawledUrlsCount]#,
"rejected": #[rejectedUrlsCount]#,
"malformed": #[malformedUrlsCount]#,
"malformedDeleted": #[malformedUrlsDeletedCount]#
}
}#(/recrawlReport)#
}
@@ -0,0 +1,202 @@
/*
* @licstart The following is the entire license notice for the
* JavaScript code in this file.
*
* Copyright (C) 2018 by luccioman; https://github.com/luccioman
*
*
* This file is part of YaCy.
*
* YaCy is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* YaCy is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with YaCy. If not, see <http://www.gnu.org/licenses/>.
*
* @licend The above is the entire license notice
* for the JavaScript code in this file.
*/
/* This is JavaScript for the IndexReIndexMonitor_p.html page */
/* Delay, in milliseconds, between two automated report refreshes */
var REFRESH_DELAY = 5000;
/* Numeric code of the "Terminated" job status */
var TERMINATED_STATUS = 2;
/* Shared request object: remains null when a required browser API is missing */
var xhr = null;
if (!window.XMLHttpRequest) {
    console.warn("XMLHttpRequest is not supported by the browser.");
} else if (!(window.JSON && window.JSON.parse)) {
    console.warn("JSON parsing is not supported by the browser.");
} else {
    xhr = new XMLHttpRequest();
}
/**
 * Once the DOM is fully loaded, start the periodic refresh of the recrawl job
 * report, unless the status cell is missing, does not expose a dataset, or
 * already reports the terminated status.
 *
 * Relies on the file-level xhr, REFRESH_DELAY and TERMINATED_STATUS variables.
 */
function domLoaded() {
    /* DOM elements to be refreshed (any of them may be absent from the page) */
    var earlyTerminationAlert = document.getElementById("earlyRecrawlTermination");
    var statusCell = document.getElementById("recrawlStatus");
    var queryCell = document.getElementById("recrawlQueryText");
    var startTimeCell = document.getElementById("recrawlStartTime");
    var endTimeCell = document.getElementById("recrawlEndTime");
    var toRecrawlCell = document.getElementById("urlsToRecrawlCount");
    var recrawledCell = document.getElementById("recrawledUrlsCount");
    var rejectedCell = document.getElementById("rejectedUrlsCount");
    var malformedCell = document.getElementById("malformedUrlsCount");
    var refreshingIcon = document.getElementById("refreshingIcon");
    var refreshFailureIcon = document.getElementById("refreshFailureIcon");
    var refreshBtn = document.getElementById("recrawlRefreshBtn");

    if (statusCell == null || statusCell.dataset == null) {
        return;
    }

    /* Status labels that may have been translated in the html template */
    var labelsByStatus = {};
    var statusCodes = ["0", "1", "2"];
    for (var i = 0; i < statusCodes.length; i++) {
        var templateLabel = statusCell.dataset["status" + statusCodes[i]];
        if (templateLabel != null) {
            labelsByStatus[statusCodes[i]] = templateLabel;
        }
    }

    /* Loose comparison on purpose: the data attribute holds the status as a string */
    if (statusCell.dataset.status == TERMINATED_STATUS) {
        return;
    }

    /* Set the text of an element, when both the element and the value exist */
    var setText = function(elem, value) {
        if (elem != null && value != null) {
            elem.innerText = value;
        }
    };

    /* Remove the "hidden" marker from the classes of an element, when it exists */
    var reveal = function(elem) {
        if (elem != null && elem.className != null) {
            elem.className = elem.className.replace("hidden", "");
        }
    };

    /* Replace the classes of an element with "hidden", when it exists */
    var conceal = function(elem) {
        if (elem != null) {
            elem.className = "hidden";
        }
    };

    /*
     * Failure to fetch fresh report information: hide the automated refresh
     * icon, show the failure icon and keep the manual refresh button
     */
    var onFetchFailure = function() {
        conceal(refreshingIcon);
        reveal(refreshFailureIcon);
    };

    /* Copy the fetched report information into the report DOM elements */
    var applyReport = function(report) {
        statusCell.dataset.status = report.status;

        /* Prefer the label from the template, else the one from the json response */
        var translatedLabel = labelsByStatus[report.status];
        setText(statusCell, translatedLabel != null ? translatedLabel : report.statusLabel);

        if (report.earlyTerminated) {
            reveal(earlyTerminationAlert);
        }

        setText(queryCell, report.query);
        setText(startTimeCell, report.startTime);
        setText(endTimeCell, report.endTime);

        var counts = report.urlCounts;
        if (counts == null) {
            return;
        }
        setText(toRecrawlCell, counts.toRecrawl);
        setText(recrawledCell, counts.recrawled);
        setText(rejectedCell, counts.rejected);
        setText(malformedCell, counts.malformed);
        if (counts.malformedDeleted != null && malformedCell != null) {
            /* The deleted count is the first number found in the tooltip text */
            malformedCell.title = malformedCell.title.replace(/\d+/, counts.malformedDeleted);
        }
    };

    /* Request IndexReIndexMonitor_p.json once the refresh delay has elapsed */
    var scheduleRefresh = function() {
        window.setTimeout(function() {
            xhr.onreadystatechange = onStateChange;
            xhr.open("get", "IndexReIndexMonitor_p.json");
            xhr.send();
        }, REFRESH_DELAY);
    };

    /* Process the response from IndexReIndexMonitor_p.json */
    var onStateChange = function() {
        if (xhr.readyState !== 4 /* XMLHttpRequest.DONE */) {
            return;
        }
        if (xhr.status !== 200 || xhr.response == null) {
            onFetchFailure();
            return;
        }

        var report = null;
        try {
            var parsed = window.JSON.parse(xhr.response);
            if (parsed != null) {
                report = parsed.recrawlJob;
            }
        } catch (error) {
            console.error("JSON parsing error ", error);
        }
        if (report == null || report.status == null) {
            onFetchFailure();
            return;
        }

        applyReport(report);

        if (report.status != TERMINATED_STATUS) {
            /* Continue refreshing while the job is not terminated */
            scheduleRefresh();
            return;
        }

        /* Job terminated: hide the automated refresh icon and the manual refresh button */
        conceal(refreshingIcon);
        conceal(refreshBtn);
        /*
         * Then, if the update fieldset is displayed, completely reload the
         * page to display the new recrawl job fieldset again
         */
        if (document.getElementById("updateJobFieldset") != null) {
            window.location.href = "IndexReIndexMonitor_p.html";
        }
    };

    /*
     * Reaching this point means JavaScript is enabled and the status cell is
     * usable: show the refreshing icon when present, then plan the first refresh
     */
    reveal(refreshingIcon);
    scheduleRefresh();
}
/* Register the report refresh logic only when the request object could be created */
if (xhr !== null) {
    window.onload = domLoaded;
}
Oops, something went wrong.

0 comments on commit 5e2812c

Please sign in to comment.