Permalink
Browse files

Removed time condition on HostBalancer initialization in JUnit test.

Its initialization in main application usage remains asynchronous.
  • Loading branch information...
luccioman committed Jan 26, 2018
1 parent 8b572b7 commit 46b5249c20e68c0b2c0b91ff155b7a9504e40db8
Showing with 72 additions and 34 deletions.
  1. +63 −25 source/net/yacy/crawler/HostBalancer.java
  2. +9 −9 test/java/net/yacy/crawler/HostBalancerTest.java
@@ -71,10 +71,31 @@
private final Set<String> roundRobinHostHashes;
private final int onDemandLimit;
/**
 * Creates a new instance whose queues are filled asynchronously by scanning the hostsPath directory.
 * Equivalent to calling the four-argument constructor with {@code asyncInit} set to true.
 * @param hostsPath path with persisted hosts queues
 * @param onDemandLimit forwarded unchanged to the four-argument constructor
 * @param exceed134217727 forwarded unchanged to the four-argument constructor
 */
public HostBalancer(final File hostsPath, final int onDemandLimit, final boolean exceed134217727) {
    // delegate with asyncInit = true so the caller is not blocked by the directory scan
    this(hostsPath, onDemandLimit, exceed134217727, true);
}
/**
* Creates a new instance and fills the queue by scanning the hostsPath directory.
* @param hostsPath
* @param onDemandLimit
* @param exceed134217727
* @param asyncInit when true, queue filling from file system is launched asynchronously
*/
public HostBalancer(
final File hostsPath,
final int onDemandLimit,
final boolean exceed134217727,
final boolean asyncInit) {
this.hostsPath = hostsPath;
this.onDemandLimit = onDemandLimit;
this.exceed134217727 = exceed134217727;
@@ -83,38 +104,50 @@ public HostBalancer(
if (!(hostsPath.exists())) hostsPath.mkdirs(); // make the path
this.queues = new ConcurrentHashMap<String, HostQueue>();
this.roundRobinHostHashes = new HashSet<String>();
init(); // return without wait but starts a thread to fill the queues
init(asyncInit); // return without wait but starts a thread to fill the queues
}
/**
* fills the queue by scanning the hostsPath directory in a thread to
* Fills the queue by scanning the hostsPath directory.
* @param async when true, launch in a dedicated thread to
* return immediately (as large unfinished crawls may take longer to load)
*/
private void init() {
Thread t = new Thread("HostBalancer.init") {
@Override
public void run() {
final String[] hostlist = hostsPath.list();
for (String hoststr : hostlist) {
try {
File queuePath = new File(hostsPath, hoststr);
HostQueue queue = new HostQueue(queuePath, queues.size() > onDemandLimit, exceed134217727);
if (queue.isEmpty()) {
queue.close();
FileUtils.deletedelete(queuePath);
} else {
queues.put(queue.getHostHash(), queue);
}
} catch (MalformedURLException | RuntimeException e) {
log.warn("delete queue due to init error for " + hostsPath.getName() + " host=" + hoststr + " " + e.getLocalizedMessage());
// if exception thrown we can't init the queue, maybe due to name violation. That won't get better, delete it.
FileUtils.deletedelete(new File(hostsPath, hoststr));
}
private void init(final boolean async) {
if(async) {
Thread t = new Thread("HostBalancer.init") {
@Override
public void run() {
runInit();
}
}
};
};
t.start();
} else {
runInit();
}
}
t.start();
/**
 * Fills the queue by scanning the hostsPath directory.
 * <p>
 * Each directory entry is opened as a {@link HostQueue}. Empty queues are closed and
 * deleted from the file system; non-empty ones are registered in {@link #queues} under
 * their host hash. Entries that can not be opened are deleted as well.
 * When the directory can not be listed, a warning is logged and no queue is loaded.
 */
private void runInit() {
    final String[] hostlist = hostsPath.list();
    if (hostlist == null) {
        // File.list() returns null when the path is not a directory or an I/O error occurs;
        // without this guard the loop below would fail with a NullPointerException
        log.warn("can not list host queues in " + hostsPath + ", no queue loaded");
        return;
    }
    for (final String hoststr : hostlist) {
        final File queuePath = new File(hostsPath, hoststr);
        try {
            final HostQueue queue = new HostQueue(queuePath, queues.size() > onDemandLimit, exceed134217727);
            if (queue.isEmpty()) {
                // nothing left to crawl for this host: release the queue and remove its files
                queue.close();
                FileUtils.deletedelete(queuePath);
            } else {
                queues.put(queue.getHostHash(), queue);
            }
        } catch (MalformedURLException | RuntimeException e) {
            log.warn("delete queue due to init error for " + hostsPath.getName() + " host=" + hoststr + " " + e.getLocalizedMessage());
            // if exception thrown we can't init the queue, maybe due to name violation. That won't get better, delete it.
            FileUtils.deletedelete(queuePath);
        }
    }
}
@Override
@@ -196,6 +229,11 @@ public synchronized int remove(final HandleSet urlHashes) throws IOException {
return c;
}
/**
* @return true when the URL is queued in this or any other HostBalancer
* instance (as {@link #depthCache} is shared between all HostBalancer
* instances)
*/
@Override
public boolean has(final byte[] urlhashb) {
if (depthCache.has(urlhashb)) return true;
@@ -8,34 +8,35 @@
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.data.WorkTables;
import static net.yacy.kelondro.util.FileUtils.deletedelete;
import net.yacy.kelondro.util.FileUtils;
import org.junit.Test;
import static org.junit.Assert.*;
public class HostBalancerTest {
final File queuesRoot = new File("test/DATA/INDEX/QUEUES");
final File datadir = new File("test/DATA");
private static final boolean EXCEED_134217727 = true;
private static final int ON_DEMAND_LIMIT = 1000;
/**
* Test of reopen existing HostBalancer cache to test/demonstrate issue with
* HostQueue for file: protocol
*/
@Test
public void testReopen() throws IOException, SpaceExceededException, InterruptedException {
boolean exceed134217727 = true;
int onDemandLimit = 1000;
String hostDir = "C:\\filedirectory";
// prepare one urls for push test
String urlstr = "file:///" + hostDir;
DigestURL url = new DigestURL(urlstr);
Request req = new Request(url, null);
deletedelete(queuesRoot); // start clean test
FileUtils.deletedelete(queuesRoot); // start clean test
HostBalancer hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727);
Thread.sleep(100); // wait for file operation
HostBalancer hb = new HostBalancer(queuesRoot, ON_DEMAND_LIMIT, EXCEED_134217727, false);
hb.clear();
Thread.sleep(100);
@@ -58,8 +59,7 @@ public void testReopen() throws IOException, SpaceExceededException, Interrupted
Thread.sleep(200); // wait a bit for file operation
hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727); // reopen balancer
Thread.sleep(200); // wait a bit for file operation
hb = new HostBalancer(queuesRoot, ON_DEMAND_LIMIT, EXCEED_134217727, false); // reopen balancer
assertEquals("size after reopen (with one existing url)", 1, hb.size()); // expect size=1 from previous push
assertTrue("check existance of pushed url", hb.has(url.hash())); // check url exists (it fails as after reopen internal queue.hosthash is wrong)

0 comments on commit 46b5249

Please sign in to comment.