avoid ConcurrentModificationException in plasmaCrawlerQueues
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4579 6c8d7289-2bf4-0310-a012-ef5d649a1542
danielr committed Mar 17, 2008
1 parent 9d0af17 commit 7008a21
Showing 1 changed file with 10 additions and 18 deletions.
28 changes: 10 additions & 18 deletions source/de/anomic/plasma/crawler/plasmaCrawlQueues.java
@@ -31,11 +31,10 @@
 import java.net.MalformedURLException;
 import java.text.ParseException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.Date;
-import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;

 import de.anomic.data.robotsParser;
 import de.anomic.kelondro.kelondroFlexWidthArray;
@@ -68,7 +67,7 @@ public class plasmaCrawlQueues {
     public plasmaCrawlQueues(plasmaSwitchboard sb, File plasmaPath) {
         this.sb = sb;
         this.log = new serverLog("CRAWLER");
-        this.workers = Collections.synchronizedMap(new HashMap<Integer, crawlWorker>());
+        this.workers = new ConcurrentHashMap<Integer, crawlWorker>();
         this.loader = new plasmaProtocolLoader(sb, log);
         this.remoteCrawlProviderHashes = new ArrayList<String>();

@@ -93,8 +92,9 @@ public String urlExists(String hash) {
         if (noticeURL.existsInStack(hash)) return "crawler";
         if (delegatedURL.exists(hash)) return "delegated";
         if (errorURL.exists(hash)) return "errors";
-        Iterator<crawlWorker> i = workers.values().iterator();
-        while (i.hasNext()) if (i.next().entry.url().hash().equals(hash)) return "worker";
+        for (crawlWorker worker: workers.values()) {
+            if (worker.entry.url().hash().equals(hash)) return "worker";
+        }
         return null;
     }

@@ -112,19 +112,17 @@ public yacyURL getURL(String urlhash) {
         if (ee != null) return ee.url();
         ee = errorURL.getEntry(urlhash);
         if (ee != null) return ee.url();
-        Iterator<crawlWorker> i = workers.values().iterator();
-        crawlWorker w;
-        while (i.hasNext()) {
-            w = i.next();
+        for (crawlWorker w: workers.values()) {
             if (w.entry.url().hash().equals(urlhash)) return w.entry.url();
         }
         return null;
     }

     public void close() {
         // wait for all workers to finish
-        Iterator<crawlWorker> i = workers.values().iterator();
-        while (i.hasNext()) i.next().interrupt();
+        for (crawlWorker w: workers.values()) {
+            w.interrupt();
+        }
         // TODO: wait some more time until all threads are finished
         noticeURL.close();
         errorURL.close();
@@ -133,13 +131,7 @@ public void close() {

     public plasmaCrawlEntry[] activeWorker() {
         synchronized (workers) {
-            plasmaCrawlEntry[] w = new plasmaCrawlEntry[workers.size()];
-            int i = 0;
-            Iterator<crawlWorker> j = workers.values().iterator();
-            while (j.hasNext()) {
-                w[i++] = j.next().entry;
-            }
-            return w;
+            return workers.values().toArray(new plasmaCrawlEntry[0]);
         }
     }

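For context, here is a minimal standalone sketch of the hazard this commit addresses; it is not YaCy code, and the class and method names are illustrative only. Collections.synchronizedMap only synchronizes individual map calls, so iterating its values view while another thread inserts entries can fail with ConcurrentModificationException, whereas ConcurrentHashMap's weakly consistent iterators tolerate concurrent updates.

import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative sketch (not YaCy code): compare iterating a synchronizedMap
// and a ConcurrentHashMap while another thread keeps writing to the map.
public class SyncMapVsConcurrentMap {

    public static void main(String[] args) throws InterruptedException {
        // synchronizedMap guards single calls only; the iteration below is unguarded.
        Map<Integer, String> syncMap = Collections.synchronizedMap(new HashMap<Integer, String>());
        // ConcurrentHashMap iterators are weakly consistent and never throw CME.
        Map<Integer, String> concMap = new ConcurrentHashMap<Integer, String>();

        System.out.println("synchronizedMap:   " + iterateWhileWriting(syncMap));
        System.out.println("ConcurrentHashMap: " + iterateWhileWriting(concMap));
    }

    // Writes entries from a second thread while the caller iterates values().
    private static String iterateWhileWriting(final Map<Integer, String> map) throws InterruptedException {
        Thread writer = new Thread(new Runnable() {
            public void run() {
                for (int i = 0; i < 100000; i++) map.put(i, "worker" + i);
            }
        });
        writer.start();
        try {
            while (writer.isAlive()) {
                for (String value : map.values()) {
                    if (value == null) return "unexpected null"; // just touch each value
                }
            }
            return "iteration completed without error";
        } catch (ConcurrentModificationException e) {
            // Typical outcome for the synchronizedMap case.
            return "ConcurrentModificationException";
        } finally {
            writer.join();
        }
    }
}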
