Skip to content

Commit

Permalink
*) some minor changes for better code readability
Browse files Browse the repository at this point in the history
*) added more SVN properties

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6787 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
low012 committed Apr 5, 2010
1 parent 7a3c198 commit b97ad0f
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 35 deletions.
2 changes: 2 additions & 0 deletions source/de/anomic/crawler/CrawlProfile.java
Expand Up @@ -85,6 +85,7 @@ public void close() {
this.profileTable = null;
}

/**
 * Finalizer hook: delegates to {@link #close()} when this object is finalized.
 */
@Override
public void finalize() {
this.close();
}
Expand Down Expand Up @@ -319,6 +320,7 @@ public entry(final String name, final DigestURI startURL,
doms = new ConcurrentHashMap<String, DomProfile>();
}

@Override
public String toString() {
final StringBuilder str = new StringBuilder();

Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/crawler/CrawlQueues.java
Expand Up @@ -535,6 +535,7 @@ public long age() {
return System.currentTimeMillis() - start;
}

@Override
public void run() {
try {
// checking robots.txt for http(s) resources
Expand Down
6 changes: 3 additions & 3 deletions source/de/anomic/crawler/CrawlSwitchboard.java
Expand Up @@ -4,9 +4,9 @@
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2009-05-28 01:51:34 +0200 (Do, 28 Mai 2009) $
// $LastChangedRevision: 5988 $
// $LastChangedBy: orbiter $
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
Expand Down
18 changes: 8 additions & 10 deletions source/de/anomic/crawler/ImporterManager.java
Expand Up @@ -30,7 +30,7 @@ public Importer[] getRunningImporter() {
final Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2];
final int activeCount = this.runningJobs.enumerate(importThreads);
final Importer[] importers = new Importer[activeCount];
for (int i=0; i<activeCount; i++) {
for (int i = 0; i < activeCount; i++) {
importers[i] = (Importer) importThreads[i];
}
return importers;
Expand All @@ -43,10 +43,9 @@ public Importer[] getFinishedImporter() {
public Importer getImporterByID(final int jobID) {

final Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2];
final int activeCount = this.runningJobs.enumerate(importThreads);

for (int i=0; i < activeCount; i++) {
final Importer currThread = (Importer) importThreads[i];

for(final Thread importThread : importThreads) {
final Importer currThread = (Importer) importThread;
if (currThread.getJobID() == jobID) {
return currThread;
}
Expand All @@ -73,8 +72,7 @@ public void close() {
try {
// trying to gracefully stop all still running sessions ...
log.logInfo("Signaling shutdown to " + threadCount + " remaining dbImporter threads ...");
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
final Thread currentThread = threadList[currentThreadIdx];
for (final Thread currentThread : threadList) {
if (currentThread.isAlive()) {
((Importer)currentThread).stopIt();
}
Expand All @@ -89,10 +87,10 @@ public void close() {

// we need to use a timeout here because of missing interruptible session threads ...
if (log.isFine()) log.logFine("Waiting for " + runningJobs.activeCount() + " remaining dbImporter threads to finish shutdown ...");
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
final Thread currentThread = threadList[currentThreadIdx];
int currentThreadIdx = 0;
for (final Thread currentThread : threadList) {
if (currentThread.isAlive()) {
if (log.isFine()) log.logFine("Waiting for dbImporter thread '" + currentThread.getName() + "' [" + currentThreadIdx + "] to finish shutdown.");
if (log.isFine()) log.logFine("Waiting for dbImporter thread '" + currentThread.getName() + "' [" + currentThreadIdx++ + "] to finish shutdown.");
try { currentThread.join(500); } catch (final InterruptedException ex) {}
}
}
Expand Down
6 changes: 3 additions & 3 deletions source/de/anomic/crawler/ResultImages.java
Expand Up @@ -4,9 +4,9 @@
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
Expand Down
6 changes: 3 additions & 3 deletions source/de/anomic/crawler/ResultURLs.java
Expand Up @@ -5,9 +5,9 @@
// first published on http://yacy.net
// Frankfurt, Germany, 2004
//
// $LastChangedDate: 2008-03-16 23:31:54 +0100 (So, 16 Mrz 2008) $
// $LastChangedRevision: 4575 $
// $LastChangedBy: orbiter $
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
Expand Down
24 changes: 11 additions & 13 deletions source/de/anomic/crawler/RobotsEntry.java
Expand Up @@ -28,12 +28,11 @@

package de.anomic.crawler;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import net.yacy.kelondro.data.meta.DigestURI;
Expand All @@ -53,7 +52,7 @@ public class RobotsEntry {

// this is a simple record structure that holds all properties of a single crawl start
private Map<String, byte[]> mem;
private LinkedList<String> allowPathList, denyPathList;
private List<String> allowPathList, denyPathList;
String hostName;

public RobotsEntry(final String hostName, final Map<String, byte[]> mem) {
Expand Down Expand Up @@ -88,8 +87,8 @@ public RobotsEntry(final String hostName, final Map<String, byte[]> mem) {

public RobotsEntry(
final DigestURI theURL,
final ArrayList<String> allowPathList,
final ArrayList<String> disallowPathList,
final List<String> allowPathList,
final List<String> disallowPathList,
final Date loadedDate,
final Date modDate,
final String eTag,
Expand All @@ -114,8 +113,8 @@ public RobotsEntry(
this.allowPathList.addAll(allowPathList);

final StringBuilder pathListStr = new StringBuilder(allowPathList.size() * 30);
for (int i=0; i<allowPathList.size();i++) {
pathListStr.append(allowPathList.get(i))
for (String element : allowPathList) {
pathListStr.append(element)
.append(ROBOTS_DB_PATH_SEPARATOR);
}
this.mem.put(ALLOW_PATH_LIST, pathListStr.substring(0,pathListStr.length()-1).getBytes());
Expand All @@ -125,8 +124,8 @@ public RobotsEntry(
this.denyPathList.addAll(disallowPathList);

final StringBuilder pathListStr = new StringBuilder(disallowPathList.size() * 30);
for (int i=0; i<disallowPathList.size();i++) {
pathListStr.append(disallowPathList.get(i))
for (String element : disallowPathList) {
pathListStr.append(element)
.append(ROBOTS_DB_PATH_SEPARATOR);
}
this.mem.put(DISALLOW_PATH_LIST,pathListStr.substring(0, pathListStr.length()-1).getBytes());
Expand All @@ -138,6 +137,7 @@ public Map<String, byte[]> getMem() {
return this.mem;
}

@Override
public String toString() {
final StringBuilder str = new StringBuilder(6000);
str.append((this.hostName == null) ? "null" : this.hostName).append(": ");
Expand Down Expand Up @@ -198,12 +198,10 @@ public boolean isDisallowed(String path) {
// escaping all occurrences of ; because this char is used as special char in the Robots DB
else path = path.replaceAll(ROBOTS_DB_PATH_SEPARATOR,"%3B");

final Iterator<String> pathIter = this.denyPathList.iterator();
while (pathIter.hasNext()) {
final String nextPath = pathIter.next();
for (String element : this.denyPathList) {

// disallow rule
if (path.startsWith(nextPath)) {
if (path.startsWith(element)) {
return true;
}
}
Expand Down
6 changes: 3 additions & 3 deletions source/de/anomic/crawler/ZURL.java
Expand Up @@ -4,9 +4,9 @@
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
Expand Down

0 comments on commit b97ad0f

Please sign in to comment.