Robots.txt: don't interpret Crawl-Delays for other robots
f1ori committed Dec 18, 2008
1 parent 243e73f commit 0881190
Showing 1 changed file with 12 additions and 9 deletions.
21 changes: 12 additions & 9 deletions source/de/anomic/crawler/robotsParser.java
@@ -147,15 +147,18 @@ private void parse(final BufferedReader reader) {
                     if (isRule4YaCyAgent) rule4YaCyFound = true;
                 }
             } else if (lineUpper.startsWith(ROBOTS_CRAWL_DELAY)) {
-                pos = line.indexOf(" ");
-                if (pos != -1) {
-                    try {
-                        // the crawl delay can be a float number and means number of seconds
-                        crawlDelayMillis = (long) (1000.0 * Float.parseFloat(line.substring(pos).trim()));
-                    } catch (final NumberFormatException e) {
-                        // invalid crawling delay
-                    }
-                }
+                inBlock = true;
+                if (isRule4YaCyAgent || isRule4AllAgents) {
+                    pos = line.indexOf(" ");
+                    if (pos != -1) {
+                        try {
+                            // the crawl delay can be a float number and means number of seconds
+                            crawlDelayMillis = (long) (1000.0 * Float.parseFloat(line.substring(pos).trim()));
+                        } catch (final NumberFormatException e) {
+                            // invalid crawling delay
+                        }
+                    }
+                }
             } else if (lineUpper.startsWith(ROBOTS_DISALLOW) ||
                        lineUpper.startsWith(ROBOTS_ALLOW)) {
                 inBlock = true;
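Before this change, any Crawl-Delay line was parsed unconditionally, so a delay declared in another robot's User-agent block (e.g. for googlebot) would be applied to YaCy as well. The fix guards the parsing with the same agent-match flags the Disallow/Allow handling already uses. Below is a minimal, self-contained sketch of the corrected behavior; it is not YaCy's actual parser (class, method, and variable names are invented for illustration), it matches the agent token case-insensitively, and for brevity it ignores blank-line block separators:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;

public class CrawlDelayDemo {
    // Returns the crawl delay (in milliseconds) that applies to `agent`,
    // honoring Crawl-Delay lines only inside a matching User-agent block.
    static long parseCrawlDelayMillis(BufferedReader reader, String agent) throws IOException {
        long crawlDelayMillis = 0;
        boolean blockApplies = false; // does the current User-agent block target us?
        String line;
        while ((line = reader.readLine()) != null) {
            final String lineUpper = line.toUpperCase();
            if (lineUpper.startsWith("USER-AGENT:")) {
                final String value = line.substring("User-agent:".length()).trim();
                blockApplies = value.equals("*") || value.equalsIgnoreCase(agent);
            } else if (lineUpper.startsWith("CRAWL-DELAY:")) {
                // Only interpret the delay if this block is for us -- the point of the commit.
                if (blockApplies) {
                    try {
                        // the delay can be a float number and means number of seconds
                        crawlDelayMillis = (long) (1000.0 * Float.parseFloat(
                                line.substring("Crawl-delay:".length()).trim()));
                    } catch (final NumberFormatException e) {
                        // ignore an invalid crawl delay
                    }
                }
            }
        }
        return crawlDelayMillis;
    }

    public static void main(String[] args) throws IOException {
        final String robots =
            "User-agent: googlebot\n" +
            "Crawl-delay: 60\n" +
            "User-agent: yacybot\n" +
            "Crawl-delay: 2.5\n";
        // Prints 2500 (from the yacybot block), not 60000: the googlebot
        // block's delay is no longer picked up for our agent.
        System.out.println(parseCrawlDelayMillis(
                new BufferedReader(new StringReader(robots)), "yacybot"));
    }
}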
