Permalink
Browse files

fix a bug that causes ZeroDivisionError

  • Loading branch information...
1 parent cdecaea commit f1b9845fd37b17051d116ef629f8dd08c9815d15 @wuzhe committed Dec 13, 2008
Showing with 7 additions and 4 deletions.
  1. +7 −4 src/crawler.py
View
@@ -46,7 +46,7 @@ class User:
('homepage', lambda x: x.link[3].href),
('description', lambda x: x.content.text))
- Sleep_Timeout_init = 2 # 2 seconds
+ Sleep_Timeout_init = 10 # 10 seconds
Sleep_Banned_init = 3600 + 5 # retry in 1 hour; Douban removes the ban
# after 1 hour
last_req_time = 0
@@ -66,15 +66,16 @@ def _req_api(self, what, uri):
getter = self.client.GetPeople
elif what == 'friends':
getter = self.client.GetFriends
- timeout = User.Sleep_Timeout_init
- banned = User.Sleep_Banned_init
+ # Sleep if request too fast
now = time.time()
if REQ_CONTROL and (now - User.last_req_time) < REQ_INTERVAL:
sleep_time = REQ_INTERVAL - (now - User.last_req_time)
print nowp() + " zzZ for %s seconds, to be polite" % sleep_time
time.sleep(sleep_time)
+ timeout = User.Sleep_Timeout_init
+ banned = User.Sleep_Banned_init
while True:
try:
f = getter(uri)
@@ -284,7 +285,9 @@ def save_state(conn, cursor, queue, visited):
new_reqs = user.api_req_count
req_freq = int(60.0 * new_reqs / duration) # reqs per min
visit_freq = int(3600.0 / duration) # visit per hour
- etr = int((TOTAL_USERS - len(visited)) / visit_freq) # estimated hours left
+ # estimated time remaining, in hours
+ etr = int((TOTAL_USERS - len(visited)) / visit_freq) \
+ if visit_freq != 0 else sys.maxint
# Stats printing
total_reqs += new_reqs

0 comments on commit f1b9845

Please sign in to comment.