-
Notifications
You must be signed in to change notification settings - Fork 0
/
Fetcher1.py
65 lines (50 loc) · 1.3 KB
/
Fetcher1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#! /usr/bin/python
## -*- encoding: utf-8 -*-
from threading import Thread, Lock
from Queue import Queue
from urllib2 import urlopen
class Fetcher:
    """Download URLs concurrently on a fixed pool of daemon worker threads.

    Requests are submitted with push() and finished ``(request, content)``
    pairs are collected with pop().  A download that fails is reported with
    an empty string as its content instead of raising in the worker.
    """

    def __init__(self, threads=5):
        """Create the request/answer queues and start the worker threads.

        threads -- number of daemon worker threads to start (default 5).
        """
        self.threads = threads
        self.lock = Lock()
        self.q_req = Queue()
        self.q_ans = Queue()
        # Every attribute the workers touch must exist before a worker starts.
        self.running = 0    # requests currently being downloaded
        self._pending = 0   # requests pushed but not yet popped
        for _ in range(self.threads):
            worker = Thread(target=self.threadget)
            worker.daemon = True
            worker.start()

    def __del__(self):
        """Announce destruction and wait for every queued request to finish."""
        print("Fetcher delete")
        # Only the request queue is joined: once the object is being
        # finalized nobody can pop() any more, so waiting for the answer
        # queue to be drained could only block forever.
        self.q_req.join()

    def threadget(self):
        """Worker loop: take a request, download it, publish the result.

        Runs forever; intended to be the target of a daemon thread.  The
        result tuple is always published, with '' as content on failure.
        """
        while True:
            req = self.q_req.get()
            with self.lock:
                self.running += 1
            content = ''
            try:
                response = urlopen(req)
                try:
                    content = response.read()
                finally:
                    # Release the connection even if read() fails.
                    response.close()
            except Exception:
                # Best effort: a failed download yields empty content.
                content = ''
            finally:
                self.q_ans.put((req, content))
                with self.lock:
                    self.running -= 1
                # Without this, q_req.join() would never return.
                self.q_req.task_done()

    def push(self, req):
        """Queue ``req`` (anything urlopen accepts) for download."""
        # Count the task before it becomes visible to the workers so that
        # taskleft() can never under-report outstanding work.
        with self.lock:
            self._pending += 1
        return self.q_req.put(req)

    def pop(self):
        """Return the next finished ``(request, content)`` pair.

        Blocks until a result is available.
        """
        answer = self.q_ans.get()
        self.q_ans.task_done()
        with self.lock:
            self._pending -= 1
        return answer

    def taskleft(self):
        """Return the number of pushed requests that have not been popped."""
        # A single counter avoids the window in which a request has left
        # q_req but is not yet reflected in ``running``.
        with self.lock:
            return self._pending
def test():
    """Demo: fetch a few sites with two workers and print each result."""
    fetcher = Fetcher(2)
    urls = [
        "http://www.baidu.com",
        "http://www.sina.com",
        "http://www.bing.com",
    ]
    for url in urls:
        fetcher.push(url)
    # Keep draining results until no pushed request remains outstanding.
    while fetcher.taskleft():
        fetched_url, body = fetcher.pop()
        print("Url: %s" % (fetched_url))
        print("Content: %s" % (body))
# Run the demo only when executed as a script, not when imported.
if "__main__" == __name__:
    test()