-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Allow POST requests, and passing arbitrary opts to curl.setopt #4
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
fetcher | ||
======= | ||
|
||
pycurl wrapper | ||
concurrent pycurl wrapper |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import sys | ||
import time | ||
|
||
from fetcher import fetch | ||
|
||
|
||
def get_requests(count, url): | ||
print 'GETting %s from %s' % (count, url) | ||
return ((url, 'request-%s' % i) for i in range(count)) | ||
|
||
|
||
def post_requests(count, url): | ||
print 'POSTting %s from %s' % (count, url) | ||
return ((url, i, 'request=%s' % i) for i in range(count)) | ||
|
||
|
||
def make_requests(requests): | ||
start = time.time() | ||
for ok, resp in fetch(requests, concurrent=100): | ||
print ok, resp | ||
delta = time.time() - start | ||
print '%.02f req/s' % (count / delta) | ||
|
||
if __name__ == '__main__': | ||
count = int(sys.argv[1]) | ||
url = sys.argv[2] | ||
requests_method = post_requests if sys.argv[3:] == ['POST'] else get_requests | ||
sys.exit(make_requests(requests_method(count, url))) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,28 @@ | ||
import pycurl | ||
import sys | ||
import time | ||
|
||
from cStringIO import StringIO | ||
|
||
|
||
def fetch(requests, concurrent=50, timeout_ms=1000, follow_redirects=True): | ||
def fetch(requests, concurrent=50, timeout_ms=1000, follow_redirects=True, | ||
curlopts=None): | ||
""" | ||
requests argument is a generator with the following structure: | ||
|
||
(url, echo_field) - for GET requests | ||
(url, echo_field, post_data) - for POST requests | ||
|
||
curlopts allows arbitrary options to be passed to pycurl.setopt. It is a list | ||
of two-tuples, eg: | ||
|
||
(pycurl.HTTPHEADER, ['Content-Type', 'application/javascript']) | ||
|
||
responses: | ||
success: (True, (echo_field, server_response)) | ||
error: (False, (echo_field, error, effective_URL)) | ||
""" | ||
multi = pycurl.CurlMulti() | ||
|
||
# Sadly, we need to track of pending curls, or they'll get CG'd and | ||
# Sadly, we need to track of pending curls, or they'll get GC'd and | ||
# mysteriously disappear. Don't ask me! | ||
curls = [] | ||
num_handles = 0 | ||
|
@@ -18,26 +32,39 @@ def fetch(requests, concurrent=50, timeout_ms=1000, follow_redirects=True): | |
# If the concurrency cap hasn't been reached yet, another request can be | ||
# pulled off and added to the multi. | ||
if unscheduled_reqs and num_handles < concurrent: | ||
|
||
try: | ||
url, payload = requests.next() | ||
request = requests.next() | ||
except StopIteration: | ||
unscheduled_reqs = False | ||
continue | ||
|
||
if len(request) == 3: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This check would be a lot more readable if they were args.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See above - each "multi" contains len(requests) actual requests, each with (presumably) different POST data. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I could add a "method" argument, but the requests arity stuff would still need to happen, so I don't see that helping any ... |
||
url, payload, post_data = request | ||
elif len(request) == 2: | ||
url, payload = request | ||
post_data = None | ||
else: | ||
raise Exception('Bad request: {}'.format(repr(request))) | ||
|
||
body = StringIO() | ||
|
||
curl = pycurl.Curl() | ||
curl.setopt(pycurl.URL, url) | ||
curl.setopt(pycurl.WRITEFUNCTION, body.write) | ||
curl.setopt(pycurl.TIMEOUT_MS, timeout_ms) | ||
curl.setopt(pycurl.CONNECTTIMEOUT_MS, timeout_ms) | ||
curl.setopt(pycurl.FOLLOWLOCATION, 1 if follow_redirects else 0) | ||
curl.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64;' +\ | ||
' rv:21.0) Gecko/20100101 Firefox/21.0') | ||
|
||
if follow_redirects: | ||
curl.setopt(pycurl.FOLLOWLOCATION, 1) | ||
else: | ||
curl.setopt(pycurl.FOLLOWLOCATION, 0) | ||
# arbitrary options | ||
if curlopts is not None: | ||
for option, value in curlopts: | ||
curl.setopt(option, value) | ||
|
||
if post_data is not None: | ||
curl.setopt(pycurl.POSTFIELDS, post_data) | ||
|
||
curl.body = body | ||
curl.payload = payload | ||
|
@@ -72,18 +99,3 @@ def fetch(requests, concurrent=50, timeout_ms=1000, follow_redirects=True): | |
|
||
if not num_q: | ||
break | ||
|
||
def main(count, url): | ||
print 'Getting %s from %s' % (count, url) | ||
|
||
requests = ((url, 'req-%s' % i) for i in range(count)) | ||
start = time.time() | ||
for ok, resp in fetch(requests, concurrent=100): | ||
print ok, resp | ||
delta = time.time() - start | ||
print '%.02f req/s' % (count / delta) | ||
|
||
if __name__ == '__main__': | ||
count = int(sys.argv[1]) | ||
url = sys.argv[2] | ||
sys.exit(main(count, url)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any reason why these are a tuple and not just separate args?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
because the elements can and will be different for each request. The args to fetch apply to all of the requests.