Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

urls test

  • Loading branch information...
commit ab417659bd8705661fc2eb6563e12ee4a21fab8c 1 parent b493ff5
@utsavsabharwal authored
View
44 sock/crawler.py
@@ -1,36 +1,53 @@
import socket
+import select
+import time
from urlparse import urlparse
-
class abc:
def __init__(self):
- urls = ['http://docs.python.org/release/3.1.3/library/urllib.parse.html']
+ self.now = time.time()
+ urls = open("urls").readlines()
+ #urls = ['http://docs.python.org/release/3.1.3/library/urllib.parse.html', 'http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35']
+ self.content = ''
self.run(urls)
+
def create_socket(self):
- return socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ return sock
def remove_socket(self, sock):
sock.close()
del sock
def run(self, urls):
+ socks = []
for url in urls:
scheme, self.host, path, params, query, fragment = urlparse(url)
self.uri = url[url.find(self.host)+len(self.host):]
- self.send_request(self.create_socket())
+ socks.append(self.send_request())
+ while 1:
+ rlist, wlist, elist = select.select(socks, [], [], 50)
+ for sock in rlist:
+ content = sock.recv(1024)
+ print time.time() - self.now, len(content)
+ if(len(content)==0):
+ break
+
+
- def send_request(self, sock):
+ def send_request(self, ):
+ sock = self.create_socket()
+ sock.connect((self.host, 80))
+ sock.setblocking(0)
l = ''
- print self.uri, self.host
line1 = "GET %s HTTP/1.1"%(self.uri)
line2 = "Host: %s"%(self.host)
line3 = "Connection: close"
for line in (line1, line2, line3):
- print "--", line
- l+= (line + "\r\n")
- sock.send(l)
+ sock.send(line+"\r\n")
sock.send("\r\n")
-
+ return sock
+
a = abc()
@@ -52,9 +69,4 @@ def send_request(self, sock):
sock.send(line + "\r\n")
sock.send("\r\n")
-while True:
- content = sock.recv(1024)
- if content:
- print content
- else:
- break'''
+'''
View
46 sock/crawler.py~
@@ -1,36 +1,53 @@
import socket
+import select
+import time
from urlparse import urlparse
-
class abc:
def __init__(self):
- urls = ['http://docs.python.org/release/3.1.3/library/urllib.parse.html']
+ self.now = time.time()
+ urls = open("urls").readlines()
+ #urls = ['http://docs.python.org/release/3.1.3/library/urllib.parse.html', 'http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35']
+ self.content = ''
self.run(urls)
+
def create_socket(self):
- return socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ return sock
def remove_socket(self, sock):
sock.close()
del sock
def run(self, urls):
+ socks = []
for url in urls:
scheme, self.host, path, params, query, fragment = urlparse(url)
self.uri = url[url.find(self.host)+len(self.host):]
- self.send_request(self.create_socket())
+ socks.append(self.send_request())
+ while 1:
+ rlist, wlist, elist = select.select(socks, [], [], 50)
+ for sock in rlist:
+ content = sock.recv(1024)
+ print time.time() - self.now, len(content)
+ if(len(content)==0):
+ break
+
+
- def send_request(self, sock):
- l = []
- print self.uri, self.host
+ def send_request(self, ):
+ sock = self.create_socket()
+ sock.connect((self.host, 80))
+ sock.setblocking(0)
+ l = ''
line1 = "GET %s HTTP/1.1"%(self.uri)
line2 = "Host: %s"%(self.host)
line3 = "Connection: close"
for line in (line1, line2, line3):
- print "--", line
- l+= (line + "\r\n")
- sock.send(l)
+ sock.send(line+"\r\n")
sock.send("\r\n")
-
+ return sock
+
a = abc()
@@ -52,9 +69,4 @@ for line in (
sock.send(line + "\r\n")
sock.send("\r\n")
-while True:
- content = sock.recv(1024)
- if content:
- print content
- else:
- break'''
+'''
View
13 sock/test.py
@@ -0,0 +1,13 @@
+import socket
+sock = socket.socket()
+sock.connect(('docs.python.org', 80))
+for line in (
+ "GET /release/3.1.3/library/urllib.parse.html HTTP/1.1",
+ "Host: docs.python.org",
+ "Connection: close",
+):
+ print line
+ sock.send(line + "\r\n")
+sock.send("\r\n")
+
+
View
12 sock/test.py~
@@ -0,0 +1,12 @@
+import socket
+sock = socket.socket()
+sock.connect(('docs.python.org', 80))
+for line in (
+ "GET /release/3.1.3/library/urllib.parse.html HTTP/1.1",
+ "Host: docs.python.org",
+ "Connection: close",
+):
+ sock.send(line + "\r\n")
+sock.send("\r\n")
+
+
View
30 sock/urls
@@ -0,0 +1,30 @@
+http://www.taylorgifts.com/item/MICRO_FIBER_PAD_REFILLS/27970B?src=FINDGIFT
+http://www.barbecues.com/web/catalog/product_detail.aspx?pid=63542&cm_cat=Accents&cm_pla=Pentair&cm_ite=Pentair-Pool+Products-63542
+http://compare.ebay.com/like/120718528535?_lwgsi=y&ltyp=AllFixedPriceItemTypes&var=sbar
+http://www.lightinthebox.com/Trumpet---Mermaid-Strapless-Court-Train-Taffeta-Wedding-Dress--WSM0480-_p66410.html
+http://tracking.searchmarketing.com/click.asp?aid=688160467
+http://www.buy.com/retail/product.asp?sku=223010930&listingid=148009155
+http://www.clickinks.com/toner-cartridges/canon/canon-0384b003aa-black-remanufactured-toner-cartridge-%28gpr18%29/3399/2p-n-014/?pgid=8449ff20-72de-4233-b817-37a1f4bcda5f
+http://www.amazon.com/BlenderBottle-28-Ounce-with-BlenderBall-Green/product-reviews/B0018G3KCK/ref=cm_cr_pr_top_link_next_9/178-4096857-3004021?ie=UTF8&showViewpoints=0&pageNumber=9
+http://www.lgeoo.biz/servlet/the-50842/32IN-AQUOS%2C-720P-HDTV%2C/Detail
+http://www.knockknock.biz/catalog/categories/pads/kk-pads/all-out-of-red-pad/
+http://www.inkers.biz/brtncotofrgr.html?productid=brtncotofrgr&channelid=NEXTA
+http://www.hightechmall.biz/store/customer/product.php?productid=35992&cat=312&page=1
+http://www.harlanservices.biz/servlet/the-11/Lenovo-ThinkPad-T520-15.6%22/Detail
+http://www.hairtools.biz/ProductDetails.asp?ProductCode=21500
+http://www.filters4less.biz/ak-60433.html
+http://www.farmerdave.biz/servlet/the-167/Grapevine-Wreath-FarmerDave/Detail
+http://www.ecoppolicesupply.biz/servlet/the-204142/Streamlight-85010-LED-Scorpion/Detail
+http://www.distributedenergy.biz/cart/shopexd.asp?id=413&bc=no
+http://www.danemax.biz/ProductDetails.asp?ProductCode=HEWQ5421A&Click=1075
+http://www.coveralls.biz/store/MCCC-SH
+http://www.boundarywaters.biz/isclhykitsy.html
+http://store.bestsports.biz/psr717.html
+http://beautyexpress.biz/product_info.php?products_id=852
+http://www.avalive.biz/AJA/KONA-LHE/17765/productDetail.php
+http://www.airwater.biz/soleus_sg_wac_25hce_air_conditioner_9929_prd1.htm
+http://store.1trading.biz/yhst-1884191163917/3siblglgrva.html
+http://store.1trading.biz/yhst-1884191163917/3siblglgrva.html
+http://www.molecule.asia/dry-hydrogen-cargo-pants/?cl=c11&utm_source=nextag&utm_medium=cpc&utm_campaign=Dry+Hydrogen+Cargo+Pants
+http://www.restaurantsupplypro.com/product/cambro-upcs400/pan-carriers
+http://66.70.17.113/go.asp?ic=9960&source=nextag
View
11 sock/urls~
@@ -0,0 +1,11 @@
+http://www.taylorgifts.com/item/MICRO_FIBER_PAD_REFILLS/27970B?src=FINDGIFT
+http://www.barbecues.com/web/catalog/product_detail.aspx?pid=63542&cm_cat=Accents&cm_pla=Pentair&cm_ite=Pentair-Pool+Products-63542
+http://compare.ebay.com/like/120718528535?_lwgsi=y&ltyp=AllFixedPriceItemTypes&var=sbar
+http://www.lightinthebox.com/Trumpet---Mermaid-Strapless-Court-Train-Taffeta-Wedding-Dress--WSM0480-_p66410.html
+http://tracking.searchmarketing.com/click.asp?aid=688160467
+http://www.buy.com/retail/product.asp?sku=223010930&listingid=148009155
+http://www.clickinks.com/toner-cartridges/canon/canon-0384b003aa-black-remanufactured-toner-cartridge-%28gpr18%29/3399/2p-n-014/?pgid=8449ff20-72de-4233-b817-37a1f4bcda5f
+http://www.amazon.com/BlenderBottle-28-Ounce-with-BlenderBall-Green/product-reviews/B0018G3KCK/ref=cm_cr_pr_top_link_next_9/178-4096857-3004021?ie=UTF8&showViewpoints=0&pageNumber=9
+http://www.lgeoo.biz/servlet/the-50842/32IN-AQUOS%2C-720P-HDTV%2C/Detail
+http://www.knockknock.biz/catalog/categories/pads/kk-pads/all-out-of-red-pad/
+http://www.inkers.biz/brtncotofrgr.html?productid=brtncotofrgr&channelid=NEXTA
Please sign in to comment.
Something went wrong with that request. Please try again.