Skip to content

Commit

Permalink
Create scraper.py
Browse files · Browse the repository at this point in the history
  • Loading branch information
walinchus committed Jun 14, 2017
1 parent f65b099 commit 2a56c7e
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions scraper.py
Expand Up @@ -93,20 +93,21 @@ def scrape_table(root):
61 #pass that new concatenated URL to a function, 'scrape_page', which is scripted above
62 scrape_page(next_link)'''

def Add_Case_No(next_link):
    """Return the next sequential OSCN case number, i.e. next_link + 1.

    The committed version looped `for next_link in range(0, 744)`, which
    shadowed the parameter and hit `return` on the first iteration, so it
    always returned 1 no matter what was passed in; the `print` and URL
    statements after the `return` were unreachable dead code. The caller
    builds the 'GetCaseInformation.aspx?...' URL itself, so this function
    only needs to produce the incremented number.
    """
    return next_link + 1

def scrape_and_look_for_next_link(url, next_link=1):
    """Scrape one OSCN case page, then recurse through the remaining cases.

    url       -- full case-detail URL to fetch and scrape now
    next_link -- case number of *this* page (defaults to 1, the starting
                 case, so existing callers that pass only `url` still work)

    Fixes two defects in the committed version:
    * `next_link` was tested (`if next_link:`) but never defined -- the
      `for next_link in range(1, 744)` loop that bound it was commented
      out, so the first call raised NameError;
    * the follow-up URL was built as `base_url + str(next_link)`, which
      drops the 'GetCaseInformation.aspx?db=...&number=...' query path
      that the pre-commit line included, producing an invalid URL.
    """
    html = scraperwiki.scrape(url)
    root = lxml.html.fromstring(html)
    scrape_table(root)  # persist this page's docket table rows
    next_link = next_link + 1
    # Garfield County CF-2011 cases run from number 1 up to 743.
    if next_link < 744:
        next_url = base_url + 'GetCaseInformation.aspx?db=garfield&number=CF-2011-' + str(next_link)
        # print(x) is equivalent in Python 2 and 3 for a single argument.
        print(next_url)
        scrape_and_look_for_next_link(next_url, next_link)

Expand All @@ -117,7 +118,7 @@ def scrape_and_look_for_next_link(url):
# Script entry: build the starting case URL and kick off the crawl.
base_url = 'http://www.oscn.net/dockets/'
starting_url = urlparse.urljoin(base_url, 'GetCaseInformation.aspx?db=garfield&number=CF-2011-1')
print(starting_url)
# NOTE(review): the committed `Add_Case_No(next_link)` call is removed here:
# `next_link` is never defined at module level (it raised NameError), and the
# call's return value was discarded anyway. scrape_and_look_for_next_link
# drives the iteration over case numbers itself.
scrape_and_look_for_next_link(starting_url)


Expand Down

0 comments on commit 2a56c7e

Please sign in to comment.