Skip to content

Commit

Permalink
Create scraper.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
walinchus committed Jun 14, 2017
1 parent 6eb05a3 commit 9fa9555
Showing 1 changed file with 6 additions and 7 deletions.
13 changes: 6 additions & 7 deletions scraper.py
Expand Up @@ -115,11 +115,10 @@ def scrape_and_look_for_next_link(url):
root = lxml.html.fromstring(html)
scrape_table(root)
global i
for i in range(1, 10):
i = (i + 1)
next_url = base_url+'GetCaseInformation.aspx?db=garfield&number=CF-2011-'+str(i)
print next_url
scrape_and_look_for_next_link(next_url)
i = (i + 1)
next_url = base_url+'GetCaseInformation.aspx?db=garfield&number=CF-2011-'+str(i)
print next_url
scrape_and_look_for_next_link(next_url)

# ---------------------------------------------------------------------------
# START HERE: define your starting URL - then
Expand All @@ -129,9 +128,9 @@ def scrape_and_look_for_next_link(url):
starting_url = urlparse.urljoin(base_url, 'GetCaseInformation.aspx?db=garfield&number=CF-2011-1')
print starting_url
global i
#for i in range(1,10):
for i in range(1,10):
#There are 743 cases but 468 appears to be the server request limit
scrape_and_look_for_next_link(starting_url)
scrape_and_look_for_next_link(starting_url)


# # Read in a page
Expand Down

0 comments on commit 9fa9555

Please sign in to comment.