Create scraper.py
walinchus committed Jun 14, 2017
1 parent 343c082 commit 8ff883f
Showing 1 changed file with 19 additions and 7 deletions.
scraper.py
@@ -82,21 +82,33 @@ def scrape_table(root):
root = lxml.html.fromstring(html)
scrape_table(root)'''

def Add_Case_No(next_link):

    case_numbers = ['5237521','5244439','5237629','5237823','5234026']
    #go through the case_numbers list above, and for each ID...
    for item in case_numbers:
        #show it in the console
        print item
        #create a URL called 'next_link' which adds that ID to the end of the base_url variable
        next_link = base_url + item + '.html'
        #pass that new concatenated URL to a function, 'scrape_page', which is scripted above
        scrape_page(next_link)
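The comments above refer to scrape_page, which this hunk does not show. A minimal sketch of what such a function might look like here, assuming it simply fetches one case page and hands the parsed tree to scrape_table (the actual definition lives in the unchanged part of the file and may differ):

#A sketch only -- the real scrape_page is defined earlier in the file.
def scrape_page(url):
    #fetch the raw HTML for one case page
    html = scraperwiki.scrape(url)
    #parse it and pass the document tree to the table scraper above
    root = lxml.html.fromstring(html)
    scrape_table(root)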

'''def Add_Case_No(next_link):
    return next_link + 1
    print "the next case number is:", next_link
    next_link = 'GetCaseInformation.aspx?db=garfield&number=CF-2011-' + str(next_link)'''
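The commented-out draft returns before its print and URL lines can ever run. A working version of the same idea (a sketch only, not part of this commit) would build and report the URL before handing back the incremented number:

def Add_Case_No(next_link):
    #sketch of the draft above, reordered so every line is reachable
    print "the next case number is:", next_link
    #next_url is a hypothetical name; the draft reassigned next_link itself
    next_url = 'GetCaseInformation.aspx?db=garfield&number=CF-2011-' + str(next_link)
    print next_url
    return next_link + 1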

def scrape_and_look_for_next_link(url):
    html = scraperwiki.scrape(url)
    #print html
    root = lxml.html.fromstring(html)
    scrape_table(root)

    if next_link < 744:
        next_url = urlparse.urljoin(base_url, next_link)
        print next_url
        scrape_and_look_for_next_link(next_url)
for next_link in range(1, 744):
    print next_link
    if next_link:
        next_url = base_url + 'GetCaseInformation.aspx?db=garfield&number=CF-2011-' + str(next_link)
        print next_url
        scrape_and_look_for_next_link(next_url)
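This loop replaces the recursive if-block above, which tested a next_link variable that is never defined inside the function. Case numbers in a range like this are not guaranteed to exist on OSCN, so a defensive variant (a sketch only, not part of this commit) could log and skip any request that fails instead of crashing the run:

for next_link in range(1, 744):
    next_url = base_url + 'GetCaseInformation.aspx?db=garfield&number=CF-2011-' + str(next_link)
    try:
        #attempt the scrape; a missing case page raises an HTTP error
        scrape_and_look_for_next_link(next_url)
    except Exception as e:
        #report the failure and move on to the next case number
        print 'skipping', next_url, '-', e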

# ---------------------------------------------------------------------------
# START HERE: define your starting URL - then
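The diff is truncated below this comment, but the loop above prepends base_url to an OSCN query string, so the elided lines presumably set base_url to the OSCN application path. A purely hypothetical value for illustration (the real one is in the truncated section and may differ):

#hypothetical -- the actual base_url is defined in the part of the file not shown here
base_url = 'http://www.oscn.net/applications/oscn/'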
