Skip to content

Commit

Permalink
Create scraper.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
walinchus committed Jun 14, 2017
1 parent 6eb05a3 commit 9fa9555
Showing 1 changed file with 6 additions and 7 deletions.
13 changes: 6 additions & 7 deletions scraper.py
Expand Up @@ -115,11 +115,10 @@ def scrape_and_look_for_next_link(url):
root = lxml.html.fromstring(html)
scrape_table(root)
global i
for i in range(1, 10):
i = (i + 1)
next_url = base_url+'GetCaseInformation.aspx?db=garfield&number=CF-2011-'+str(i)
print next_url
scrape_and_look_for_next_link(next_url)
i = (i + 1)
next_url = base_url+'GetCaseInformation.aspx?db=garfield&number=CF-2011-'+str(i)
print next_url
scrape_and_look_for_next_link(next_url)

# ---------------------------------------------------------------------------
# START HERE: define your starting URL - then
Expand All @@ -129,9 +128,9 @@ def scrape_and_look_for_next_link(url):
starting_url = urlparse.urljoin(base_url, 'GetCaseInformation.aspx?db=garfield&number=CF-2011-1')
print starting_url
global i
#for i in range(1,10):
for i in range(1,10):
#There are 743 cases but 468 appears to be the server request limit
scrape_and_look_for_next_link(starting_url)
scrape_and_look_for_next_link(starting_url)


# # Read in a page
Expand Down

0 comments on commit 9fa9555

Please sign in to comment.