Skip to content

Commit

Permalink
Merge pull request #1 from vivianluo/vivianluo-patch-1
Browse files (browse the repository at this point in the history)
Update scraper.py
  • Loading branch information
vivianluo committed May 6, 2016
2 parents 25dcd78 + 1b0354f commit fa8ab8b
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions scraper.py
Expand Up @@ -6,18 +6,18 @@
# Script fragment: download the resource at `url`, convert it to XML with
# scraperwiki.pdftoxml, and parse that XML with lxml.etree.
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers were
# lost, so removed and added lines may appear side by side -- confirm against the
# real scraper.py before treating any repeated line as live code.
# NOTE(review): `scraperwiki` is used below but is not imported in the visible
# lines; presumably it is imported in the collapsed lines above -- TODO confirm.
# NOTE(review): urllib2 exists only in Python 2.
import urllib2, lxml.etree
# URL of the document to download.
# NOTE(review): this is a Google Drive "open" link; verify that it actually
# serves raw PDF bytes rather than an HTML viewer page.
url = 'https://drive.google.com/open?id=0B90m0udbKK0cMWpOTEZ2VGNDX1k'
# Download the response body, convert it to XML, and parse it into an element tree.
pdfdata = urllib2.urlopen(url).read()
xmldata = scraperwiki.pdftoxml(pdfdata)
root = lxml.etree.fromstring(xmldata)
# NOTE(review): the next three statements repeat the three above verbatim. If
# both copies are live, the URL is downloaded and converted twice; more likely
# they are the old/new sides of the diff -- TODO confirm.
pdfdata = urllib2.urlopen(url).read()
xmldata = scraperwiki.pdftoxml(pdfdata)
root = lxml.etree.fromstring(xmldata)
# # Read in a page
# html = scraperwiki.scrape("http://foo.com")
#
# # Find something on the page using css selectors
# root = lxml.html.fromstring(html)
# root.cssselect("div[align='left']")
# NOTE(review): `html` has no visible assignment (its only assignment is the
# commented-out scrape call above), and only lxml.etree is imported here, not
# lxml.html. Unless both are provided by the collapsed lines, this statement
# fails at runtime -- TODO confirm.
# It also rebinds `root`, discarding the tree parsed from the PDF XML.
root = lxml.html.fromstring(html)
# The result of this selector query is not stored or used.
root.cssselect("div[align='left']")
#
# # Write out to the sqlite database using scraperwiki library
# scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"})
# NOTE(review): this saves a hard-coded sample row, not anything derived from
# the downloaded document -- presumably leftover template code; verify intent.
scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"})
#
# # An arbitrary query against the database
# scraperwiki.sql.select("* from data where 'name'='peter'")
Expand Down

0 comments on commit fa8ab8b

Please sign in to comment.