Skip to content

Commit

Permalink
Update scraper.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
vlakos committed Aug 1, 2018
1 parent 1ba4619 commit 1d1750d
Showing 1 changed file with 9 additions and 9 deletions.
18 changes: 9 additions & 9 deletions scraper.py
Expand Up @@ -4,7 +4,7 @@
import lxml.etree

# create a variable called 'url' and then read what's there
url="http://www.staffssaferroads.co.uk/media/114997/03092012_forwebsite.pdf"
url="http://www.acas.rs/wp-content/uploads/2017/12/Godisnji-plan-provere-za-2018.pdf"
pdfdata = urllib2.urlopen(url).read()
print "The pdf file has %d bytes" % len(pdfdata)

Expand All @@ -14,12 +14,12 @@
root = lxml.etree.fromstring(xmldata)

# this line uses xpath to find <text> tags
lines = root.findall('.//text[@font="5"]')
print lines
for line in lines:
print line.text
#lines = root.findall('.//text[@font="5"]')
#print lines
#for line in lines:
#print line.text

record = {}
for line in lines:
record["date"] = line.text
scraperwiki.sqlite.save(['date'], record)
#record = {}
#for line in lines:
#record["date"] = line.text
#scraperwiki.sqlite.save(['date'], record)

0 comments on commit 1d1750d

Please sign in to comment.