From 1d1750daa817d6dc5114e2b4d3bed0a0522dc69e Mon Sep 17 00:00:00 2001 From: vlakos Date: Wed, 1 Aug 2018 22:28:41 +0200 Subject: [PATCH] Update scraper.py --- scraper.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scraper.py b/scraper.py index 5d65e86..e856cd9 100644 --- a/scraper.py +++ b/scraper.py @@ -4,7 +4,7 @@ import lxml.etree #createavariablecalled'url'andthenreadwhat'sthere -url="http://www.staffssaferroads.co.uk/media/114997/03092012_forwebsite.pdf" +url="http://www.acas.rs/wp-content/uploads/2017/12/Godisnji-plan-provere-za-2018.pdf" pdfdata = urllib2.urlopen(url).read() print "The pdf file has %d bytes" % len(pdfdata) @@ -14,12 +14,12 @@ root = lxml.etree.fromstring(xmldata) #thislineusesxpathtofindtags -lines = root.findall('.//text[@font="5"]') -print lines -for line in lines: - print line.text +#lines = root.findall('.//text[@font="5"]') +#print lines +#for line in lines: + #print line.text -record = {} -for line in lines: - record["date"] = line.text - scraperwiki.sqlite.save(['date'], record) +#record = {} +#for line in lines: + #record["date"] = line.text + #scraperwiki.sqlite.save(['date'], record)