diff --git a/.gitignore b/.gitignore
index 66d464d..0ffa75c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 # Ignore output of scraper
-data.sqlite
+*.pyc
+*.swp
+*.sqlite
diff --git a/README.md b/README.md
index e541894..7308850 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,2 @@
-This is a scraper that runs on [Morph](https://morph.io). To get started [see the documentation](https://morph.io/documentation)
\ No newline at end of file
+This is a scraper that runs on [Morph](https://morph.io). It reads the current
+water level of Lake Travis in Austin, TX USA
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fe19313
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+beautifulsoup4==4.3.2
+distribute==0.6.24
+dumptruck==0.1.6
+lxml==3.3.5
+python-dateutil==2.2
+requests==2.3.0
+scraperwiki==0.3.11
+six==1.7.3
diff --git a/scraper.py b/scraper.py
index d8a7c35..6a09fcf 100644
--- a/scraper.py
+++ b/scraper.py
@@ -1,23 +1,28 @@
-# This is a template for a Python scraper on Morph (https://morph.io)
-# including some code snippets below that you should find helpful
+"""Get the current lake level for Lake Travis"""
 
-# import scraperwiki
-# import lxml.html
-#
-# # Read in a page
-# html = scraperwiki.scrape("http://foo.com")
-#
-# # Find something on the page using css selectors
-# root = lxml.html.fromstring(html)
-# root.cssselect("div[align='left']")
-#
-# # Write out to the sqlite database using scraperwiki library
-# scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"})
-#
-# # An arbitrary query against the database
-# scraperwiki.sql.select("* from data where 'name'='peter'")
+import re
 
-# You don't have to do things with the ScraperWiki and lxml libraries. You can use whatever libraries are installed
-# on Morph for Python (https://github.com/openaustralia/morph-docker-python/blob/master/pip_requirements.txt) and all that matters
-# is that your final data is written to an Sqlite database called data.sqlite in the current working directory which
-# has at least a table called data.
+import scraperwiki
+from bs4 import BeautifulSoup
+from dateutil.parser import parse as date_parse
+
+html = scraperwiki.scrape("http://travis.uslakes.info/Level.asp")
+
+soup = BeautifulSoup(html)
+level_label = soup.find(text="Water Level")
+td = level_label.parent.parent.parent
+
+level = float(td.find('font', attrs={'color': 'Green'}).strong.text)
+unit = td.findAll('font')[2].strong.text
+date = td.findAll('font')[3].text
+time = td.findAll('font')[4].text.strip()
+timestamp = date_parse(u"%s %s" % (date, time))
+
+full_text_re = re.compile("below full pool of (.*)")
+full_text = td.find(text=full_text_re)
+full_level = float(full_text_re.match(full_text).group(1))
+
+scraperwiki.sqlite.save(
+    unique_keys=['timestamp'],
+    data={"timestamp": timestamp, "level": level, "unit": unit, "full_level": full_level}
+)