Skip to content

Commit

Permalink
works on my machine[tm]
Browse files Browse the repository at this point in the history
  • Loading branch information
tubaman committed Jul 18, 2014
1 parent d36bf5f commit bd05b42
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 23 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -1,2 +1,4 @@
# Ignore output of scraper
data.sqlite
*.pyc
*.swp
*.sqlite
3 changes: 2 additions & 1 deletion README.md
@@ -1 +1,2 @@
This is a scraper that runs on [Morph](https://morph.io). To get started [see the documentation](https://morph.io/documentation)
This is a scraper that runs on [Morph](https://morph.io). It reads the current
water level of Lake Travis in Austin, TX, USA.
8 changes: 8 additions & 0 deletions requirements.txt
@@ -0,0 +1,8 @@
beautifulsoup4==4.3.2
distribute==0.6.24
dumptruck==0.1.6
lxml==3.3.5
python-dateutil==2.2
requests==2.3.0
scraperwiki==0.3.11
six==1.7.3
47 changes: 26 additions & 21 deletions scraper.py
@@ -1,23 +1,28 @@
"""Get the current lake level for Lake Travis.

Scrapes the current water level of Lake Travis (Austin, TX, USA) from
travis.uslakes.info and stores one timestamped reading in the Morph
sqlite database (table "data").
"""

import re

import scraperwiki
from bs4 import BeautifulSoup
from dateutil.parser import parse as date_parse

# Page showing the current Lake Travis water level.
LEVEL_URL = "http://travis.uslakes.info/Level.asp"

# Pulls the reference level out of text like "below full pool of 681.00".
FULL_POOL_RE = re.compile(r"below full pool of (.*)")

html = scraperwiki.scrape(LEVEL_URL)

soup = BeautifulSoup(html)
level_label = soup.find(text="Water Level")
# Walk up from the "Water Level" label to the table cell that holds
# all of the level data.
td = level_label.parent.parent.parent

# The current level is the green, bold number in the cell; the cell's
# <font> elements then hold the unit, date and time at fixed positions
# (positions are page-layout dependent — TODO confirm if the page changes).
fonts = td.findAll('font')  # hoisted: was queried once per field
level = float(td.find('font', attrs={'color': 'Green'}).strong.text)
unit = fonts[2].strong.text
date = fonts[3].text
time = fonts[4].text.strip()
timestamp = date_parse(u"%s %s" % (date, time))

full_text = td.find(text=FULL_POOL_RE)
full_level = float(FULL_POOL_RE.match(full_text).group(1))

# Fix: full_level was computed but never stored; record the "full pool"
# reference level alongside each reading.
scraperwiki.sqlite.save(
    unique_keys=['timestamp'],
    data={
        "timestamp": timestamp,
        "level": level,
        "unit": unit,
        "full_level": full_level,
    }
)

0 comments on commit bd05b42

Please sign in to comment.