Skip to content

Commit

Permalink
Fork of code from ScraperWiki at https://classic.scraperwiki.com/scra…
Browse files Browse the repository at this point in the history
  • Loading branch information
webster committed Jan 18, 2016
0 parents commit 1d415f4
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
# Ignore output of scraper
data.sqlite
31 changes: 31 additions & 0 deletions scraper.py
@@ -0,0 +1,31 @@
import scraperwiki
import lxml.html
import urllib2
import dateutil.parser as parser

url = "http://www.mnd.uscourts.gov/ncs/open_cases_report.html"
html = urllib2.urlopen(url).read()
print "The HTML file is %d bytes" % len(html)
print html


# Parse the report: every data row in the table has exactly 7 <td> cells
# (filing date, time reported, case number, title, type, judge, magistrate).
# NOTE: the duplicate `import lxml.html` was removed — it is already imported
# at the top of the file.
root = lxml.html.fromstring(html)
for tr in root.cssselect("tr"):
    tds = tr.cssselect("td")
    if len(tds) != 7:
        # Not a data row (spacer/heading markup) — skip it.
        continue

    # Normalise the filing date to ISO 8601 (YYYY-MM-DD) using the
    # dateutil import at the top of the file (resolves the old @TODO).
    # Fall back to the raw cell text if it cannot be parsed.
    raw_date = tds[0].text_content()
    try:
        casedate = parser.parse(raw_date.strip()).date().isoformat()
    except (ValueError, TypeError, OverflowError):
        casedate = raw_date

    data = {
        'filing_date': casedate,
        'time_reported': tds[1].text_content(),
        'case_number': tds[2].text_content(),
        'case_title': tds[3].text_content(),
        'case_type': tds[4].text_content(),
        'judge': tds[5].text_content(),
        'magistrate': tds[6].text_content()
    }
    # The header row also has 7 cells; its "case number" cell reads "Case #",
    # so use that to avoid saving it as data.
    if data['case_number'] != "Case #":
        scraperwiki.sqlite.save(unique_keys=['case_number'], data=data)

0 comments on commit 1d415f4

Please sign in to comment.