Permalink
Browse files

Merge pull request #2 from buddylindsey/master

Added data store
  • Loading branch information...
buddylindsey committed Jan 29, 2012
2 parents cccdccd + 69d3365 commit ac9a50fb23b335a3ad86739b3a5013a35d3b8c32
Showing with 120 additions and 42 deletions.
  1. +1 −0 {tulsamugs → }/Gemfile
  2. +16 −0 Gemfile.lock
  3. +0 −10 tulsamugs/Gemfile.lock
  4. +49 −0 tulsamugs/couch.rb
  5. +31 −0 tulsamugs/person.rb
  6. +13 −32 tulsamugs/scraper.rb
  7. +10 −0 tulsamugs/store.rb
@@ -1,3 +1,4 @@
source 'http://rubygems.org'
gem 'nokogiri'
+gem 'uuid'
View
@@ -0,0 +1,16 @@
+GEM
+ remote: http://rubygems.org/
+ specs:
+ macaddr (1.5.0)
+ systemu (>= 2.4.0)
+ nokogiri (1.5.0)
+ systemu (2.4.2)
+ uuid (2.3.5)
+ macaddr (~> 1.0)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ nokogiri
+ uuid
View
@@ -1,10 +0,0 @@
-GEM
- remote: http://rubygems.org/
- specs:
- nokogiri (1.5.0)
-
-PLATFORMS
- ruby
-
-DEPENDENCIES
- nokogiri
View
@@ -0,0 +1,49 @@
+require 'net/http'
+
# Small HTTP helpers for talking to a JSON-over-HTTP server
# (the module name suggests CouchDB; only plain Net::HTTP is used here).
module Couch
  # Issues DELETE/GET/PUT/POST requests against one host and port and raises
  # on any non-2xx response.
  class Server
    # Content type sent with every request that carries a JSON body.
    JSON_CONTENT_TYPE = 'application/json'

    # @param host [String] server host name
    # @param port [String, Integer] server port, passed straight to Net::HTTP.start
    # @param options [Object, nil] stored but not read anywhere in this class
    def initialize(host, port, options = nil)
      @host = host
      @port = port
      @options = options
    end

    # Sends a DELETE request for +uri+.
    #
    # @param uri [String] request path
    # @return [Net::HTTPResponse] the successful response
    # @raise [RuntimeError] when the response is not a Net::HTTPSuccess
    def delete(uri)
      request(Net::HTTP::Delete.new(uri))
    end

    # Sends a GET request for +uri+.
    #
    # @param uri [String] request path
    # @return [Net::HTTPResponse] the successful response
    # @raise [RuntimeError] when the response is not a Net::HTTPSuccess
    def get(uri)
      request(Net::HTTP::Get.new(uri))
    end

    # Sends +json+ as the body of a PUT request to +uri+.
    #
    # @param uri [String] request path
    # @param json [String] already-serialised JSON body
    # @return [Net::HTTPResponse] the successful response
    # @raise [RuntimeError] when the response is not a Net::HTTPSuccess
    def put(uri, json)
      request(json_request(Net::HTTP::Put, uri, json))
    end

    # Sends +json+ as the body of a POST request to +uri+.
    #
    # @param uri [String] request path
    # @param json [String] already-serialised JSON body
    # @return [Net::HTTPResponse] the successful response
    # @raise [RuntimeError] when the response is not a Net::HTTPSuccess
    def post(uri, json)
      request(json_request(Net::HTTP::Post, uri, json))
    end

    # Opens a connection to the configured host/port, performs +req+ and
    # returns the response.
    #
    # @param req [Net::HTTPRequest] a prepared request object
    # @return [Net::HTTPResponse] the successful response
    # @raise [RuntimeError] when the response is not a Net::HTTPSuccess
    def request(req)
      res = Net::HTTP.start(@host, @port) { |http| http.request(req) }
      handle_error(req, res) unless res.is_a?(Net::HTTPSuccess)
      res
    end

    private

    # Builds a request of +request_class+ for +uri+ carrying +json+ as its body.
    def json_request(request_class, uri, json)
      request_class.new(uri).tap do |req|
        req['content-type'] = JSON_CONTENT_TYPE
        req.body = json
      end
    end

    # Raises a RuntimeError describing the failed request: status code and
    # message, HTTP method, path and response body.
    def handle_error(req, res)
      raise RuntimeError, "#{res.code}:#{res.message}\nMETHOD:#{req.method}\nURI:#{req.path}\n#{res.body}"
    end
  end
end
View
@@ -0,0 +1,31 @@
require 'json'

# One scraped record, parsed from a single block of text.
#
# The text is split on ':' into chunks:
# * first chunk  - split on ','; the first piece supplies name (first line)
#                  and age (last line), the last piece supplies city once the
#                  word "ARRESTED" is removed
# * second chunk - the date, once the word "CHARGES" is removed
# * last chunk   - the charges
#
# NOTE(review): the exact page format is not visible here; the layout above is
# only what the parsing code implies.
class Person
  attr_accessor :name, :age, :city, :date, :charges

  # @param person [String] raw text of one record
  def initialize(person)
    chunks = person.split(':')
    first_chunk(chunks.first)
    self.date = second_chunk(chunks[1])
    self.charges = chunks.last.strip
  end

  # Fills in name, age and city from the text before the first ':'.
  #
  # @param stuff [String] the first ':'-separated chunk
  def first_chunk(stuff)
    pieces = stuff.split(',')
    self.city = pieces.last.gsub('ARRESTED', '').strip

    lines = pieces.first.lines
    self.name = lines.first.strip
    self.age = lines.last.strip
  end

  # Removes the "CHARGES" label from the date chunk. Surrounding whitespace is
  # left untouched.
  #
  # @param date [String] the second ':'-separated chunk
  # @return [String]
  def second_chunk(date)
    date.gsub('CHARGES', '')
  end

  # Serialises the record as a JSON object with string keys.
  #
  # Accepts and forwards the generator arguments so a Person can also be
  # serialised when nested in an Array or Hash, where the JSON generator
  # calls to_json(state) on each element.
  #
  # @return [String] JSON text
  def to_json(*args)
    {
      'name' => name,
      'age' => age,
      'city' => city,
      'date' => date,
      'charges' => charges
    }.to_json(*args)
  end
end
+
View
@@ -1,43 +1,24 @@
require 'nokogiri'
require 'open-uri'
+require 'json'
+require 'uuid'
-URL = "http://tulsamugs.com/"
-
-class Person
- attr_accessor :name, :age, :city, :date, :charges
-
- def initialize(person)
- the_person = person.split(':')
- first_chunk(the_person.first)
- self.date = second_chunk(the_person[1])
- self.charges = the_person.last.strip
- end
-
- def first_chunk(stuff)
-
- self.city = stuff.split(',').last.gsub('ARRESTED', '').strip
- data = []
- stuff.split(',').first.each_line do |line|
- data << line
- end
+require_relative 'person'
+require_relative 'store'
- self.name = data.first.strip
- self.age = data.last.strip
- end
-
- def second_chunk(date)
- return date.gsub('CHARGES','')
- end
-end
-
-@first_run = true
+URL = "http://tulsamugs.com/"
+@first_run = false
+@uuid = UUID.new
def run_scrape(url="")
@doc = Nokogiri::HTML(open("#{URL}/#{url}"))
@people = []
@doc.xpath("//div[@class='picture']").each do |person|
- @people << person.content
+ if(person.content != "")
+ @people << person.content
+ end
+ puts person.content
end
@final_people = []
@@ -46,10 +27,10 @@ def run_scrape(url="")
end
@final_people.each do |p|
- puts "#{p.name} (#{p.age}) - #{p.charges}"
+ couch_store(@uuid.generate, p.to_json)
end
- sleep 1
+ sleep 2
if(@first_run)
@first_run = false
run_scrape(@doc.xpath("//div[@class='wrapper']//h2//font//a")[0].to_s.split('"')[1])
View
@@ -0,0 +1,10 @@
+require_relative 'couch.rb'
+require_relative 'person.rb'
+
+SERVER = "localhost"
+PORT = "5984"
+
# PUTs +json+ to "/tulsamugs/<key>" on the server at SERVER:PORT, using a
# fresh Couch::Server for each call.
#
# @param key [String] last path segment of the target URI
# @param json [String] request body
# @return whatever Couch::Server#put returns
def couch_store(key, json)
  Couch::Server.new(SERVER, PORT).put("/tulsamugs/#{key}", json)
end

0 comments on commit ac9a50f

Please sign in to comment.