Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

:lips: refactoring menudo to put a real user agent on every request #5

Merged
merged 2 commits

1 participant

@zenkalia
Owner

@mvattuone this'll make adding new scrapers a little cleaner, too

@zenkalia zenkalia merged commit 577269b
@zenkalia zenkalia deleted the branch
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
View
2  README.md
@@ -8,6 +8,7 @@
* Rickshaw Stop
* Brainwash
* New Parish
+* The Fox
## Venues to do
@@ -27,7 +28,6 @@
* Monarch
* Public Works
* Rockapulco
-* The Fox
* Freight & Salvage
* Superb events
* The Chapel
View
9 lib/menudo/base.rb
@@ -3,5 +3,14 @@ class Base
def venue
@venue ||= Venue.where(slug: self.class.to_s.underscore).first
end
+
+ def doc
+ @doc ||= Nokogiri::HTML(open(doc_url,
+ "User-Agent" => "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36" ))
+ end
+
+ def doc_url
+ ''
+ end
end
end
View
7 lib/menudo/bottom_of_the_hill.rb
@@ -1,8 +1,9 @@
class BottomOfTheHill < Menudo::Base
- def refresh
- doc = Nokogiri::HTML(open('http://www.bottomofthehill.com/calendar.html',
- "User-Agent" => "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36" ))
+ def doc_url
+ 'http://www.bottomofthehill.com/calendar.html'
+ end
+ def refresh
shows = doc.css('#listings').children.first.children
shows.each do |show|
View
6 lib/menudo/brainwash.rb
@@ -1,7 +1,9 @@
class Brainwash < Menudo::Base
- def refresh
- doc = Nokogiri::HTML(open('http://www.brainwash.com/calendar.html'))
+ def doc_url
+ 'http://www.brainwash.com/calendar.html'
+ end
+ def refresh
shows = doc.css('table').first.css('tr')
month_year = 'February 2014'
shows.each do |show|
View
5 lib/menudo/fox.rb
@@ -1,8 +1,9 @@
class Fox < Menudo::Base
+ def doc_url
+ 'http://www.thefoxoakland.com/calendar.php'
+ end
def refresh
- doc = Nokogiri::HTML(open('http://www.thefoxoakland.com/calendar.php'))
-
shows = doc.css('table.concert_calendar').css('tr')
shows.each do |show|
View
9 lib/menudo/new_parish.rb
@@ -1,14 +1,15 @@
class NewParish < Menudo::Base
- def refresh
- base_url = 'http://www.thenewparish.com/'
- doc = Nokogiri::HTML(open(base_url))
+ def doc_url
+ 'http://www.thenewparish.com/'
+ end
+ def refresh
shows = doc.css('.list-view-item')
shows.each do |show|
doors = starts = nil
name = "#{show.css('.headliners.summary').text}, #{show.css('.supports.description').text}"
- url = base_url + show.css('a').first.attr('href')
+ url = doc_url + show.css('a').first.attr('href')
price = show.css('.price-range').text.strip
date = show.css('.dates').text
date = date.split(' ').last
View
6 lib/menudo/night_light.rb
@@ -1,7 +1,9 @@
class NightLight < Menudo::Base
- def refresh
- doc = Nokogiri::HTML(open('http://calendar.apps.wix.com/production/calendar/user?cacheKiller=13916147932720&compId=TPWdgt0-8vz&deviceType=desktop&instance=n27R2hvBfXPm9DQEKbh_HGoTJDaG4uUd1Xzyw1fcdvQ.eyJpbnN0YW5jZUlkIjoiMTJjYWUzMmItMzU4MC01NGU4LTc4YTQtNDA4YWY3ZTc5MmMxIiwic2lnbkRhdGUiOiIyMDE0LTAyLTA1VDA5OjM5OjUwLjk4OC0wNjowMCIsImlwQW5kUG9ydCI6Ijc2LjEwMy4yNTMuMTgzLzQ4NTA4IiwiZGVtb01vZGUiOmZhbHNlfQ&locale=en&viewMode=site&width=665'))
+ def doc_url
+ 'http://calendar.apps.wix.com/production/calendar/user?cacheKiller=13916147932720&compId=TPWdgt0-8vz&deviceType=desktop&instance=n27R2hvBfXPm9DQEKbh_HGoTJDaG4uUd1Xzyw1fcdvQ.eyJpbnN0YW5jZUlkIjoiMTJjYWUzMmItMzU4MC01NGU4LTc4YTQtNDA4YWY3ZTc5MmMxIiwic2lnbkRhdGUiOiIyMDE0LTAyLTA1VDA5OjM5OjUwLjk4OC0wNjowMCIsImlwQW5kUG9ydCI6Ijc2LjEwMy4yNTMuMTgzLzQ4NTA4IiwiZGVtb01vZGUiOmZhbHNlfQ&locale=en&viewMode=site&width=665'
+ end
+ def refresh
shows = doc.css('.agenda_date_wrapper')
shows.each do |show|
date_month = show.css('.datetime_month').text
View
8 lib/menudo/rickshaw_stop.rb
@@ -1,8 +1,10 @@
class RickshawStop < Menudo::Base
- def refresh
- #doc = Nokogiri::HTML(open('http://www.rickshawstop.com/calendar/')) # Nope.. using proxy.
- doc = Nokogiri::HTML(open('http://hideme.be/browse.php?u=http%3A%2F%2Fwww.rickshawstop.com%2Fcalendar%2F&b=12&f=norefer'))
+ def doc_url
+ #'http://www.rickshawstop.com/calendar/'
+ 'http://hideme.be/browse.php?u=http%3A%2F%2Fwww.rickshawstop.com%2Fcalendar%2F&b=12&f=norefer'
+ end
+ def refresh
shows = doc.css('.vevent')
shows.each do |show|
url_segment = show.css('a').first.try(:attr, 'href')
View
6 lib/menudo/uptown.rb
@@ -1,7 +1,9 @@
class Uptown < Menudo::Base
- def refresh
- doc = Nokogiri::HTML(open('http://www.uptownnightclub.com/event-calendar/'))
+ def doc_url
+ 'http://www.uptownnightclub.com/event-calendar/'
+ end
+ def refresh
shows = doc.css('.postcal')
shows.each do |show|
price = door_time = start_time = nil
Something went wrong with that request. Please try again.