Permalink
Browse files

added user agent to open calls

  • Loading branch information...
1 parent 0e18df4 commit f34ff0c7b0c5b42dd9043f46c793222922287171 @twoism committed Jun 17, 2010
Showing with 18 additions and 9 deletions.
  1. +7 −6 examples/ning_post.rb
  2. +9 −1 lib/graboid.rb
  3. +1 −1 lib/graboid/entity.rb
  4. +1 −1 lib/graboid/scraper.rb
View
@@ -1,7 +1,8 @@
-%w{rubygems graboid}.each {|f| require f }
+dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
+require File.join(dir, 'graboid')
class NingPost
- include Graboid::Entity
+ include Graboid::Scraper
selector 'div.xg_blog .xg_module_body'
@@ -25,10 +26,10 @@ class NingPost
# ning's list page only has an excerpt of the body. No biggie,
# we'll just go grab it.
show_url = elm.css('a').last["href"]
- Nokogiri::HTML(open(show_url)).css('.postbody').to_html
+ Nokogiri::HTML(open(show_url,"User-Agent" => Graboid.user_agent)).css('.postbody').to_html
end
- pager do |doc|
+ page_with do |doc|
doc.css('.pagination a').select{|a| a.text =~ /previous/i }.first['href'] rescue nil
end
@@ -45,8 +46,8 @@ class NingPost
end
-NingPost.source = 'http://cuwebd.ning.com/profiles/blog/list'
-@posts = NingPost.all(:max_pages => 1)
+NING_URL = 'http://www.friendsorenemies.com/profiles/blog/list?user=3vx1daeuxrt14'
+@posts = NingPost.new( :source => NING_URL ).all(:max_pages => 2)
@posts.each do |post|
puts "#{post.pub_date} -- #{post.title}"
View
@@ -5,6 +5,14 @@
require dir + 'graboid/entity'
require dir + 'graboid/scraper'
-
module Graboid
+ extend self
+
+ def user_agent
+ @user_agent ||= 'Graboid'
+ end
+
+ def user_agent=(agent)
+ @user_agent = agent
+ end
end
@@ -119,7 +119,7 @@ def reset_context
def read_source
case self.source
when /^http[s]?:\/\//
- open self.source
+ open(self.source, "User-Agent" => Graboid.user_agent)
when String
self.source
end
@@ -170,7 +170,7 @@ def paginate
def read_source
case self.source
when /^http[s]?:\/\//
- open self.source
+ open(self.source ,"User-Agent" => Graboid.user_agent)
when String
self.source
end

0 comments on commit f34ff0c

Please sign in to comment.