Web crawler project
Ruby
Switch branches/tags
Nothing to show
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Failed to load latest commit information.
bin
lib
spec
.gitignore
.rspec
Gemfile
README
Rakefile
gem_graph.png
web_crawler.gemspec

README

Web crawler helps you parse and collect data from the web.

== How it works


```ruby
# Example crawler declaration: collects question summaries from the
# Stack Overflow tag pages for ruby, ruby-on-rails and ruby-on-rails-3.
#
# Every call below (target, logger, cache_to, context, map) is part of the
# DSL inherited from WebCrawler::Base; their exact semantics are defined
# there and are not visible in this example.
class StackoverflowCrawler < WebCrawler::Base

    # One URL per tag; presumably :tag in the pattern is substituted with
    # each listed value -- confirm against WebCrawler::Base.target.
    target "http://stackoverflow.com/questions/tagged/:tag", :tag=> %w{ruby ruby-on-rails ruby-on-rails-3}
    logger "path/to/log/file" # or Logger.new(...)

    cache_to '/tmp/cache/stackoverflow'

    context "#questions .question-summary", :jobs do

      #TODO: defaults :format => lambda{ |v| v.to_i }

      # Numeric fields: coerce the extracted text to Integer.
      map '.vote-count-post strong', :to => :vote_count, :format => lambda{ |v| v.to_i }
      # String#[] with a regexp yields nil when there are no digits, so a
      # digit-less value becomes 0 instead of raising NoMethodError on nil.
      map '.views', :to => :view_count, :format => lambda{ |v| v.to_s[/\d+/].to_i }
      map '.status strong', :to => :answer_count, :format => lambda{ |v| v.to_i }

      # Text fields: keep the text (trimmed); converting them with to_i
      # would turn any ordinary title or excerpt into 0.
      map '.summary h3 a', :to => :title, :format => lambda{ |v| v.to_s.strip }
      map '.summary .excerpt', :to => :excerpt, :format => lambda{ |v| v.to_s.strip }

      # Reads the title attribute rather than the node text.
      map '.user-action-time .relativetime', :to => :posted_at, :on => [:attr, :title]

      # Left unformatted: tags are text, and how several matching nodes are
      # handed to :format is not visible here -- TODO confirm before adding one.
      map '.tags .post-tag', :to => :tags

    end
end
```

#TODO
 1. Add documentation
 2. ...
 3. PROFIT!!!1
 (: