Permalink
Browse files

Merge branch 'refactor'

  • Loading branch information...
2 parents 6bdd9cd + 070e178 commit cc455b607730d5f27a0a60a273f9527e8a500575 @bronson bronson committed Jun 8, 2011
Showing with 622 additions and 163 deletions.
  1. +9 −3 Gemfile
  2. +43 −9 Gemfile.lock
  3. +5 −0 Rakefile
  4. +55 −6 TODO
  5. +32 −0 check-remotes
  6. +10 −1 console
  7. +15 −60 delete-repos
  8. +148 −0 lib/github.rb
  9. +169 −0 lib/repo.rb
  10. +16 −40 scraper
  11. +107 −0 spec/github_spec.rb
  12. +13 −44 turn-off-wiki
View
12 Gemfile
@@ -1,7 +1,7 @@
source 'http://rubygems.org'
# the pdf magic in mimemagic 0.1.8 is far too loose (recognizes textfiles and zipfiles as pdf)
-# see comments on https://github.com/minad/mimemagic/commit/50078a4d52bb80f525784f6a4cb874fc7d2a03a0#commitcomment-280266
+# github issue: https://github.com/minad/mimemagic/issues/4
gem 'mimemagic' ,'= 0.1.7'
# scraper problems with 0.8.3
@@ -11,12 +11,18 @@ gem 'json' ,'~> 1.4.6'
gem 'gitrb' ,'~> 0.2.7'
gem 'bzip2-ruby' ,'~> 0.2.6'
gem 'mime-types' ,'~> 1.16'
-gem 'octopussy' ,'~> 0.3.0'
-gem 'hashie' ,'~> 0.2.2'
+gem 'octokit' ,'~> 0.5'
+gem 'hashie' ,'~> 1.0'
gem 'htmlentities','~> 4.2.1'
gem 'feedzirra' ,'~> 0.0.24'
gem 'erubis' ,'~> 2.6.6'
gem 'mail' ,'~> 2.2.7'
gem 'i18n' ,'~> 0.4.1'
+
gem 'retryable' , :git => 'git://github.com/bronson/retryable.git'
+group :test do
+ gem 'rspec' ,'~> 2.5'
+ gem 'webmock' , :git => 'https://github.com/afeld/webmock.git', :branch => "expectations_on_stubs"
+end
+
View
52 Gemfile.lock
@@ -4,17 +4,34 @@ GIT
specs:
retryable (0.9.0)
+GIT
+ remote: https://github.com/afeld/webmock.git
+ revision: 0345d5e84e80f48a0ec8214d8c911cb360313a60
+ branch: expectations_on_stubs
+ specs:
+ webmock (1.6.4)
+ addressable (~> 2.2, > 2.2.5)
+ crack (>= 0.1.7)
+
GEM
remote: http://rubygems.org/
specs:
abstract (1.0.0)
activesupport (3.0.8)
+ addressable (2.2.6)
builder (3.0.0)
bzip2-ruby (0.2.7)
- crack (0.1.6)
+ crack (0.1.8)
curb (0.7.15)
+ diff-lcs (1.1.2)
erubis (2.6.6)
abstract (>= 1.0.0)
+ faraday (0.6.1)
+ addressable (~> 2.2.4)
+ multipart-post (~> 1.1.0)
+ rack (>= 1.1.0, < 2)
+ faraday_middleware (0.6.3)
+ faraday (~> 0.6.0)
feedzirra (0.0.24)
activesupport (>= 2.3.8)
builder (>= 2.1.2)
@@ -23,11 +40,9 @@ GEM
nokogiri (> 0.0.0)
sax-machine (>= 0.0.12)
gitrb (0.2.8)
- hashie (0.2.2)
+ hashie (1.0.0)
hpricot (0.8.2)
htmlentities (4.2.4)
- httparty (0.5.2)
- crack (= 0.1.6)
i18n (0.4.2)
json (1.4.6)
loofah (1.0.0)
@@ -39,11 +54,28 @@ GEM
treetop (~> 1.4.8)
mime-types (1.16)
mimemagic (0.1.7)
+ multi_json (1.0.3)
+ multipart-post (1.1.2)
nokogiri (1.4.4)
- octopussy (0.3.0)
- hashie (~> 0.2.0)
- httparty (~> 0.5.2)
+ octokit (0.6.3)
+ addressable (~> 2.2.4)
+ faraday (~> 0.6.0)
+ faraday_middleware (~> 0.6.0)
+ hashie (~> 1.0.0)
+ multi_json (~> 1.0.0)
+ rash (~> 0.3.0)
polyglot (0.3.1)
+ rack (1.3.0)
+ rash (0.3.0)
+ hashie (~> 1.0.0)
+ rspec (2.6.0)
+ rspec-core (~> 2.6.0)
+ rspec-expectations (~> 2.6.0)
+ rspec-mocks (~> 2.6.0)
+ rspec-core (2.6.4)
+ rspec-expectations (2.6.0)
+ diff-lcs (~> 1.1.2)
+ rspec-mocks (2.6.0)
sax-machine (0.0.16)
nokogiri (> 0.0.0)
treetop (1.4.9)
@@ -57,13 +89,15 @@ DEPENDENCIES
erubis (~> 2.6.6)
feedzirra (~> 0.0.24)
gitrb (~> 0.2.7)
- hashie (~> 0.2.2)
+ hashie (~> 1.0)
hpricot (= 0.8.2)
htmlentities (~> 4.2.1)
i18n (~> 0.4.1)
json (~> 1.4.6)
mail (~> 2.2.7)
mime-types (~> 1.16)
mimemagic (= 0.1.7)
- octopussy (~> 0.3.0)
+ octokit (~> 0.5)
retryable!
+ rspec (~> 2.5)
+ webmock!
View
5 Rakefile
@@ -0,0 +1,5 @@
+require "rubygems"
+require "rspec/core/rake_task"
+
+RSpec::Core::RakeTask.new :spec
+task :default => ["spec"]
View
61 TODO
@@ -1,3 +1,42 @@
+Make retryable accept a string to log; it should log that string along with the retry count.
+Factor into modules.
+Store all scripts in a single gigantic scripts.json file.
+Remove vim-script.json from git repos. Make them plain repos again.
+Test by parsing webcache, not just converting scripts.
+Get rid of the 'scripts' database; everything should live either in the webcache or in the scripts.json file.
+Denormalize all author and script info into each version so that we
+ can recreate the whole archive at any time. No need for separate test branch.
+
+Make more resilient against intermittent failures
+(simple reload seems to fix it every time)
+We should probably re-fetch the script at least once if hpricot dies with an error.
+
+ Processing script 2685: zOS-Enterprise-Compiler-PLI
+ downloading http://www.vim.org/scripts/script.php?script_id=2686
+ ./scraper:481:in `githubify': undefined method `gsub' for nil:NilClass (NoMethodError)
+ from ./scraper:330:in `scrape_script'
+ from ./scraper:1354:in `perform_scrape'
+ from ./scraper:1401:in `perform_all'
+ from ./scraper:1467:in `perform_full'
+ from ./scraper:1486:in `perform_rss'
+ from ./scraper:1585:in `<main>'
+
+
+ Scraped scripts/0631 - cf.vim.json
+ Processing script 631: cf.vim
+ ./scraper:270:in `scrape_author': undefined method `next_sibling' for nil:NilClass (NoMethodError)
+ from ./scraper:340:in `block in scrape_script'
+ from ./scraper:333:in `map'
+ from ./scraper:333:in `scrape_script'
+ from ./scraper:1334:in `perform_scrape'
+ from ./scraper:1381:in `perform_all'
+ from ./scraper:1447:in `perform_full'
+ from ./scraper:1487:in `perform_rss'
+ from ./scraper:1564:in `<main>'
+
+
+what happened to archive_viewer.vim and coding_style.vim and indentcpp.vim?
+Make sure all these look OK:
delete mode 100644 1093 - archive_viewer.vim.json
delete mode 100644 2633 - coding_style.vim.json
delete mode 100644 2636 - indentcpp.vim.json
@@ -11,20 +50,21 @@ delete mode 100644 3286 - LiteTabPage.json
Check on AutoTmpl and AutoTmpl-B
-what happened to archive_viewer.vim and coding_style.vim and indentcpp.vim
-
Why doesn't Lite-Tab-Page exist on github???
Switch to Nokogiri, hpricot appears totally hopeless at charset issues.
-Someone deleted everything and upload a vimball?? Need to suppress.
+Someone deleted everything and uploaded a vimball?? Need to suppress.
https://github.com/vim-scripts/mojo.vim/commits/master
-BufClose and bufkill?
+Switch to Nokogiri, hpricot appears totally hopeless at charset issues.
+
+
+BufClose and bufkill? Try to consolidate close buffer plugins.
fully automate scraping
-is it possible to generate docs incrementally?
+is it possible to generate docs incrementally? (n/a once the central db is done)
make it possible for 2 or more scrapers to work at the same time
@@ -47,14 +87,23 @@ http://github.com/minad/gitrb/commit/0847728518b3fa1f9fb95449fbc329b39872e55c
mimemagic:
http://github.com/minad/mimemagic/commit/84720bb8fee6bac0666c89d0f4c1fdbe5904a961
+https://github.com/minad/mimemagic/issues/1
+https://github.com/minad/mimemagic/issues/4
octopussy:
http://github.com/pengwynn/octopussy/commit/aa4de36cbd4ac2de9fdf88b75e1ad35563979af8
http://github.com/pengwynn/octopussy/commit/25af70b9642dccc98faed7ccfbf9dc2a3efe28c4
-rubyzip: (unfortunately never resulted in satisfactory unzipping, but at least we know what's going on)
+rubyzip: (unfortunately never resulted in satisfactory unzipping but at least we know the problem)
http://www.ruby-forum.com/topic/211146#936159
+7zip:
+https://sourceforge.net/tracker/index.php?func=detail&aid=3310980&group_id=14481&atid=114481#
+
+https://github.com/bronson/retryable
+
+unzip: probably not worth filing a bug on Infozip... wish it could decompress newer zipfiles tho.
+
hpricot monkeypatch
... anything else?
View
32 check-remotes
@@ -0,0 +1,32 @@
+#!/usr/bin/env ruby
+
+# Compares the full list of repos on github with the local repos.
+# Reports any mismatches and any repos that still have wikis or issues enabled.
+
+$:.unshift './lib'
+
+require 'bundler'
+Bundler.require
+require 'github'
+
+
+github = GitHub.new
+
+
+# todo: this needs to be refactored into a module used by the scraper
+$repos_dir = ENV['REPOS_DIR'] || 'repos'
+local_repo_names = Dir.entries($repos_dir).reject { |e| %w{. .. .git}.include?(e) }
+local_repos = local_repo_names.map { |n| n.sub /\.git$/, '' }
+
+remote_repo_info = github.list_all_repos
+remote_repos = remote_repo_info.map { |r| r.name }
+
+locals_not_remote = local_repos - remote_repos
+remotes_not_local = remote_repos - local_repos
+wikis_enabled = remote_repo_info.select { |r| r.has_wiki }.map { |r| r.name }
+issues_enabled = remote_repo_info.select { |r| r.has_issues }.map { |r| r.name }
+
+puts "Local but not remote: #{locals_not_remote.join(" ")}"
+puts "Remote but not local: #{remotes_not_local.join(" ")}"
+puts "Wikis enabled: #{wikis_enabled.join(" ")}"
+puts "Issues enabled: #{issues_enabled.join(" ")}"
View
11 console
@@ -1,5 +1,14 @@
#!/usr/bin/env ruby
-load 'scraper'
+
+# Opens a console with all scraper gems and utilities loaded.
+
+$:.unshift './lib'
+
+require 'bundler'
+Bundler.require
+require 'github'
require 'irb'
+ARGV.shift
IRB.start
+
View
75 delete-repos
@@ -3,87 +3,42 @@
# Mass delete, just pass the names or full paths of the repos to delete
# and both the github and local repos/* repositories will be deleted.
#
+##
# You might need to wait for a few hours between deleting and pushing
# since github has a bug where quickly deleting, creating, and pushing a
# repo with the same name will just result in a resurrection of the old
# repo with the old contents, not an empty one.
#
-# Now you can use FORCE=1 ./scraper 0999 to get around this. It doesn't
+# Now you can use "FORCE=1 ./scraper 0999" to get around this. It doesn't
# update the script's description but usually that's not needed and it's
# better than waiting for github to fix this bug!
-require 'rubygems'
+$:.unshift './lib'
+
require 'bundler'
Bundler.require
-
-require 'octopussy'
-require 'json'
-
-$git_script_file = 'vim-script.json'
-creds = Hashie::Mash.new(JSON.parse(File.read('creds.json')))
-$github = Octopussy::Client.new(creds)
-
-
-# monkeypatch octopussy because its delete has a bug
-Octopussy::Client.class_eval do
- def delete(repo, delete_token={})
- repo = Octopussy::Repo.new(repo)
- response = self.class.post("/repos/delete/#{repo.name}", :query => auth_params, :body => {:delete_token => delete_token})
- Hashie::Mash.new(response)
- end
-end
+require 'github'
-def delete_github_repos *names
- start = Time.now
- call_count = 0
- names.flatten.each do |name|
- next unless name
- begin
- result = $github.delete name
- rescue Exception => e
- puts "could not remove #{name}: #{e}"
- next
- end
+scripts = {}
- call_count += 1
- if result
- result = $github.delete name, result.delete_token
- call_count += 1
- if result.status == "deleted"
- puts "removed #{name} from github"
- else
- puts "Unknown response from second stage for #{name}: #{result}"
- end
- else
- puts "got #{result.inspect} trying to delete #{name}"
- end
+ARGV.each do |name|
+ name =~ /^repos\/(.*)\.git\/?$/
+ name = $1 if $1
+ path = "repos/#{name}.git"
- # make sure we don't bump into github's rate limit
- if call_count >= 60
- duration = Time.now - start
- puts "at rate limit, sleeping for #{60 - duration} seconds"
- sleep 60 - duration if duration < 60
- start = Time.now
- call_count = 0
- end
- end
-end
-
-
-scripts = ARGV.reduce({}) do |a,path|
- name = File.basename(path).sub(/\.git$/, '')
- path = "repos/#{name}.git" unless test ?d, path
if test ?d, path
- a[name] = path
+ scripts[name] = path
else
puts "warning: can't find #{path}"
end
- a
end
-delete_github_repos scripts.keys
+github = GitHub.new
+scripts.each do |name,path|
+ github.delete name
+end
scripts.each do |name,path|
begin
View
148 lib/github.rb
@@ -0,0 +1,148 @@
+# Helps the scraper interact with GitHub.
+
+require 'json'
+require 'hashie'
+require 'octokit'
+require 'retryable'
+
+
+class GitHub
+ include Retryable
+
+ attr_accessor :client, :logger
+
+ def initialize opts={}
+ @client = opts[:client]
+ unless @client
+ creds = Hashie::Mash.new(JSON.parse(File.read('creds.json')))
+ @client = Octokit::Client.new(:login => creds.login, :token => creds.token)
+ end
+
+ @logger = opts[:logger] || lambda { |msg| puts msg }
+
+ @start = Time.now.to_i
+ @api_calls = 0
+ end
+
+ def log str
+ @logger.call str
+ end
+
+
+ # sleep to avoid bumping into github's 60-per-minute API limit
+ # GitHub may change this limit at any time, so make sure your code still retries when rate limited.
+ def github_holdoff
+ if @api_calls > 60
+ holdoff = 60 - (Time.now.to_i - @start)
+ if holdoff > 0
+ log "hit github limit, sleeping for #{holdoff} seconds"
+ sleep holdoff
+ end
+ @start = Time.now.to_i
+ @api_calls = 0
+ end
+ end
+
+
+ def call_client method, *args
+ github_holdoff
+ result = @client.send method, *args
+ @api_calls += 1
+ result
+ end
+
+
+ #
+ # here are the utility functions...
+ #
+
+ def info name
+ call_client :repository, "vim-scripts/#{name}"
+ end
+
+ # turns off the issues and wiki tabs for a new repo
+ def turn_off_features name
+ call_client :update_repository, "vim-scripts/#{name}",
+ { :has_issues => false, :has_wiki => false }
+ end
+
+ def create name, *args
+ call_client :create, name, *args
+ turn_off_features name
+ end
+
+ def delete name
+ result = call_client :delete_repository, name
+ if result
+ result = call_client :delete_repository, name, :delete_token => result
+ if result.status == "deleted"
+ log "removed #{name} from github"
+ else
+ raise "Unknown response from second stage for #{name}: #{result.inspect}"
+ end
+ else
+ raise "got #{result.inspect} trying to delete #{name}"
+ end
+ end
+
+ def list_all_repos
+ result = []
+ page = 1
+ loop do
+ log "fetching page #{page}"
+ bunch = call_client :repositories, 'vim-scripts', :page => page
+ break if bunch.empty?
+ result.push *bunch
+ page += 1
+ end
+ result
+ end
+end
+
+
+# This Selenium code does not work anymore.
+# it's kept around in case it is required again.
+class GitHub::Selenium < GitHub
+ def start_selenium
+ sel = Selenium::Client::Driver.new :host => 'localhost',
+ :port => 4444, :browser => 'firefox', :url => 'https://github.com'
+ sel.start
+ sel.set_context "deleee"
+ sel.open "/login"
+ sel.type "login_field", "vim-scripts"
+ password = File.read('password').chomp rescue raise("Put vim-script's password in a file named 'password'.")
+ sel.type "password", password
+ sel.click "commit", :wait_for => :page
+ sel
+ end
+
+ def stop_selenium sel
+ sel.close_current_browser_session
+ sel.stop
+ end
+
+ # github's api is claiming some repos exist when they clearly don't. the
+ # only way to fix this appears to be to create a repo of the same name and
+ # delete it using the regular interface (trying to delete using the api
+ # throws 500 server errors). Hence all this Selenium. Arg.
+ def obliterate_repo sel, name
+ sel.open "/repositories/new"
+ sel.type "repository_name", name
+ sel.click "//button[@type='submit']", :wait_for => :page
+ sel.open "/vim-scripts/#{name}/admin"
+ sel.click "//div[@id='addons_bucket']/div[3]/div[1]/a/span"
+ sel.click "//div[@id='addons_bucket']/div[3]/div[3]/form/button"
+ end
+
+ def perform_obliterate
+ # if selenium is true then we must be having problems with phantom repos
+ if remote && $selenium
+ puts " apparently #{remote.url} exists, obliterating..."
+ obliterate_repo $selenium, script['name']
+ remote = nil
+ puts " obliterate succeeded."
+ sleep 2 # github requires a bit of time to sync
+ end
+ end
+end
+
View
169 lib/repo.rb
@@ -0,0 +1,169 @@
+# Utilities for the scraper to interact with GitHub
+
+require 'json' # json gem
+require 'hashie' # hashie gem
+require 'octokit' # octokit gem
+require 'retryable'
+
+
+class GitHub
+ include Retryable
+
+ def initialize opts
+ @client = opts[:client] || raise("specify :client")
+ @logger = opts[:logger] || lambda { |msg| puts msg }
+ @start = Time.now
+ @api_calls = 0
+ end
+
+ def log str
+ @logger.call str
+ end
+
+ def repo_url name
+ "http://github.com/vim-scripts/#{name}"
+ end
+
+ # We push to vim-scripts.github.com so we don't interfere with your regular ssh key.
+ # create a ~/.ssh/vimscripts-id_rsa and ~/.ssh/vimscripts-id_rsa.pub keypair,
+ # and create a ~/.ssh/config that has 2 Host sections:
+ # Host github.com\nHostName github.com\nUser git\nIdentityFile ~/.ssh/id_rsa
+ # Host vim-scripts.github.com\nHostName github.com\nUser git\nIdentityFile ~/.ssh/vimscripts-id_rsa
+ # see this for more: http://help.github.com/multiple-keys
+ def remote_url name
+ "git@vim-scripts.github.com:vim-scripts/#{name}"
+ end
+
+ # TODO: this does not belong here!
+ def repo_heads repo
+ path = "#{repo.path}/refs/heads"
+ Dir.entries(path).select { |f|
+ test ?f, "#{path}/#{f}"
+ }
+ end
+
+ # sleep to avoid bumping into github's 60-per-minute API limit
+ # just make sure num requests + 60 < num seconds elapsed.
+ def github_holdoff
+ # if @stop - start < @api_calls
+ # sleep_time = @api_calls-(stop-start)
+ # if sleep_time > 0
+ # puts "hit github limit, sleeping for #{}"
+ # sleep sleep_time
+ # end
+ # end
+ end
+
+ def call_client method, *args
+ github_holdoff
+ @client.send method, *args
+ @api_calls += 1
+ end
+
+ def turn_off_features name
+ log " disabling wiki+issues for #{name}"
+ call_client :update_repository, "vim-scripts/#{name}",
+ { :has_issues => false, :has_wiki => false }
+ end
+
+ def perform_push repo_name
+ return unless repo_name
+ repo = Gitrb::Repository.new(:path => repo_name.dup, :bare => true)
+ script = JSON.parse(File.read(File.join(repo_name, $git_script_file)))
+ puts "Uploading #{script['script_id']} - #{script['name']}"
+
+ # rescue nil because an exception is raised when the repo doesn't exist
+ remote = @client.repo("vim-scripts/#{script['name']}") rescue nil
+ @api_calls += 1
+
+ if remote
+ # make sure this actually is the same repo
+ puts " remote already exists: #{remote.url}"
+ remote.homepage =~ /script_id=(\d+)$/
+ raise "bad url on github repo #{script['name']}" unless $1
+ raise "remote #{script['name']} exists but id is for #{$1}" if script['script_id'] != $1
+ else
+ puts " remote doesn't exist, creating..."
+ end
+
+ unless remote
+ retryable(:tries => 4, :sleep => 10) do |retries|
+ puts " creating #{script['script_id']} - #{script['name']}#{retries > 0 ? " RETRY #{retries}" : ""}"
+ remote = @client.create(
+ :name => script['name'],
+ :description => "#{script['summary']}",
+ :homepage => script_id_to_url(script['script_id']),
+ :public => true)
+ end
+ @api_calls += 1
+
+ turn_off_features script['name']
+ end
+
+ repo.git_remote('rm', 'origin') rescue nil
+ repo.git_remote('add', 'origin', remote_url(script))
+ retryable(:tries => 6, :sleep => 15) do |retries|
+ # Gitrb::CommandError is as close to a network timeout error as we're going to get
+ puts " #{"force " if ENV['FORCE']}pushing #{script['script_id']} - #{script['name']}#{retries > 0 ? " RETRY #{retries}" : ""}"
+ args = ['--tags']
+ args << '--force' if ENV['FORCE']
+ args << 'origin'
+ args.push *repo_heads(repo)
+ repo.git_push(*args)
+ end
+
+ github_holdoff
+
+ # we should have a script that will compare the full list of
+ # repos on github and here and print any differences. that is
+ # not a part of this script's job.
+ # Octokit.list_repos('vim-scripts')
+ # Octokit.delete("vim-scripts/#{ghname}")
+
+ # no need to reset the remote because presumably we created this
+ # repo and the remote is already set correctly.
+ end
+end
+
+
+# NOTE: this Selenium code does not work anymore.
+# it's kept around in case it is required again.
+class GitHub::Selenium < GitHub
+ def start_selenium
+ sel = Selenium::Client::Driver.new :host => 'localhost',
+ :port => 4444, :browser => 'firefox', :url => 'https://github.com'
+ sel.start
+ sel.set_context "deleee"
+ sel.open "/login"
+ sel.type "login_field", "vim-scripts"
+ password = File.read('password').chomp rescue raise("Put vim-script's password in a file named 'password'.")
+ sel.type "password", password
+ sel.click "commit", :wait_for => :page
+ sel
+ end
+
+ # github's api is claiming some repos exist when they clearly don't. the
+ # only way to fix this appears to be to create a repo of the same name and
+ # delete it using the regular interface (trying to delete using the api
+ # throws 500 server errors). Hence all this Selenium. Arg.
+ def obliterate_repo sel, name
+ sel.open "/repositories/new"
+ sel.type "repository_name", name
+ sel.click "//button[@type='submit']", :wait_for => :page
+ sel.open "/vim-scripts/#{name}/admin"
+ sel.click "//div[@id='addons_bucket']/div[3]/div[1]/a/span"
+ sel.click "//div[@id='addons_bucket']/div[3]/div[3]/form/button"
+ end
+
+ def perform_obliterate
+ # if selenium is true then we must be having problems with phantom repos
+ if remote && $selenium
+ puts " apparently #{remote.url} exists, obliterating..."
+ obliterate_repo $selenium, script['name']
+ remote = nil
+ puts " obliterate succeeded."
+ sleep 2 # github requires a bit of time to sync
+ end
+ end
+end
+
View
56 scraper
@@ -68,6 +68,8 @@
# locally from the start.
+$:.unshift './lib'
+
require 'rubygems'
require 'bundler'
Bundler.require
@@ -84,7 +86,7 @@ require 'mimemagic' # mimemagic gem
require 'tmpdir'
require 'tempfile'
require 'find'
-require 'octopussy' # octopussy gem
+require 'octokit' # octokit gem
require 'hashie' # hashie gem
require 'htmlentities' # htmlentities gem
require 'feedzirra' # feedzirra gem
@@ -94,6 +96,9 @@ require 'fileutils'
require 'open3'
require 'retryable'
+require 'github'
+
+
include Retryable
# :on => [] means that we won't retry anything unless the caller
# specifies the exact exceptions that it wants to be retried.
@@ -1274,18 +1279,12 @@ end
def perform_push repo_name
return unless repo_name
- creds = Hashie::Mash.new(JSON.parse(File.read('creds.json')))
- github = Octopussy::Client.new(creds)
+ github = GitHub.new
repo = Gitrb::Repository.new(:path => repo_name.dup, :bare => true)
- script = JSON.parse(File.read(File.join(repo_name, $git_script_file)))
+ script = Hashie::Mash.new(JSON.parse(File.read(File.join(repo_name, $git_script_file))))
puts "Uploading #{script['script_id']} - #{script['name']}"
- start = Time.now
- api_calls = 0
-
- remote = github.repo("vim-scripts/#{script['name']}") rescue nil
- api_calls += 1
-
+ remote = github.info script.name rescue nil
if remote
# make sure this actually is the same repo
puts " remote already exists: #{remote.url}"
@@ -1299,20 +1298,11 @@ def perform_push repo_name
unless remote
retryable do |retries|
puts " creating #{script['script_id']} - #{script['name']}#{retries > 0 ? " RETRY #{retries}" : ""}"
- remote = github.create(
- :name => script['name'],
- :description => "#{script['summary']}",
- :homepage => script_id_to_url(script['script_id']),
+ remote = github.create(script.name,
+ :description => script.summary,
+ :homepage => script_id_to_url(script.script_id),
:public => true)
end
- api_calls += 1
-
- # turn off the issues and wiki tabs (wish 'create' would do that)
- retryable do |retries|
- puts " disabling wiki+issues for #{script['script_id']} - #{script['name']}#{retries > 0 ? " RETRY #{retries}" : ""}"
- github.set_repo_info('vim-scripts/' + script['name'], :has_issues => false, :has_wiki => false)
- end
- api_calls += 1
end
repo.git_remote('rm', 'origin') rescue nil
@@ -1326,24 +1316,6 @@ def perform_push repo_name
args.push *repo_heads(repo)
repo.git_push(*args)
end
-
- # sleep to avoid bumping into github's 60-per-minute API limit
- # the push doesn't count toward the API limit
- stop = Time.now
- sleep api_calls-(stop-start) if stop-start < api_calls
-
- # we don't want any provision for forcing pushes. if you delete
- # and recreate a repo, you must manually delete and recreate it
- # on github. forcing is bad, avoid at all costs.
-
- # we should have a script that will compare the full list of
- # repos on github and here and print any differences. that is
- # not a part of this script's job.
- # Octopussy.list_repos('vim-scripts')
- # Octopussy.delete("vim-scripts/#{ghname}")
-
- # no need to reset the remote because presumably we created this
- # repo and the remote is already set correctly.
end
@@ -1582,6 +1554,10 @@ if ARGV.empty?
count = perform_rss
# also cycle through all packages in case the rss feed missed anything
perform_continuous($idle_count - count) if count && $idle_count - count > 0
+elsif ARGV[0] == '--console'
+ require 'irb'
+ ARGV.shift
+ IRB.start
elsif ARGV[0] == '--docs'
generate_docs
elsif ARGV[0] == '--dump'
View
107 spec/github_spec.rb
@@ -0,0 +1,107 @@
+require File.dirname(File.absolute_path(__FILE__)) + '/../lib/github'
+
+require 'webmock/rspec'
+include WebMock::API
+
+
+describe "GitHub" do
+ class FakeClient
+ attr_accessor :count
+ def update_repository *args
+ @count ||= 0
+ @count += 1
+ end
+ end
+
+ # todo: should only create the clients once per run, not once per test.
+ def github
+ GitHub.new :client => Octokit::Client.new(:login => "loggyin", :token => "tokkyen"), :logger => lambda { |msg| }
+ end
+
+ def base
+ "https://loggyin%2Ftoken:tokkyen@github.com/api/v2/json"
+ end
+
+ def should_raise e
+ lambda { yield }.should raise_error e
+ end
+
+
+ it "should get repo info" do
+ stub = stub_request(:get, "#{base}/repos/show/vim-scripts/repo").
+ to_return(:body => {}.to_json)
+ github.info "repo"
+ stub.should have_been_requested
+ end
+
+ it "should get repo info for a nonexistent repo" do
+ stub = stub_request(:get, "#{base}/repos/show/vim-scripts/repo").
+ to_return(:status => 404, :body => { :error => "vim-scripts/repo Repository not found" }.to_json)
+ should_raise(Octokit::NotFound) {
+ github.info "repo"
+ }
+ stub.should have_been_requested
+ end
+
+ it "should turn off issues and wikis" do
+ # For some reason these don't work:
+ # :body => { :data => { :values => {:has_issues => false, :has_wiki => false}}},
+ # :body => { :data => { "values[has_issues]" => false, "values[has_wiki]" => false}},
+ stub = stub_request(:post, "#{base}/repos/show/vim-scripts/repo").
+ with(:body => "values[has_issues]=false&values[has_wiki]=false").to_return(:body => {}.to_json)
+ github.turn_off_features "repo"
+ stub.should have_been_requested
+ end
+
+ it "should create a repository" do
+ stub_a = stub_request(:post, "#{base}/repos/create").
+ with(:body => "description=description&homepage=http%3A%2F%2Fhomepage&public=true&name=repo").
+ to_return(:body => {:repository => {}}.to_json)
+ stub_b = stub_request(:post, "#{base}/repos/show/vim-scripts/repo").
+ with(:body => "values[has_issues]=false&values[has_wiki]=false").to_return(:body => {}.to_json)
+
+ github.create "repo",
+ :description => "description",
+ :homepage => "http://homepage",
+ :public => true
+
+ stub_a.should have_been_requested
+ stub_b.should have_been_requested
+ end
+
+ it "should delete a repository" do
+ stub_a = stub_request(:post, "#{base}/repos/delete/repo").
+ with(:headers => {'Content-Length'=>'0'}).to_return(:body => {}.to_json)
+ stub_b = stub_request(:post, "#{base}/repos/delete/repo").
+ with(:headers => {'Content-Type'=>'application/x-www-form-urlencoded'}).to_return(:body => { :status => :deleted }.to_json)
+
+ github.delete "repo"
+
+ stub_a.should have_been_requested
+ stub_b.should have_been_requested
+ end
+
+ it "should list all repos" do
+ stub_a = stub_request(:get, "#{base}/repos/show/vim-scripts?page=1").
+ to_return(:body => {:repositories => [{ :name => "one" }]}.to_json )
+ stub_b = stub_request(:get, "#{base}/repos/show/vim-scripts?page=2").
+ to_return(:body => {:repositories => [{ :name => "two" }]}.to_json )
+ stub_c = stub_request(:get, "#{base}/repos/show/vim-scripts?page=3").
+ to_return(:body => {:repositories => []}.to_json )
+
+ result = github.list_all_repos
+ result.sort { |a,b| a['name'] <=> b['name'] }.should == [ { 'name' => 'one' }, { 'name' => 'two' } ]
+
+ stub_a.should have_been_requested
+ stub_b.should have_been_requested
+ stub_c.should have_been_requested
+ end
+
+ it "should hold off before hitting github limit" do
+ fakehub = GitHub.new :client => FakeClient.new, :logger => lambda { |msg| }
+ fakehub.should_receive(:sleep).once.with(60)
+ 65.times { fakehub.turn_off_features "repo" }
+ fakehub.client.count.should == 65
+ end
+end
+
View
57 turn-off-wiki
@@ -1,56 +1,25 @@
#!/usr/bin/env ruby
# turns off the issues and wiki tabs for the named repos
-# usage: ./turn-off-wiki repos/*
-# or: ./turn-off-wiki 0101 0111 0112 0124
+# usage: ./turn-off-wiki reponame reponame ...
+# or: ./turn-off-wiki repos/reponame.git repos/reponame.git ...
-require 'json'
-require 'octopussy'
+$:.unshift './lib'
+require 'bundler'
+Bundler.require
+require 'github'
-# http://blog.codefront.net/2008/01/14/retrying-code-blocks-in-ruby-on-exceptions-whatever/
-# modified to pass the number of retries in the arg (0,1,2 if :tries => 3)
-def retryable(options = {}, &block)
- opts = { :tries => 1, :on => Exception, :sleep => 1 }.merge(options)
- return if opts[:tries] < 1
- retry_exception = opts[:on]
- tries = 0
- if opts[:tries] > 1
- begin
- return yield tries
- rescue retry_exception
- sleep opts[:sleep]
- retry if (tries += 1) < opts[:tries] - 1
- end
- end
+github = GitHub.new
- # last try will throw an exception
- yield tries+1
-end
-
-
-def repo_file file
- return file if test ?d, file
- Dir.glob("repos/#{file}*").first
-end
-
-
-$git_script_file = 'vim-script.json'
-creds = Hashie::Mash.new(JSON.parse(File.read('creds.json')))
-github = Octopussy::Client.new(creds)
-
-ARGV.each do |arg|
- script = JSON.parse(File.read(File.join(repo_file(arg), $git_script_file)))
-
- start = Time.now
- retryable(:tries => 4, :on => Timeout::Error, :sleep => 10) do |retries|
- puts "Turning off #{repo_file(arg)}#{retries > 0 ? " RETRY #{retries}" : ""}"
- github.set_repo_info('vim-scripts/' + script['name'], :has_issues => false, :has_wiki => false)
- end
+ARGV.each do |name|
+ # so you can use autocompletion in the repos dir as well
+ name =~ /^repos\/(.*)\.git\/?$/
+ name = $1 if $1
- stop = Time.now
- sleep 1-(stop-start) if stop-start < 1
+ puts " disabling wiki+issues for #{name}"
+ github.turn_off_features name
end

0 comments on commit cc455b6

Please sign in to comment.