Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

start factoring some code into gitrepo

  • Loading branch information...
commit dcbf8f68718ac4a6062d2744b02c598e18b15a57 1 parent 9748a3e
Scott Bronson bronson authored
Showing with 79 additions and 214 deletions.
  1. +2 −0  TODO
  2. +66 −0 lib/gitrepo.rb
  3. +0 −169 lib/repo.rb
  4. +11 −45 scraper
2  TODO
View
@@ -1,3 +1,5 @@
+todo: make relevant calls in lib/github.rb and lib/repo.rb retryable
+
What happens if we try to tag the same tag twice?
WTF happened to 2387? Its description is now <td>Wisely add </td>?
66 lib/gitrepo.rb
View
@@ -0,0 +1,66 @@
# Interface for working on git repos; insulates the app from the underlying
# implementation (currently the `git` command-line client).
#
# Every git invocation is handed to the OS as an argument array, so URLs,
# remote names and commit messages are never interpreted by a shell.

# todo: make a way for caller to tell Repo to indent all messages

require 'open3'

class GitRepo
  include Retryable # only for network operations

  # Raised whenever a git command exits with a failure status.
  class GitError < RuntimeError; end

  # Opens the repo at opts[:root], cloning into it first when opts[:clone] is given.
  #
  # required: :root, the directory to contain the repo
  # optional: :clone, a repo to clone (:bare => true if it should be bare)
  #
  # Raises ArgumentError when :root is missing, GitError when the clone
  # command fails, and RuntimeError when no clone was requested and :root is
  # not an existing directory.
  def initialize opts
    @root = opts[:root] || raise(ArgumentError, "specify :root")

    if opts[:clone]
      clone_args = ['clone']
      clone_args << '--bare' if opts[:bare]
      clone_args << opts[:clone] << @root
      # retryable comes from the Retryable mixin, which is defined elsewhere.
      # NOTE(review): presumably it re-runs the block on failure -- confirm.
      retryable(:task => "cloning #{opts[:clone]}") do
        # run from the current directory: @root does not exist until the clone succeeds
        output, status = run_git clone_args
        raise GitError.new("git clone failed: #{output}") unless status.success?
      end
    else
      raise "#{@root} doesn't exist" unless File.directory? @root
    end
  end

  # i.e. remote_add 'rails', 'http://github.com/rails/rails.git'
  # Raises GitError if git refuses to add the remote.
  def remote_add name, remote
    git! "git remote add #{name}", 'remote', 'add', name, remote
  end

  # Commits every change to tracked files (git commit -a) with the given message.
  # Having nothing to commit is reported on stdout rather than treated as an
  # error; any other failure raises GitError.
  #
  # todo: get rid of this call, should be regular git add / git commit
  def commit_all message
    output, status = run_git ['commit', '-a', '-m', message], @root
    # check the exit status first so a successful commit is never mistaken
    # for the "nothing to commit" case because of text in its output
    return if status.success?
    raise GitError.new("git commit failed: #{output}") unless output =~ /nothing to commit/
    puts " no changes to generated files"
  end

  # Runs git pull --no-rebase with the given arguments,
  # i.e. pull 'origin', 'master'. Raises GitError on failure.
  def pull *args
    retryable(:task => "pulling #{args.join ' '}") do
      # Can we tell the difference between a network error, which we want to retry,
      # and a merge error, which we want to fail immediately?
      git! 'git pull', 'pull', '--no-rebase', *args
    end
  end

  # Runs git push with the given arguments, i.e. push 'origin', 'master'.
  # Raises GitError on failure, the same error class the other operations use.
  def push *args
    retryable(:task => "pushing #{args.join ' '}") do
      git! 'git push', 'push', *args
    end
  end

  private

  # Runs git with args (each converted with to_s) and returns
  # [combined stdout+stderr, Process::Status]. When dir is given the child
  # process starts there; this process's working directory is left alone.
  def run_git args, dir = nil
    command = ['git'] + args.map(&:to_s)
    command << { :chdir => dir } if dir
    Open3.capture2e(*command)
  end

  # Runs git with args inside the repo and raises GitError, naming the
  # operation with label, unless the command succeeds. Returns nil.
  def git! label, *args
    output, status = run_git args, @root
    raise GitError.new("#{label} failed: #{output}") unless status.success?
    nil
  end
end
169 lib/repo.rb
View
@@ -1,169 +0,0 @@
-# Utilities for the scraper to interact with GitHub
-
-require 'json' # json gem
-require 'hashie' # hashie gem
-require 'octokit' # octokit gem
-require 'retryable'
-
-
# Creates vim-scripts repositories through an API client and pushes local
# script repos to them.
class GitHub
  include Retryable

  # opts[:client] (required): the object every GitHub API call is sent to.
  # opts[:logger] (optional): a callable taking one message; defaults to puts.
  # Raises RuntimeError when :client is missing.
  def initialize opts
    @client = opts[:client] || raise("specify :client")
    @logger = opts[:logger] || lambda { |msg| puts msg }
    @start = Time.now
    @api_calls = 0
  end

  # Sends str to the configured logger.
  def log str
    @logger.call str
  end

  # Web URL of the named vim-scripts repo.
  def repo_url name
    "http://github.com/vim-scripts/#{name}"
  end

  # We push to vim-scripts.github.com so we don't interfere with your regular ssh key.
  # create a ~/.ssh/vimscripts-id_rsa and ~/.ssh/vimscripts-id_rsa.pub keypair,
  # and create a ~/.ssh/config that has 2 Host sections:
  # Host github.com\nHostName github.com\nUser git\nIdentityFile ~/.ssh/id_rsa
  # Host vim-scripts.github.com\nHostName github.com\nUser git\nIdentityFile ~/.ssh/vimscripts-id_rsa
  # see this for more: http://help.github.com/multiple-keys
  def remote_url name
    "git@vim-scripts.github.com:vim-scripts/#{name}"
  end

  # Names of the regular files directly inside <repo.path>/refs/heads.
  # TODO: this does not belong here!
  def repo_heads repo
    heads_dir = "#{repo.path}/refs/heads"
    Dir.entries(heads_dir).select { |entry| File.file? "#{heads_dir}/#{entry}" }
  end

  # sleep to avoid bumping into github's 60-per-minute API limit
  # just make sure num requests + 60 < num seconds elapsed.
  # Currently a no-op: the throttling logic below is commented out.
  def github_holdoff
    # if @stop - start < @api_calls
    # sleep_time = @api_calls-(stop-start)
    # if sleep_time > 0
    # puts "hit github limit, sleeping for #{}"
    # sleep sleep_time
    # end
    # end
  end

  # Forwards method and args to the client, counts the call, and returns the
  # client's result (not the running call count).
  def call_client method, *args
    github_holdoff
    result = @client.send method, *args
    @api_calls += 1
    result
  end

  # Updates the named vim-scripts repo with has_issues and has_wiki set to false.
  def turn_off_features name
    log " disabling wiki+issues for #{name}"
    call_client :update_repository, "vim-scripts/#{name}",
      { :has_issues => false, :has_wiki => false }
  end

  # Pushes the local bare repo at repo_name to its vim-scripts remote,
  # creating the remote first when the client cannot find it. Does nothing
  # when repo_name is nil. Raises RuntimeError when an existing remote's
  # homepage does not carry this script's id.
  def perform_push repo_name
    return unless repo_name
    repo = Gitrb::Repository.new(:path => repo_name.dup, :bare => true)
    script = JSON.parse(File.read(File.join(repo_name, $git_script_file)))
    puts "Uploading #{script['script_id']} - #{script['name']}"

    # rescue nil because an exception is raised when the repo doesn't exist
    remote = @client.repo("vim-scripts/#{script['name']}") rescue nil
    @api_calls += 1

    if remote
      verify_remote remote, script
    else
      puts " remote doesn't exist, creating..."
      create_remote script
    end

    # best effort: there may be no origin to remove
    repo.git_remote('rm', 'origin') rescue nil
    # remote_url interpolates a repo name, so pass the name, not the whole script hash
    repo.git_remote('add', 'origin', remote_url(script['name']))
    push_heads repo, script

    github_holdoff

    # we should have a script that will compare the full list of
    # repos on github and here and print any differences. that is
    # not a part of this script's job.
    # Octokit.list_repos('vim-scripts')
    # Octokit.delete("vim-scripts/#{ghname}")

    # no need to reset the remote because presumably we created this
    # repo and the remote is already set correctly.
  end

  private

  # Makes sure an existing remote actually is the repo for this script by
  # comparing the script_id at the end of its homepage with the script's own.
  def verify_remote remote, script
    puts " remote already exists: #{remote.url}"
    # to_s guards against a nil homepage; String#[] avoids reading a stale $1
    remote_id = remote.homepage.to_s[/script_id=(\d+)$/, 1]
    raise "bad url on github repo #{script['name']}" unless remote_id
    # compare as strings so the check does not depend on how the JSON stores the id
    if script['script_id'].to_s != remote_id
      raise "remote #{script['name']} exists but id is for #{remote_id}"
    end
  end

  # Creates the public remote repo for script, then disables its wiki and issues.
  def create_remote script
    retryable(:tries => 4, :sleep => 10) do |retries|
      puts " creating #{script['script_id']} - #{script['name']}#{retries > 0 ? " RETRY #{retries}" : ""}"
      @client.create(
        :name => script['name'],
        :description => "#{script['summary']}",
        :homepage => script_id_to_url(script['script_id']),
        :public => true)
    end
    @api_calls += 1

    turn_off_features script['name']
  end

  # Pushes all tags and every local head to origin; adds --force when the
  # FORCE environment variable is set.
  def push_heads repo, script
    retryable(:tries => 6, :sleep => 15) do |retries|
      # Gitrb::CommandError is as close to a network timeout error as we're going to get
      puts " #{"force " if ENV['FORCE']}pushing #{script['script_id']} - #{script['name']}#{retries > 0 ? " RETRY #{retries}" : ""}"
      args = ['--tags']
      args << '--force' if ENV['FORCE']
      args << 'origin'
      args.concat repo_heads(repo)
      repo.git_push(*args)
    end
  end
end
-
-
-# NOTE: this Selenium code does not work anymore.
-# it's kept around in case it is required again.
# Browser-driven workaround for phantom repos, layered on top of GitHub.
# NOTE: this Selenium code does not work anymore; it is kept around in case
# it is required again.
class GitHub::Selenium < GitHub
  # Starts a Firefox session against https://github.com through the driver at
  # localhost:4444, logs in as vim-scripts using the password stored in the
  # file named 'password', and returns the driver.
  # NOTE(review): inside this class the bare constant Selenium may resolve to
  # GitHub::Selenium rather than the selenium client library -- confirm.
  def start_selenium
    browser = Selenium::Client::Driver.new :host => 'localhost',
      :port => 4444,
      :browser => 'firefox',
      :url => 'https://github.com'
    browser.start
    browser.set_context "deleee"
    browser.open "/login"
    browser.type "login_field", "vim-scripts"

    secret = begin
      File.read('password').chomp
    rescue
      raise("Put vim-script's password in a file named 'password'.")
    end

    browser.type "password", secret
    browser.click "commit", :wait_for => :page
    browser
  end

  # github's api is claiming some repos exist when they clearly don't. the
  # only way to fix this appears to be to create a repo of the same name and
  # delete it using the regular interface (trying to delete using the api
  # throws 500 server errors). Hence all this Selenium. Arg.
  def obliterate_repo sel, name
    # step 1: create a repo with the phantom's name
    sel.open "/repositories/new"
    sel.type "repository_name", name
    sel.click "//button[@type='submit']", :wait_for => :page

    # step 2: click through two controls on its admin page
    # (the delete controls, going by the comment above)
    addons = "//div[@id='addons_bucket']/div[3]"
    sel.open "/vim-scripts/#{name}/admin"
    sel.click "#{addons}/div[1]/a/span"
    sel.click "#{addons}/div[3]/form/button"
  end

  # Obliterates the remote through the browser session held in $selenium.
  # NOTE(review): remote and script are not defined anywhere in this class;
  # this looks like a fragment lifted out of a larger method -- confirm before reuse.
  def perform_obliterate
    # if selenium is true then we must be having problems with phantom repos
    return unless remote && $selenium

    puts " apparently #{remote.url} exists, obliterating..."
    obliterate_repo $selenium, script['name']
    remote = nil
    puts " obliterate succeeded."
    sleep 2 # github requires a bit of time to sync
  end
end
-
56 scraper
View
@@ -99,6 +99,7 @@ require 'open3'
require 'retryable'
require 'github'
+require 'gitrepo'
include Retryable
@@ -178,7 +179,6 @@ MimeMagic.remove 'application/x-gmc-link'
class ScrapeError < RuntimeError; end # retryable problem when scraping
class SourceForgeError < ScrapeError; end # sourceforge being stupid
class NoContentError < ScrapeError; end # page appears to be rendered incorrectly
-class GitError < RuntimeError; end # shelling out to git failed
# Turns out Ruby isn't very good about limiting the types of errors
# we need to handle... These are the ones that make sense to retry.
@@ -190,7 +190,7 @@ def retryable_errors
Errno::ETIMEDOUT, # Connection timed out - connect(2) (Errno::ETIMEDOUT)
OpenURI::HTTPError,
SocketError, # getaddrinfo: Name or service not known (SocketError)
- GitError,
+ GitRepo::GitError,
]
end
@@ -254,18 +254,6 @@ module Hpricot
end
-# Recursively convert a Hashie::Mash back to a plain hash
-#def convert_hash arg
- #if arg.kind_of? Hashie::Mash
- #hash = arg.to_hash
- #hash.each { |k,v| hash[k] = convert_hash v }
- #hash
- #else
- #arg
- #end
-#end
-
-
# There's a bizarre bug when passing a Hashie::Mash to JSON.pretty_generate.
# See https://github.com/bronson/whose-bug for an attempt to track it down.
# This routine just converts to plain hashes before generating.
@@ -521,46 +509,24 @@ end
def generate_docs
+ # wish we could use a bare repo to keep the docs but they don't support merging
doc_dir = 'vim-scraper.github.com'
puts "generating docs"
+ site = nil
unless test ?d, doc_dir
- # wish we could use a bare repo but apparently they don't support merging
- retryable(:task => " cloning vim-scraper.github.com") do |retries|
- output = `git clone git@github.com:vim-scraper/vim-scraper.github.com.git 2>&1`
- raise GitError.new("git clone failed: #{output}") unless $?.success?
- end
- Dir.chdir(doc_dir) {
- output = `git remote add vim-scripts git@github.com:vim-scraper/vim-scripts.git 2>&1`
- raise GitError.new("generate_docs: git remote add vim-scripts git@github.com:vim-scraper/vim-scripts.git failed: #{output}") unless $?.success?
- }
+ site = GitRepo.new :root => doc_dir, :clone => "git@github.com:vim-scraper/vim-scraper.github.com.git"
+ site.remote_add 'vim-scripts', "git@github.com:vim-scraper/vim-scripts.git"
end
- retryable(:task => " pulling from vim-scripts template") do |retries|
- Dir.chdir(doc_dir) {
- # Can we tell the difference between a network error, which we want to retry,
- # and a merge error, which we want to fail immediately?
- output = `git pull --no-rebase vim-scripts master 2>&1`
- raise GitError.new("generate_docs: git pull --no-rebase vim-scripts master failed: #{output}") unless $?.success?
- }
- end
+ site ||= GitRepo.new :root => doc_dir
+ site.pull 'vim-scripts', 'master'
updated_docs = generate_doc_files doc_dir
- Dir.chdir(doc_dir) {
- output = `git commit -a -m 'new scrape' 2>&1`
- if output =~ /nothing to commit/
- puts " no changes to generated files"
- else
- raise GitError.new("generate_docs: git commit -a -m 'new scrape' failed: #{output}") unless $?.success?
- end
- }
- retryable(:task => " pushing to vim-scraper.github.com") do |retries|
- Dir.chdir(doc_dir) {
- output = `git push origin master 2>&1`
- raise "generate_docs: git push origin master failed: #{output}" unless $?.success?
- }
- end
+ # todo: get rid of commit_all, only add and commit updated_docs
+ site.commit_all 'new scrape'
+ site.push 'origin', 'master'
end
Please sign in to comment.
Something went wrong with that request. Please try again.