From f5e530d205702d158ec53e34384e6b78fe70bcb0 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 20 May 2015 21:05:55 +0200 Subject: [PATCH 01/64] update development gems --- Gemfile | 11 ++++------- Gemfile.lock | 46 +++++++++++++++++++--------------------------- unipept.gemspec | 27 ++++++++++++--------------- 3 files changed, 35 insertions(+), 49 deletions(-) diff --git a/Gemfile b/Gemfile index 3b455e9d..dbc19dfc 100644 --- a/Gemfile +++ b/Gemfile @@ -6,12 +6,9 @@ source "http://rubygems.org" gem "cri", "~> 2.7" gem "typhoeus", "~> 0.6" -# Add dependencies to develop your gem here. -# Include everything needed to run rake, tests, features, etc. group :development do - gem "shoulda", "~> 3.5" - gem "rdoc", "~> 3.12" - gem "bundler", "~> 1.0" - gem "jeweler", "~> 2.0" - gem "simplecov", "~> 0.8" + gem 'rake' + gem 'minitest' + gem 'rubocop' + gem 'jeweler' end diff --git a/Gemfile.lock b/Gemfile.lock index b41b4287..7356dedb 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,20 +1,16 @@ GEM remote: http://rubygems.org/ specs: - activesupport (4.2.1) - i18n (~> 0.7) - json (~> 1.7, >= 1.7.7) - minitest (~> 5.1) - thread_safe (~> 0.3, >= 0.3.4) - tzinfo (~> 1.1) addressable (2.3.8) + ast (2.0.0) + astrolabe (1.3.0) + parser (>= 2.2.0.pre.3, < 3.0) builder (3.2.2) colored (1.2) cri (2.7.0) colored (~> 1.2) descendants_tracker (0.0.4) thread_safe (~> 0.3, >= 0.3.1) - docile (1.1.5) ethon (0.7.3) ffi (>= 1.3.0) faraday (0.9.1) @@ -31,7 +27,6 @@ GEM oauth2 hashie (3.4.1) highline (1.7.2) - i18n (0.7.0) jeweler (2.0.1) builder bundler (>= 1.0) @@ -56,35 +51,32 @@ GEM multi_json (~> 1.3) multi_xml (~> 0.5) rack (~> 1.2) + parser (2.2.0.2) + ast (>= 1.1, < 3.0) + powerpack (0.0.9) rack (1.6.0) + rainbow (2.0.0) rake (10.4.2) - rdoc (3.12.2) + rdoc (4.2.0) json (~> 1.4) - shoulda (3.5.0) - shoulda-context (~> 1.0, >= 1.0.1) - shoulda-matchers (>= 1.4.1, < 3.0) - shoulda-context (1.2.1) - shoulda-matchers (2.8.0) - activesupport (>= 3.0.0) - simplecov (0.10.0) - docile (~> 1.1.0) - json (~> 1.8) - simplecov-html (~> 0.10.0) - simplecov-html (0.10.0) + rubocop (0.28.0) + astrolabe (~> 1.3) + parser (>= 2.2.0.pre.7, < 3.0) + powerpack (~> 0.0.6) + rainbow (>= 1.99.1, < 3.0) + ruby-progressbar (~> 1.4) + ruby-progressbar (1.7.1) thread_safe (0.3.5) typhoeus (0.7.1) ethon (>= 0.7.1) - tzinfo (1.2.2) - thread_safe (~> 0.1) PLATFORMS ruby DEPENDENCIES - bundler (~> 1.0) cri (~> 2.7) - jeweler (~> 2.0) - rdoc (~> 3.12) - shoulda (~> 3.5) - simplecov (~> 0.8) + jeweler + minitest + rake + rubocop typhoeus (~> 0.6) diff --git a/unipept.gemspec b/unipept.gemspec index 8a35ebaa..1294a6f0 100644 --- a/unipept.gemspec +++ b/unipept.gemspec @@ -59,28 +59,25 @@ Gem::Specification.new do |s| if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then s.add_runtime_dependency(%q, ["~> 2.7"]) s.add_runtime_dependency(%q, ["~> 0.6"]) - s.add_development_dependency(%q, ["~> 3.5"]) - s.add_development_dependency(%q, ["~> 3.12"]) - s.add_development_dependency(%q, ["~> 1.0"]) - s.add_development_dependency(%q, ["~> 2.0"]) - s.add_development_dependency(%q, ["~> 0.8"]) + s.add_development_dependency(%q, [">= 0"]) + s.add_development_dependency(%q, [">= 0"]) + s.add_development_dependency(%q, [">= 0"]) + s.add_development_dependency(%q, [">= 0"]) else s.add_dependency(%q, ["~> 2.7"]) s.add_dependency(%q, ["~> 0.6"]) - s.add_dependency(%q, ["~> 3.5"]) - s.add_dependency(%q, ["~> 3.12"]) - s.add_dependency(%q, ["~> 1.0"]) - s.add_dependency(%q, ["~> 2.0"]) - s.add_dependency(%q, ["~> 0.8"]) + s.add_dependency(%q, [">= 0"]) + s.add_dependency(%q, [">= 0"]) + s.add_dependency(%q, [">= 0"]) + s.add_dependency(%q, [">= 0"]) end else s.add_dependency(%q, ["~> 2.7"]) s.add_dependency(%q, ["~> 0.6"]) - s.add_dependency(%q, ["~> 3.5"]) - s.add_dependency(%q, ["~> 3.12"]) - s.add_dependency(%q, ["~> 1.0"]) - s.add_dependency(%q, ["~> 2.0"]) - s.add_dependency(%q, ["~> 0.8"]) + s.add_dependency(%q, [">= 0"]) + s.add_dependency(%q, [">= 0"]) + s.add_dependency(%q, [">= 0"]) + s.add_dependency(%q, [">= 0"]) end end From 75df967f1588da1a9c1367a467744bed351701df Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 20 May 2015 21:19:29 +0200 Subject: [PATCH 02/64] set up working tests --- Gemfile | 6 +++--- Rakefile | 21 +++++++++++---------- test/helper.rb | 34 +++++++++++----------------------- test/test_base.rb | 6 ++++++ test/test_unipept.rb | 7 ------- 5 files changed, 31 insertions(+), 43 deletions(-) create mode 100644 test/test_base.rb delete mode 100644 test/test_unipept.rb diff --git a/Gemfile b/Gemfile index dbc19dfc..7360d711 100644 --- a/Gemfile +++ b/Gemfile @@ -1,10 +1,10 @@ -source "http://rubygems.org" +source 'http://rubygems.org' # Add dependencies required to use your gem here. # Example: # gem "activesupport", ">= 2.3.5" -gem "cri", "~> 2.7" -gem "typhoeus", "~> 0.6" +gem 'cri', '~> 2.7' +gem 'typhoeus', '~> 0.6' group :development do gem 'rake' diff --git a/Rakefile b/Rakefile index 0efb2505..7a72e77d 100644 --- a/Rakefile +++ b/Rakefile @@ -10,6 +10,8 @@ rescue Bundler::BundlerError => e exit e.status_code end require 'rake' +require 'rake/testtask' +require 'rubocop/rake_task' require 'jeweler' Jeweler::Tasks.new do |gem| @@ -30,19 +32,18 @@ Jeweler::Tasks.new do |gem| end Jeweler::RubygemsDotOrgTasks.new -require 'rake/testtask' -Rake::TestTask.new(:test) do |test| - test.libs << 'lib' << 'test' - test.pattern = 'test/**/test_*.rb' - test.verbose = true -end +task :test_unit do + require './test/helper.rb' -desc 'Code coverage detail' -task :simplecov do - ENV['COVERAGE'] = 'true' - Rake::Task['test'].execute + FileList['./test/**/test_*.rb', './test/**/*_spec.rb'].each do |fn| + require fn + end end +RuboCop::RakeTask.new(:test_style) + +task test: [:test_unit, :test_style] + task default: :test require 'rdoc/task' diff --git a/test/helper.rb b/test/helper.rb index c4fd95a7..b8ce983a 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -1,34 +1,22 @@ -require 'simplecov' - -module SimpleCov::Configuration - def clean_filters - @filters = [] - end -end - -SimpleCov.configure do - clean_filters - load_adapter 'test_frameworks' -end - -ENV["COVERAGE"] && SimpleCov.start do - add_filter "/.rvm/" -end require 'rubygems' require 'bundler' begin Bundler.setup(:default, :development) rescue Bundler::BundlerError => e $stderr.puts e.message - $stderr.puts "Run `bundle install` to install missing gems" + $stderr.puts 'Run `bundle install` to install missing gems' exit e.status_code end -require 'test/unit' -require 'shoulda' -$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) -$LOAD_PATH.unshift(File.dirname(__FILE__)) -require 'unipept' +require 'minitest' +require 'minitest/autorun' + +module Unipept + class TestCase < Minitest::Test + # add helper methods here -class Test::Unit::TestCase + end end + +# Unexpected system exit is unexpected +::MiniTest::Unit::TestCase::PASSTHROUGH_EXCEPTIONS.delete(SystemExit) diff --git a/test/test_base.rb b/test/test_base.rb new file mode 100644 index 00000000..3bbf8a76 --- /dev/null +++ b/test/test_base.rb @@ -0,0 +1,6 @@ +module Unipept + class BaseTestCase < Unipept::TestCase + def test_stub + end + end +end diff --git a/test/test_unipept.rb b/test/test_unipept.rb deleted file mode 100644 index a2a890ab..00000000 --- a/test/test_unipept.rb +++ /dev/null @@ -1,7 +0,0 @@ -require 'helper' - -class TestUnipept < Test::Unit::TestCase - should "probably rename this file and start testing for real" do - flunk "hey buddy, you should probably rename this file and start testing for real" - end -end From 1708e7c3dbb81c983558318ce22277baf203670a Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 20 May 2015 22:52:56 +0200 Subject: [PATCH 03/64] configure rubocop and fix some style issues --- bin/prot2pept | 26 ++++++++-------- bin/unipept | 2 +- bin/uniprot | 22 +++++++------- lib/unipept/batch_order.rb | 4 --- lib/unipept/commands.rb | 8 ++--- lib/unipept/commands/api_runner.rb | 20 ++++++------ lib/unipept/commands/pept2lca.rb | 1 - lib/unipept/commands/pept2prot.rb | 11 +++---- lib/unipept/commands/taxa2lca.rb | 2 +- lib/unipept/configuration.rb | 6 ++-- lib/unipept/formatters.rb | 49 ++++++++++++------------------ test/helper.rb | 1 - 12 files changed, 65 insertions(+), 87 deletions(-) diff --git a/bin/prot2pept b/bin/prot2pept index 50ebeca4..f60c7420 100755 --- a/bin/prot2pept +++ b/bin/prot2pept @@ -1,12 +1,12 @@ #!/usr/bin/env ruby require 'cri' -Signal.trap("PIPE", "EXIT") -Signal.trap("INT", "EXIT") +Signal.trap('PIPE', 'EXIT') +Signal.trap('INT', 'EXIT') root_cmd = Cri::Command.new_basic_root.modify do - name 'prot2pept' - summary 'Split protein sequences into peptides.' - usage 'prot2pept [options]' + name 'prot2pept' + summary 'Split protein sequences into peptides.' + usage 'prot2pept [options]' description <<-EOS The prot2pept command splits each protein sequence into a list of peptides according to a given cleavage-pattern. The command expects a list of protein sequences that are passed @@ -20,30 +20,30 @@ root_cmd = Cri::Command.new_basic_root.modify do EOS required :p, :pattern, 'specify cleavage-pattern (regex) as the pattern after which the next peptide will be cleaved (default: ([KR])([^P]) for tryptic peptides).' - run do |opts, args, cmd| - pattern = opts.fetch(:pattern, "([KR])([^P])") + run do |opts, _args, _cmd| + pattern = opts.fetch(:pattern, '([KR])([^P])') # decide if we have FASTA input fasta_header = $stdin.gets if fasta_header.start_with? '>' # fasta input, need to join lines - while !$stdin.eof? - prot = "" + until $stdin.eof? + prot = '' # Sometimes you just got to accept this weird and ugly code until $stdin.eof? || (line = gets).start_with?('>') prot += line.chomp end puts fasta_header - puts prot.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) + puts prot.gsub(/#{pattern}/, "\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) fasta_header = line end else # handle our already read line - puts fasta_header.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) + puts fasta_header.gsub(/#{pattern}/, "\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) # we no longer have to join lines as input is now more sane - $stdin.each_line do |prot| - puts prot.gsub(/#{pattern}/,"\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) + $stdin.each_line do |protein| + puts protein.gsub(/#{pattern}/, "\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) end end end diff --git a/bin/unipept b/bin/unipept index fe6b84e5..230e9ca0 100755 --- a/bin/unipept +++ b/bin/unipept @@ -41,7 +41,7 @@ root_cmd = Cri::Command.new_basic_root.modify do run do |opts, _args, _cmd| if opts[:version] puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) - elsif + else root_cmd.run(['help']) end end diff --git a/bin/uniprot b/bin/uniprot index e0d1cfdd..10379a31 100755 --- a/bin/uniprot +++ b/bin/uniprot @@ -2,16 +2,16 @@ require 'typhoeus' require 'cri' -Signal.trap("PIPE", "EXIT") -Signal.trap("INT", "EXIT") +Signal.trap('PIPE', 'EXIT') +Signal.trap('INT', 'EXIT') -valid_formats = Set.new ["fasta", "txt", "xml", "rdf", "gff"] +valid_formats = Set.new %w(fasta txt xml rdf gff) def get_uniprot_entry(arg, format) if format.nil? || format.empty? resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{arg}.fasta") if resp.success? - puts resp.response_body.lines.map(&:chomp)[1..-1].join("") + puts resp.response_body.lines.map(&:chomp)[1..-1].join('') end else # other format has been specified, just download and output @@ -23,9 +23,9 @@ def get_uniprot_entry(arg, format) end root_cmd = Cri::Command.new_basic_root.modify do - name 'uniprot' - summary 'Command line interface to Uniprot web services.' - usage 'uniprot [options]' + name 'uniprot' + summary 'Command line interface to Uniprot web services.' + usage 'uniprot [options]' description <<-EOS The uniprot command is a command line wrapper around the Uniprot web services. The command expects a list of Uniprot Accession Numbers that are passed @@ -39,10 +39,10 @@ root_cmd = Cri::Command.new_basic_root.modify do The uniprot command yields Uniprot records as output. EOS - required :f, :format, "specify output format (available: " + valid_formats.to_a.join(", ") + ")" - run do |opts, args, cmd| - if !opts[:format].nil? and !valid_formats.include? opts[:format] - $stderr.puts opts[:format] + " is not a valid output format. Available formats are: " + valid_formats.to_a.join(", ") + required :f, :format, 'specify output format (available: ' + valid_formats.to_a.join(', ') + ')' + run do |opts, args, _cmd| + if (!opts[:format].nil?) && (!valid_formats.include? opts[:format]) + $stderr.puts opts[:format] + ' is not a valid output format. Available formats are: ' + valid_formats.to_a.join(', ') exit 1 end iterator = args.empty? ? $stdin.each_line : args diff --git a/lib/unipept/batch_order.rb b/lib/unipept/batch_order.rb index 5f0d526c..824f673a 100644 --- a/lib/unipept/batch_order.rb +++ b/lib/unipept/batch_order.rb @@ -1,7 +1,5 @@ module Unipept - class BatchOrder - attr_reader :order def initialize @@ -22,7 +20,5 @@ def wait(i, &block) @order[i] = block end end - end - end diff --git a/lib/unipept/commands.rb b/lib/unipept/commands.rb index b9516dd6..0c08ddfd 100644 --- a/lib/unipept/commands.rb +++ b/lib/unipept/commands.rb @@ -1,7 +1,5 @@ -['pept2lca','pept2taxa','pept2prot','taxa2lca','taxonomy'].each do |cmd| - require_relative File.join('commands',cmd) +%w(pept2lca pept2taxa pept2prot taxa2lca taxonomy).each do |cmd| + require_relative File.join('commands', cmd) end -module Unipept - module Commands - end +module Unipept::Commands end diff --git a/lib/unipept/commands/api_runner.rb b/lib/unipept/commands/api_runner.rb index 5e088a79..f709a61c 100644 --- a/lib/unipept/commands/api_runner.rb +++ b/lib/unipept/commands/api_runner.rb @@ -68,13 +68,12 @@ def get_server_message return if options[:quiet] return unless STDOUT.tty? last_fetched = @configuration['last_fetch_date'] - if last_fetched.nil? || (last_fetched + 60 * 60 * 24) < Time.now - version = Unipept::VERSION - resp = Typhoeus.get(@message_url, params: { version: version }) - puts resp.body unless resp.body.chomp.empty? - @configuration['last_fetch_date'] = Time.now - @configuration.save - end + return unless last_fetched.nil? || (last_fetched + 60 * 60 * 24) < Time.now + version = Unipept::VERSION + resp = Typhoeus.get(@message_url, params: { version: version }) + puts resp.body unless resp.body.chomp.empty? + @configuration['last_fetch_date'] = Time.now + @configuration.save end def run @@ -187,10 +186,9 @@ def write_to_output(string) end def download_xml(result) - if options[:xml] - File.open(options[:xml] + '.xml', 'wb') do |f| - f.write Typhoeus.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=#{result.first.map { |h| h['taxon_id'] }.join(',')}&retmode=xml").response_body - end + return unless options[:xml] + File.open(options[:xml] + '.xml', 'wb') do |f| + f.write Typhoeus.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=#{result.first.map { |h| h['taxon_id'] }.join(',')}&retmode=xml").response_body end end diff --git a/lib/unipept/commands/pept2lca.rb b/lib/unipept/commands/pept2lca.rb index 46a4024f..385dcbe3 100644 --- a/lib/unipept/commands/pept2lca.rb +++ b/lib/unipept/commands/pept2lca.rb @@ -3,4 +3,3 @@ module Unipept::Commands class Pept2lca < ApiRunner end end - diff --git a/lib/unipept/commands/pept2prot.rb b/lib/unipept/commands/pept2prot.rb index f4e9097b..9d5b62a2 100644 --- a/lib/unipept/commands/pept2prot.rb +++ b/lib/unipept/commands/pept2prot.rb @@ -3,12 +3,11 @@ module Unipept::Commands class Pept2prot < ApiRunner def download_xml(result) - if options[:xml] - FileUtils.mkdir_p(options[:xml]) - result.first.each do |prot| - File.open(options[:xml] + "/#{prot['uniprot_id']}.xml", "wb") do |f| - f.write Typhoeus.get("http://www.uniprot.org/uniprot/#{prot['uniprot_id']}.xml").response_body - end + return unless options[:xml] + FileUtils.mkdir_p(options[:xml]) + result.first.each do |prot| + File.open(options[:xml] + "/#{prot['uniprot_id']}.xml", 'wb') do |f| + f.write Typhoeus.get("http://www.uniprot.org/uniprot/#{prot['uniprot_id']}.xml").response_body end end end diff --git a/lib/unipept/commands/taxa2lca.rb b/lib/unipept/commands/taxa2lca.rb index 12b62384..8aebccb5 100644 --- a/lib/unipept/commands/taxa2lca.rb +++ b/lib/unipept/commands/taxa2lca.rb @@ -6,7 +6,7 @@ def peptide_iterator(peptides, &block) end def batch_size - raise "NOT NEEDED FOR TAXA2LCA" + fail 'NOT NEEDED FOR TAXA2LCA' end end end diff --git a/lib/unipept/configuration.rb b/lib/unipept/configuration.rb index 5ba37914..38705c4d 100644 --- a/lib/unipept/configuration.rb +++ b/lib/unipept/configuration.rb @@ -1,12 +1,11 @@ module Unipept class Configuration - attr_reader :config attr_reader :file_name def initialize - @file_name = File.join(Dir.home, ".unipeptrc") - if !File.exists? file_name + @file_name = File.join(Dir.home, '.unipeptrc') + if !File.exist? file_name @config = {} else @config = YAML.load_file file_name @@ -24,6 +23,5 @@ def [](*args) def []=(*args) config.[]=(*args) end - end end diff --git a/lib/unipept/formatters.rb b/lib/unipept/formatters.rb index 9cdc1d77..b1550383 100644 --- a/lib/unipept/formatters.rb +++ b/lib/unipept/formatters.rb @@ -1,36 +1,33 @@ module Unipept class Formatter - def self.formatters @@formatters ||= {} end def self.new_for_format(format) - begin - formatters[format].new - rescue - formatters[self.default].new - end + formatters[format].new + rescue + formatters[default].new end def self.register(format) - self.formatters[format.to_s] = self + formatters[format.to_s] = self end def self.available - self.formatters.keys + formatters.keys end def self.default 'csv' end - def header(sample_data, fasta_mapper = nil) - "" + def header(_sample_data, _fasta_mapper = nil) + '' end # JSON formatted data goes in, something other comes out - def format(data, fasta_mapper = nil) + def format(data, _fasta_mapper = nil) data end end @@ -39,11 +36,10 @@ class JSONFormatter < Formatter require 'json' register :json - def format(data, fasta_mapper = nil) + def format(data, _fasta_mapper = nil) # TODO: add fasta header based on fasta_mapper information data.to_json end - end class CSVFormatter < Formatter require 'csv' @@ -53,7 +49,7 @@ class CSVFormatter < Formatter def header(data, fasta_input = nil) CSV.generate do |csv| first = data.first - if first.kind_of? Array + if first.is_a? Array first = first.first end if fasta_input @@ -66,7 +62,6 @@ def header(data, fasta_input = nil) def format(data, fasta_input = nil) CSV.generate do |csv| - if fasta_input # Process the output from {key1: value1, key2: value2, ...} # to {value => {key1: value1, key2: value2, ...}} @@ -80,18 +75,18 @@ def format(data, fasta_input = nil) fasta_input.each do |input_pair| fasta_header, id = input_pair + next if data_dict[id].nil? + # Retrieve the corresponding API result (if any) - unless data_dict[id].nil? - data_dict[id].each do |r| - csv << ([fasta_header] + r.values).map { |v| v == "" ? nil : v } - end + data_dict[id].each do |r| + csv << ([fasta_header] + r.values).map { |v| v == '' ? nil : v } end end else data.each do |o| - csv << o.values.map { |v| v == "" ? nil : v } + csv << o.values.map { |v| v == '' ? nil : v } end end @@ -100,36 +95,32 @@ def format(data, fasta_input = nil) end class XMLFormatter < Formatter - # Monkey patch (do as to_xml, but saner) class ::Object def to_xml(name = nil) - name ? %{<#{name}>#{self.to_s}} : self.to_s + name ? %(<#{name}>#{self}) : to_s end end class ::Array - def to_xml(array_name = :array, item_name = :item) - %|<#{array_name} size="#{self.size}">| + self.map{|n|n.to_xml( :item )}.join+"" + def to_xml(array_name = :array, _item_name = :item) + %(<#{array_name} size="#{size}">) + map { |n|n.to_xml(:item) }.join + "" end end class ::Hash def to_xml(name = nil) - data = to_a.map{|k,v|v.to_xml(k)}.join + data = to_a.map { |k, v|v.to_xml(k) }.join name ? "<#{name}>#{data}" : data end end register :xml - def format(data, fasta_mapper = nil) + def format(data, _fasta_mapper = nil) # TODO: add fasta header based on fasta_mapper information data.to_xml end - end - - end diff --git a/test/helper.rb b/test/helper.rb index b8ce983a..7524da9d 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -14,7 +14,6 @@ module Unipept class TestCase < Minitest::Test # add helper methods here - end end From 2fd74a59b4a54e3513ec07163915c3677464aa7b Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 21 May 2015 10:53:14 +0200 Subject: [PATCH 04/64] actually add rubycop config --- .rubocop.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .rubocop.yml diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 00000000..d39925be --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,31 @@ +AllCops: + Exclude: + - 'unipept.gemspec' + +Style/ClassAndModuleChildren: + EnforcedStyle: compact + Enabled: false + +# disable for now +Style/AccessorMethodName: + Enabled: false +Style/ClassVars: + Enabled: false +Style/Documentation: + Enabled: false +Style/IfUnlessModifier: + Enabled: false +Style/RescueModifier: + Enabled: false +Metrics/AbcSize: + Enabled: false +Metrics/ClassLength: + Enabled: false +Metrics/CyclomaticComplexity: + Enabled: false +Metrics/LineLength: + Enabled: false +Metrics/MethodLength: + Enabled: false +Metrics/PerceivedComplexity: + Enabled: false From 6c661e0e7ecba15f278f54f9b72e5a45901307a5 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 21 May 2015 10:53:28 +0200 Subject: [PATCH 05/64] add travis config --- .travis.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..bc95718b --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +language: ruby +rvm: + - 2.2.2 + - 2.1.6 + - 2.0.0 + - 1.9.3 + - jruby-19mode + - rbx-2.2.2 + - rbx-2.1.6 + - rbx-1.9.3 From 0f7ff6a7f0f54c4aec7ad9ca15f179f4809c078e Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 21 May 2015 11:01:46 +0200 Subject: [PATCH 06/64] exclude the vendor dir --- .rubocop.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.rubocop.yml b/.rubocop.yml index d39925be..cd3360f1 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,6 +1,9 @@ AllCops: + Include: + - Exclude: - 'unipept.gemspec' + - 'vendor/*' Style/ClassAndModuleChildren: EnforcedStyle: compact From a654f096f7851cf82e18243c5c96f22f87a826ce Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 21 May 2015 11:41:02 +0200 Subject: [PATCH 07/64] tweak travis --- .rubocop.yml | 2 +- .travis.yml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index cd3360f1..e3590e55 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -3,7 +3,7 @@ AllCops: - Exclude: - 'unipept.gemspec' - - 'vendor/*' + - 'vendor/**/*' Style/ClassAndModuleChildren: EnforcedStyle: compact diff --git a/.travis.yml b/.travis.yml index bc95718b..ade0c846 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,3 @@ rvm: - 2.0.0 - 1.9.3 - jruby-19mode - - rbx-2.2.2 - - rbx-2.1.6 - - rbx-1.9.3 From 00acc3bdaf785d38b7b2a384e64367264d5dd9e1 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 21 May 2015 11:45:25 +0200 Subject: [PATCH 08/64] add travis badge --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 464a0c98..adec4d2f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # unipept-cli -[![Code Climate](https://codeclimate.com/github/unipept/unipept-cli/badges/gpa.svg)](https://codeclimate.com/github/unipept/unipept-cli) [![Gem Version](https://badge.fury.io/rb/unipept.svg)](http://badge.fury.io/rb/unipept) +[![Build Status](https://api.travis-ci.org/unipept/unipept-cli.svg)](https://travis-ci.org/unipept/unipept-cli) +[![Code Climate](https://codeclimate.com/github/unipept/unipept-cli/badges/gpa.svg)](https://codeclimate.com/github/unipept/unipept-cli) Unipept-cli offers a command line interface to the [Unipept](http://unipept.ugent.be) web service. Documentation about the web service can be found at [http://unipept.ugent.be/apidocs](http://unipept.ugent.be/apidocs). From 068ad2e5b8ecadd0c65cd14bc63450ddc949101e Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sat, 6 Jun 2015 19:05:10 +0200 Subject: [PATCH 09/64] refactor peptfilter and add tests --- bin/peptfilter | 46 +------------ lib/peptfilter.rb | 125 +++++++++++++++++++++++++++++++++ test/helper.rb | 49 ++++++++++++- test/test_peptfilter.rb | 148 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 323 insertions(+), 45 deletions(-) create mode 100644 lib/peptfilter.rb create mode 100644 test/test_peptfilter.rb diff --git a/bin/peptfilter b/bin/peptfilter index 24e243d2..ca2a93e6 100755 --- a/bin/peptfilter +++ b/bin/peptfilter @@ -1,49 +1,7 @@ #!/usr/bin/env ruby -require 'cri' +require_relative '../lib/peptfilter' Signal.trap('PIPE', 'EXIT') Signal.trap('INT', 'EXIT') -root_cmd = Cri::Command.new_basic_root.modify do - name 'peptfilter' - summary 'Filter peptides based on specific criteria.' - usage 'peptfilter [options]' - description <<-EOS - The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way peptides are passed, in the order as listed above. Text files and standard input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled. - EOS - # flag :u, :unique, "filter duplicate peptides." - required nil, :minlen, 'only retain tryptic peptides that have at least min (default: 5) amino acids.' - required nil, :maxlen, 'only retain tryptic peptides that have at most max (default: 50) amino acids.' - required :l, :lacks, 'only retain tryptic peptides that lack all amino acids from the string of residues.' - required :c, :contains, 'only retain tryptic peptides that contain all amino acids from the string of residues.' - run do |opts, _args, _cmd| - minlen = opts.fetch(:minlen, '5').to_i - maxlen = opts.fetch(:maxlen, '50').to_i - lacks = opts.fetch(:lacks, '').chars.to_a - contains = opts.fetch(:contains, '').chars.to_a - $stdin.each_line do |pept| - # FASTA headers - if pept.start_with? '>' - puts pept - next - end - pept = pept.chomp - length_ok = pept.length >= minlen && pept.length <= maxlen - lacks_ok = (pept.chars.to_a & lacks).size == 0 - contains_ok = (pept.chars.to_a & contains).size == contains.size - - if length_ok && lacks_ok && contains_ok - puts pept - end - end - end -end - -root_cmd.run(ARGV) +Unipept::Peptfilter.run(ARGV) diff --git a/lib/peptfilter.rb b/lib/peptfilter.rb new file mode 100644 index 00000000..5c0e51aa --- /dev/null +++ b/lib/peptfilter.rb @@ -0,0 +1,125 @@ +require 'cri' + +module Unipept + class Peptfilter + @root_command = Cri::Command.new_basic_root.modify do + name 'peptfilter' + summary 'Filter peptides based on specific criteria.' + usage 'peptfilter [options]' + description <<-EOS + The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed + + - as separate command line arguments + + - in one or more text files that are passed as an argument to the -i option + + - to standard input + + The command will give priority to the first way peptides are passed, in the order as listed above. Text files and standard input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled. + EOS + # flag :u, :unique, "filter duplicate peptides." + required nil, :minlen, 'only retain tryptic peptides that have at least min (default: 5) amino acids.' + required nil, :maxlen, 'only retain tryptic peptides that have at most max (default: 50) amino acids.' + required :l, :lacks, 'only retain tryptic peptides that lack all amino acids from the string of residues.' + required :c, :contains, 'only retain tryptic peptides that contain all amino acids from the string of residues.' + run do |opts, _args, _cmd| + minlen = opts.fetch(:minlen, '5').to_i + maxlen = opts.fetch(:maxlen, '50').to_i + lacks = opts.fetch(:lacks, '').chars.to_a + contains = opts.fetch(:contains, '').chars.to_a + $stdin.each_line do |pept| + # FASTA headers + if pept.start_with? '>' + puts pept + next + end + + pept = pept.chomp + if Peptfilter.filter(pept, minlen, maxlen, lacks, contains) + puts pept + end + end + end + end + + # Invokes the peptfilter command-line tool with the given arguments. + # + # @param [Array] args An array of command-line arguments + # + # @return [void] + def self.run(args) + root_command.run(args) + end + + # @return [Unipept::Command] The root command + class << self + attr_reader :root_command + end + + # Checks if a peptide satisfies the min length, max length, lacks and contains requirements. + # Returns true if + # - the peptide length is equal or higher than min + # - the peptide length is equal or lower than max + # - the peptide doesn't contain any of the amino acids in lacks + # - the peptide contains all of the amino acids in contains + # + # @param [String] peptide The peptide to check + # + # @param [Integer] min The minimal length requirement + # + # @param [Integer] max The maximal length requirement + # + # @param [Array] lacks The forbidden amino acids + # + # @param [Array] contains The required amino acids + # + # @return [Boolean] true if the peptide satisfies all requirements + def self.filter(peptide, min, max, lacks, contains) + filter_length(peptide, min, max) && + filter_lacks(peptide, lacks) && + filter_contains(peptide, contains) + end + + # Checks if a peptide satisfies the min length and max length requirements. + # Returns true if + # - the peptide length is equal or higher than min + # - the peptide length is equal or lower than max + # + # @param [String] peptide The peptide to check + # + # @param [Integer] min The minimal length requirement + # + # @param [Integer] max The maximal length requirement + # + # @return [Boolean] true if the peptide satisfies all requirements + def self.filter_length(peptide, min, max) + peptide.length >= min && peptide.length <= max + end + + # Checks if a peptide satisfies lacks requirement. + # Returns true if + # - the peptide doesn't contain any of the amino acids in lacks + # + # @param [String] peptide The peptide to check + # + # @param [Array] lacks The forbidden amino acids + # + # @return [Boolean] true if the peptide satisfies all requirements + def self.filter_lacks(peptide, lacks) + (peptide.chars.to_a & lacks).size == 0 + end + + # Checks if a peptide satisfies the contains requirement. + # Returns true if + # - the peptide contains all of the amino acids in contains + # + # @param [String] peptide The peptide to check + # + # @param [Array] contains The required amino acids + # + # @return [Boolean] true if the peptide satisfies all requirements + def self.filter_contains(peptide, contains) + (peptide.chars.to_a & contains).size == contains.size + end + end +end diff --git a/test/helper.rb b/test/helper.rb index 7524da9d..7ba46dd7 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -13,7 +13,54 @@ module Unipept class TestCase < Minitest::Test - # add helper methods here + def setup + @orig_io = capture_io + end + + def teardown + uncapture_io(*@orig_io) + end + + def capture_io_with_input(input, &block) + capture_io_while do + input = input.join("\n") if input.is_a? Array + $stdin.write(input) + $stdin.rewind + block.call + end + end + + def capture_io_while(&block) + orig_io = capture_io + block.call + [$stdout.string, $stderr.string] + ensure + uncapture_io(*orig_io) + end + + def lines(string) + string.scan(/^.*\n/).map(&:chomp) + end + + private + + def capture_io + orig_stdout = $stdout + orig_stderr = $stderr + orig_stdin = $stdin + + $stdout = StringIO.new + $stderr = StringIO.new + $stdin = StringIO.new + + [orig_stdout, orig_stderr, orig_stdin] + end + + def uncapture_io(orig_stdout, orig_stderr, orig_stdin) + $stdout = orig_stdout + $stderr = orig_stderr + $stdin = orig_stdin + end end end diff --git a/test/test_peptfilter.rb b/test/test_peptfilter.rb new file mode 100644 index 00000000..ea8230a3 --- /dev/null +++ b/test/test_peptfilter.rb @@ -0,0 +1,148 @@ +require_relative '../lib/peptfilter' + +module Unipept + class PeptfilterTestCase < Unipept::TestCase + def test_length_filter + # min length + assert(Peptfilter.filter_length('AALER', 4, 10)) + assert(Peptfilter.filter_length('AALER', 5, 10)) + assert(!Peptfilter.filter_length('AALER', 6, 10)) + + # max length + assert(!Peptfilter.filter_length('AALER', 1, 4)) + assert(Peptfilter.filter_length('AALER', 1, 5)) + assert(Peptfilter.filter_length('AALER', 1, 6)) + end + + def test_lacks_filter + assert(Peptfilter.filter_lacks('AALER', ''.chars.to_a)) + assert(Peptfilter.filter_lacks('AALER', 'BCD'.chars.to_a)) + assert(!Peptfilter.filter_lacks('AALER', 'A'.chars.to_a)) + assert(!Peptfilter.filter_lacks('AALER', 'AE'.chars.to_a)) + end + + def test_contains_filter + assert(Peptfilter.filter_contains('AALER', ''.chars.to_a)) + assert(Peptfilter.filter_contains('AALER', 'A'.chars.to_a)) + assert(Peptfilter.filter_contains('AALER', 'AE'.chars.to_a)) + assert(!Peptfilter.filter_contains('AALER', 'BCD'.chars.to_a)) + assert(!Peptfilter.filter_contains('AALER', 'AB'.chars.to_a)) + end + + def test_filter + assert(Peptfilter.filter('AALTER', 4, 10, 'BCD'.chars.to_a, 'AL'.chars.to_a)) + assert(!Peptfilter.filter('AALTER', 7, 10, 'BCD.chars.to_a', 'AL'.chars.to_a)) + assert(!Peptfilter.filter('AALTER', 4, 5, 'BCD'.chars.to_a, 'AL'.chars.to_a)) + assert(!Peptfilter.filter('AALTER', 4, 10, 'ABC'.chars.to_a, 'AL'.chars.to_a)) + assert(!Peptfilter.filter('AALTER', 4, 10, 'BCD'.chars.to_a, 'ALC'.chars.to_a)) + end + + def test_default_min_length_argument + out, _err = capture_io_with_input('A' * 6) do + Peptfilter.run(%w()) + end + assert_equal('A' * 6, out.chomp) + out, _err = capture_io_with_input('A' * 5) do + Peptfilter.run(%w()) + end + assert_equal('A' * 5, out.chomp) + out, _err = capture_io_with_input('A' * 4) do + Peptfilter.run(%w()) + end + assert_equal('', out.chomp) + end + + def test_default_max_length_argument + out, _err = capture_io_with_input('A' * 49) do + Peptfilter.run(%w()) + end + assert_equal('A' * 49, out.chomp) + out, _err = capture_io_with_input('A' * 50) do + Peptfilter.run(%w()) + end + assert_equal('A' * 50, out.chomp) + out, _err = capture_io_with_input('A' * 51) do + Peptfilter.run(%w()) + end + assert_equal('', out.chomp) + end + + def test_with_min_argument + out, _err = capture_io_with_input('A' * 6) do + Peptfilter.run(%w(--minlen 7)) + end + assert_equal('', out.chomp) + out, _err = capture_io_with_input('A' * 4) do + Peptfilter.run(%w(--minlen 3)) + end + assert_equal('A' * 4, out.chomp) + end + + def test_with_max_argument + out, _err = capture_io_with_input('A' * 45) do + Peptfilter.run(%w(--maxlen 40)) + end + assert_equal('', out.chomp) + out, _err = capture_io_with_input('A' * 55) do + Peptfilter.run(%w(--maxlen 60)) + end + assert_equal('A' * 55, out.chomp) + end + + def test_with_lacks_argument + out, _err = capture_io_with_input('A' * 10) do + Peptfilter.run(%w(--lacks B)) + end + assert_equal('A' * 10, out.chomp) + out, _err = capture_io_with_input('A' * 10) do + Peptfilter.run(%w(-l B)) + end + assert_equal('A' * 10, out.chomp) + out, _err = capture_io_with_input('A' * 10) do + Peptfilter.run(%w(--lacks A)) + end + assert_equal('', out.chomp) + out, _err = capture_io_with_input('A' * 10) do + Peptfilter.run(%w(-l A)) + end + assert_equal('', out.chomp) + end + + def test_with_contains_argument + out, _err = capture_io_with_input('A' * 10) do + Peptfilter.run(%w(--contains A)) + end + assert_equal('A' * 10, out.chomp) + out, _err = capture_io_with_input('A' * 10) do + Peptfilter.run(%w(-c A)) + end + assert_equal('A' * 10, out.chomp) + out, _err = capture_io_with_input('A' * 10) do + Peptfilter.run(%w(--contains B)) + end + assert_equal('', out.chomp) + out, _err = capture_io_with_input('A' * 10) do + Peptfilter.run(%w(-c B)) + end + assert_equal('', out.chomp) + end + + def test_fasta_input + out, _err = capture_io_with_input('>') do + Peptfilter.run(%w()) + end + assert_equal('>', out.chomp) + out, _err = capture_io_with_input(['>', 'A', 'AALTER', '>']) do + Peptfilter.run(%w()) + end + assert_equal(">\nAALTER\n>", out.chomp) + end + + def test_normal_input + out, _err = capture_io_with_input(['A', 'A' * 11, 'AAAAB', 'BBBBB', 'CCCCC', 'CCCCCA']) do + Peptfilter.run(%w(--minlen 4 --maxlen 10 --lacks B --contains A)) + end + assert_equal('CCCCCA', out.chomp) + end + end +end From 8dafc02c05281093e49a8c3bd5648d9aff8dbe76 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 11:27:05 +0200 Subject: [PATCH 10/64] add coveralls --- Gemfile | 1 + Gemfile.lock | 22 ++++++++++++++++++++++ test/helper.rb | 3 +++ 3 files changed, 26 insertions(+) diff --git a/Gemfile b/Gemfile index 7360d711..c2ae45da 100644 --- a/Gemfile +++ b/Gemfile @@ -11,4 +11,5 @@ group :development do gem 'minitest' gem 'rubocop' gem 'jeweler' + gem 'coveralls', require: false end diff --git a/Gemfile.lock b/Gemfile.lock index 7356dedb..ff7b56ff 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -7,10 +7,17 @@ GEM parser (>= 2.2.0.pre.3, < 3.0) builder (3.2.2) colored (1.2) + coveralls (0.7.1) + multi_json (~> 1.3) + rest-client + simplecov (>= 0.7) + term-ansicolor + thor cri (2.7.0) colored (~> 1.2) descendants_tracker (0.0.4) thread_safe (~> 0.3, >= 0.3.1) + docile (1.1.5) ethon (0.7.3) ffi (>= 1.3.0) faraday (0.9.1) @@ -38,11 +45,13 @@ GEM rdoc json (1.8.2) jwt (1.4.1) + mime-types (2.4.3) mini_portile (0.6.2) minitest (5.6.1) multi_json (1.11.0) multi_xml (0.5.5) multipart-post (2.0.0) + netrc (0.9.0) nokogiri (1.6.6.2) mini_portile (~> 0.6.0) oauth2 (1.0.0) @@ -59,6 +68,9 @@ GEM rake (10.4.2) rdoc (4.2.0) json (~> 1.4) + rest-client (1.7.2) + mime-types (>= 1.16, < 3.0) + netrc (~> 0.7) rubocop (0.28.0) astrolabe (~> 1.3) parser (>= 2.2.0.pre.7, < 3.0) @@ -66,7 +78,16 @@ GEM rainbow (>= 1.99.1, < 3.0) ruby-progressbar (~> 1.4) ruby-progressbar (1.7.1) + simplecov (0.10.0) + docile (~> 1.1.0) + json (~> 1.8) + simplecov-html (~> 0.10.0) + simplecov-html (0.10.0) + term-ansicolor (1.3.0) + tins (~> 1.0) + thor (0.19.1) thread_safe (0.3.5) + tins (1.3.3) typhoeus (0.7.1) ethon (>= 0.7.1) @@ -74,6 +95,7 @@ PLATFORMS ruby DEPENDENCIES + coveralls cri (~> 2.7) jeweler minitest diff --git a/test/helper.rb b/test/helper.rb index 7ba46dd7..558dd3d7 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -1,5 +1,8 @@ require 'rubygems' require 'bundler' +require 'coveralls' +Coveralls.wear! + begin Bundler.setup(:default, :development) rescue Bundler::BundlerError => e From c08f614f108caf0ad33a6f0a5a2316fe8dbae55d Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 12:16:35 +0200 Subject: [PATCH 11/64] refactor the uniprot command --- bin/uniprot | 53 ++--------------------------------- lib/peptfilter.rb | 19 ++++--------- lib/uniprot.rb | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 65 deletions(-) create mode 100644 lib/uniprot.rb diff --git a/bin/uniprot b/bin/uniprot index 10379a31..35c00ac8 100755 --- a/bin/uniprot +++ b/bin/uniprot @@ -1,56 +1,7 @@ #!/usr/bin/env ruby -require 'typhoeus' -require 'cri' +require_relative '../lib/uniprot' Signal.trap('PIPE', 'EXIT') Signal.trap('INT', 'EXIT') -valid_formats = Set.new %w(fasta txt xml rdf gff) - -def get_uniprot_entry(arg, format) - if format.nil? || format.empty? - resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{arg}.fasta") - if resp.success? - puts resp.response_body.lines.map(&:chomp)[1..-1].join('') - end - else - # other format has been specified, just download and output - resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{arg}.#{format}") - if resp.success? - puts resp.response_body - end - end -end - -root_cmd = Cri::Command.new_basic_root.modify do - name 'uniprot' - summary 'Command line interface to Uniprot web services.' - usage 'uniprot [options]' - description <<-EOS - The uniprot command is a command line wrapper around the Uniprot web services. The command expects a list of Uniprot Accession Numbers that are passed - - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way Uniprot Accession Numbers are passed, in the order as listed above. Text files and standard input should have one Uniprot Accession Number per line. - - The uniprot command yields Uniprot records as output. - EOS - required :f, :format, 'specify output format (available: ' + valid_formats.to_a.join(', ') + ')' - run do |opts, args, _cmd| - if (!opts[:format].nil?) && (!valid_formats.include? opts[:format]) - $stderr.puts opts[:format] + ' is not a valid output format. Available formats are: ' + valid_formats.to_a.join(', ') - exit 1 - end - iterator = args.empty? ? $stdin.each_line : args - iterator.each do |pept| - get_uniprot_entry(pept.chomp, opts[:format]) - end - end -end - -# run_this runs this command without subcommands! Absolutely needed here -root_cmd.run_this(ARGV) +Unipept::Uniprot.run(ARGV) diff --git a/lib/peptfilter.rb b/lib/peptfilter.rb index 5c0e51aa..d2e1cd36 100644 --- a/lib/peptfilter.rb +++ b/lib/peptfilter.rb @@ -2,20 +2,16 @@ module Unipept class Peptfilter + attr_reader :root_command + @root_command = Cri::Command.new_basic_root.modify do name 'peptfilter' summary 'Filter peptides based on specific criteria.' usage 'peptfilter [options]' description <<-EOS - The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed - - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option + The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed to standard input. - - to standard input - - The command will give priority to the first way peptides are passed, in the order as listed above. Text files and standard input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled. + The input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled. EOS # flag :u, :unique, "filter duplicate peptides." required nil, :minlen, 'only retain tryptic peptides that have at least min (default: 5) amino acids.' @@ -48,12 +44,7 @@ class Peptfilter # # @return [void] def self.run(args) - root_command.run(args) - end - - # @return [Unipept::Command] The root command - class << self - attr_reader :root_command + @root_command.run(args) end # Checks if a peptide satisfies the min length, max length, lacks and contains requirements. diff --git a/lib/uniprot.rb b/lib/uniprot.rb new file mode 100644 index 00000000..f69e2852 --- /dev/null +++ b/lib/uniprot.rb @@ -0,0 +1,71 @@ +require 'cri' +require 'typhoeus' + +module Unipept + class Uniprot + attr_reader :root_command + attr_reader :valid_formats + + valid_formats = Set.new %w(fasta txt xml rdf gff sequence) + @root_command = Cri::Command.new_basic_root.modify do + name 'uniprot' + summary 'Command line interface to Uniprot web services.' + usage 'uniprot [options]' + description <<-EOS + The uniprot command is a command line wrapper around the Uniprot web services. The command expects a list of Uniprot Accession Numbers that are passed + + - as separate command line arguments + + - to standard input + + The command will give priority to the first way Uniprot Accession Numbers are passed, in the order as listed above. The standard input should have one Uniprot Accession Number per line. + + The uniprot command yields just the protein sequences as a default, but can return several formats. + EOS + required :f, :format, 'specify output format (available: ' + valid_formats.to_a.join(', ') + ') (default: sequence)' + run do |opts, args, _cmd| + format = opts.fetch(:format, 'sequence') + unless valid_formats.include? format + $stderr.puts format + ' is not a valid output format. Available formats are: ' + valid_formats.to_a.join(', ') + exit 1 + end + iterator = args.empty? ? $stdin.each_line : args + iterator.each do |accession| + puts Uniprot.get_uniprot_entry(accession.chomp, format) + end + end + end + + # Invokes the uniprot command-line tool with the given arguments. + # + # @param [Array] args An array of command-line arguments + # + # @return [void] + def self.run(args) + @root_command.run(args) + end + + # Fetches a Uniprot record from the uniprot website with the given accession + # number in the requested format. + # + # @param [String] accession The accession number of the record to fetch + # + # @param [String] format The format of of the record. If the format is 'sequence', the sequence will be returned in as a single line + # + # @return [String] The requested Uniprot record in the requested format + def self.get_uniprot_entry(accession, format) + if format == 'sequence' + resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{accession}.fasta") + if resp.success? + resp.response_body.lines.map(&:chomp)[1..-1].join('') + end + else + # other format has been specified, just download and output + resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{accession}.#{format}") + if resp.success? + resp.response_body + end + end + end + end +end From 2098731f10f7bed77f614c382700d0808630dc40 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 12:19:14 +0200 Subject: [PATCH 12/64] add coveralls badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index adec4d2f..656a33b3 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![Gem Version](https://badge.fury.io/rb/unipept.svg)](http://badge.fury.io/rb/unipept) [![Build Status](https://api.travis-ci.org/unipept/unipept-cli.svg)](https://travis-ci.org/unipept/unipept-cli) +[![Coverage Status](https://coveralls.io/repos/unipept/unipept-cli/badge.svg)](https://coveralls.io/r/unipept/unipept-cli) [![Code Climate](https://codeclimate.com/github/unipept/unipept-cli/badges/gpa.svg)](https://codeclimate.com/github/unipept/unipept-cli) Unipept-cli offers a command line interface to the [Unipept](http://unipept.ugent.be) web service. From 68f4a49c2cb5168a3c5c0fe270ab1d9272dd190d Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 13:26:34 +0200 Subject: [PATCH 13/64] prot2pept, peptfilter and uniprot should be simple commands --- bin/prot2pept | 2 +- lib/peptfilter.rb | 2 +- lib/uniprot.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/prot2pept b/bin/prot2pept index f60c7420..46b71295 100755 --- a/bin/prot2pept +++ b/bin/prot2pept @@ -3,7 +3,7 @@ require 'cri' Signal.trap('PIPE', 'EXIT') Signal.trap('INT', 'EXIT') -root_cmd = Cri::Command.new_basic_root.modify do +root_cmd = Cri::Command.define do name 'prot2pept' summary 'Split protein sequences into peptides.' usage 'prot2pept [options]' diff --git a/lib/peptfilter.rb b/lib/peptfilter.rb index d2e1cd36..2f9d6155 100644 --- a/lib/peptfilter.rb +++ b/lib/peptfilter.rb @@ -4,7 +4,7 @@ module Unipept class Peptfilter attr_reader :root_command - @root_command = Cri::Command.new_basic_root.modify do + @root_command = Cri::Command.define do name 'peptfilter' summary 'Filter peptides based on specific criteria.' usage 'peptfilter [options]' diff --git a/lib/uniprot.rb b/lib/uniprot.rb index f69e2852..6e8b284e 100644 --- a/lib/uniprot.rb +++ b/lib/uniprot.rb @@ -7,7 +7,7 @@ class Uniprot attr_reader :valid_formats valid_formats = Set.new %w(fasta txt xml rdf gff sequence) - @root_command = Cri::Command.new_basic_root.modify do + @root_command = Cri::Command.define do name 'uniprot' summary 'Command line interface to Uniprot web services.' usage 'uniprot [options]' From 4b11d6843eaa95e015a728d734c23f6101e77352 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 13:51:06 +0200 Subject: [PATCH 14/64] add tests for uniprot command --- test/test_peptfilter.rb | 13 ++++ test/test_uniprot.rb | 127 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 test/test_uniprot.rb diff --git a/test/test_peptfilter.rb b/test/test_peptfilter.rb index ea8230a3..0475f404 100644 --- a/test/test_peptfilter.rb +++ b/test/test_peptfilter.rb @@ -42,10 +42,12 @@ def test_default_min_length_argument Peptfilter.run(%w()) end assert_equal('A' * 6, out.chomp) + out, _err = capture_io_with_input('A' * 5) do Peptfilter.run(%w()) end assert_equal('A' * 5, out.chomp) + out, _err = capture_io_with_input('A' * 4) do Peptfilter.run(%w()) end @@ -57,10 +59,12 @@ def test_default_max_length_argument Peptfilter.run(%w()) end assert_equal('A' * 49, out.chomp) + out, _err = capture_io_with_input('A' * 50) do Peptfilter.run(%w()) end assert_equal('A' * 50, out.chomp) + out, _err = capture_io_with_input('A' * 51) do Peptfilter.run(%w()) end @@ -72,6 +76,7 @@ def test_with_min_argument Peptfilter.run(%w(--minlen 7)) end assert_equal('', out.chomp) + out, _err = capture_io_with_input('A' * 4) do Peptfilter.run(%w(--minlen 3)) end @@ -83,6 +88,7 @@ def test_with_max_argument Peptfilter.run(%w(--maxlen 40)) end assert_equal('', out.chomp) + out, _err = capture_io_with_input('A' * 55) do Peptfilter.run(%w(--maxlen 60)) end @@ -94,14 +100,17 @@ def test_with_lacks_argument Peptfilter.run(%w(--lacks B)) end assert_equal('A' * 10, out.chomp) + out, _err = capture_io_with_input('A' * 10) do Peptfilter.run(%w(-l B)) end assert_equal('A' * 10, out.chomp) + out, _err = capture_io_with_input('A' * 10) do Peptfilter.run(%w(--lacks A)) end assert_equal('', out.chomp) + out, _err = capture_io_with_input('A' * 10) do Peptfilter.run(%w(-l A)) end @@ -113,14 +122,17 @@ def test_with_contains_argument Peptfilter.run(%w(--contains A)) end assert_equal('A' * 10, out.chomp) + out, _err = capture_io_with_input('A' * 10) do Peptfilter.run(%w(-c A)) end assert_equal('A' * 10, out.chomp) + out, _err = capture_io_with_input('A' * 10) do Peptfilter.run(%w(--contains B)) end assert_equal('', out.chomp) + out, _err = capture_io_with_input('A' * 10) do Peptfilter.run(%w(-c B)) end @@ -132,6 +144,7 @@ def test_fasta_input Peptfilter.run(%w()) end assert_equal('>', out.chomp) + out, _err = capture_io_with_input(['>', 'A', 'AALTER', '>']) do Peptfilter.run(%w()) end diff --git a/test/test_uniprot.rb b/test/test_uniprot.rb new file mode 100644 index 00000000..70b6ba4e --- /dev/null +++ b/test/test_uniprot.rb @@ -0,0 +1,127 @@ +require_relative '../lib/uniprot' + +module Unipept + class UniprotTestCase < Unipept::TestCase + def test_argument_input + out, _err = capture_io_while do + Uniprot.run(%w(Q6GZX3)) + end + assert_equal(1, out.split(/\n/).length) + + out, _err = capture_io_while do + Uniprot.run(%w(Q6GZX3 Q6GZX4)) + end + assert_equal(2, out.split(/\n/).length) + + out, _err = capture_io_while do + Uniprot.run(%w(-f fasta Q6GZX3 Q6GZX4)) + end + assert_equal(2, out.count('>')) + + out, _err = capture_io_while do + Uniprot.run(%w(--format fasta Q6GZX3 Q6GZX4)) + end + assert_equal(2, out.count('>')) + end + + def test_stdin_input + out, _err = capture_io_with_input('Q6GZX3') do + Uniprot.run(%w()) + end + assert_equal(1, out.split(/\n/).length) + + out, _err = capture_io_with_input(%w(Q6GZX3 Q6GZX4)) do + Uniprot.run(%w()) + end + assert_equal(2, out.split(/\n/).length) + + out, _err = capture_io_with_input(%w(Q6GZX3 Q6GZX4)) do + Uniprot.run(%w(-f fasta)) + end + assert_equal(2, out.count('>')) + + out, _err = capture_io_with_input(%w(Q6GZX3 Q6GZX4)) do + Uniprot.run(%w(--format fasta)) + end + assert_equal(2, out.count('>')) + end + + def test_argument_input_priority + out, _err = capture_io_with_input('Q6GZX3') do + Uniprot.run(%w(Q6GZX3 Q6GZX4)) + end + assert_equal(2, out.split(/\n/).length) + + out, _err = capture_io_with_input(%w(Q6GZX3 Q6GZX4)) do + Uniprot.run(%w(Q6GZX3)) + end + assert_equal(1, out.split(/\n/).length) + end + + def test_invalid_format + out, err = capture_io_while do + assert_raises SystemExit do + Uniprot.run(%w(--format xxx)) + end + end + assert_equal('', out) + assert(err.include? 'xxx is not a valid output format') + end + + def test_default_format + out_default, _err = capture_io_while do + Uniprot.run(%w(Q6GZX3)) + end + assert_equal(1, out_default.split(/\n/).length) + + out_sequence, _err = capture_io_while do + Uniprot.run(%w(-f sequence Q6GZX3)) + end + assert_equal(out_default, out_sequence) + + out_sequence, _err = capture_io_while do + Uniprot.run(%w(--format sequence Q6GZX3)) + end + assert_equal(out_default, out_sequence) + end + + def test_format_options + # fasta txt xml rdf gff sequence + out, err = capture_io_while do + Uniprot.run(%w(-f fasta Q6GZX3)) + end + assert(!out.empty?) + assert(err.empty?) + + out, err = capture_io_while do + Uniprot.run(%w(-f txt Q6GZX3)) + end + assert(!out.empty?) + assert(err.empty?) + + out, err = capture_io_while do + Uniprot.run(%w(-f xml Q6GZX3)) + end + assert(!out.empty?) + assert(err.empty?) + + out, err = capture_io_while do + Uniprot.run(%w(-f rdf Q6GZX3)) + end + assert(!out.empty?) + assert(err.empty?) + + out, err = capture_io_while do + Uniprot.run(%w(-f gff Q6GZX3)) + end + assert(!out.empty?) + assert(err.empty?) + + out, err = capture_io_while do + Uniprot.run(%w(-f sequence Q6GZX3)) + end + assert(!out.empty?) + assert(err.empty?) + end + end +end From 242d78633b4b95b228af2a48d07cdd80b7002d80 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 14:52:05 +0200 Subject: [PATCH 15/64] fix help for uniprot and peptfilter --- lib/peptfilter.rb | 6 ++++++ lib/uniprot.rb | 4 ++++ test/test_peptfilter.rb | 9 +++++++++ test/test_uniprot.rb | 9 +++++++++ 4 files changed, 28 insertions(+) diff --git a/lib/peptfilter.rb b/lib/peptfilter.rb index 2f9d6155..afe3d4de 100644 --- a/lib/peptfilter.rb +++ b/lib/peptfilter.rb @@ -18,6 +18,10 @@ class Peptfilter required nil, :maxlen, 'only retain tryptic peptides that have at most max (default: 50) amino acids.' required :l, :lacks, 'only retain tryptic peptides that lack all amino acids from the string of residues.' required :c, :contains, 'only retain tryptic peptides that contain all amino acids from the string of residues.' + flag :h, :help, 'show help for this command' do |_value, cmd| + puts cmd.help + exit 0 + end run do |opts, _args, _cmd| minlen = opts.fetch(:minlen, '5').to_i maxlen = opts.fetch(:maxlen, '50').to_i @@ -38,6 +42,8 @@ class Peptfilter end end + @root_command.add_command(Cri::Command.new_basic_help) + # Invokes the peptfilter command-line tool with the given arguments. # # @param [Array] args An array of command-line arguments diff --git a/lib/uniprot.rb b/lib/uniprot.rb index 6e8b284e..6467358e 100644 --- a/lib/uniprot.rb +++ b/lib/uniprot.rb @@ -23,6 +23,10 @@ class Uniprot The uniprot command yields just the protein sequences as a default, but can return several formats. EOS required :f, :format, 'specify output format (available: ' + valid_formats.to_a.join(', ') + ') (default: sequence)' + flag :h, :help, 'show help for this command' do |_value, cmd| + puts cmd.help + exit 0 + end run do |opts, args, _cmd| format = opts.fetch(:format, 'sequence') unless valid_formats.include? format diff --git a/test/test_peptfilter.rb b/test/test_peptfilter.rb index 0475f404..1f71617f 100644 --- a/test/test_peptfilter.rb +++ b/test/test_peptfilter.rb @@ -157,5 +157,14 @@ def test_normal_input end assert_equal('CCCCCA', out.chomp) end + + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Peptfilter.run(%w(-h)) + end + end + assert(out.include? 'show help for this command') + end end end diff --git a/test/test_uniprot.rb b/test/test_uniprot.rb index 70b6ba4e..4f5615e7 100644 --- a/test/test_uniprot.rb +++ b/test/test_uniprot.rb @@ -123,5 +123,14 @@ def test_format_options assert(!out.empty?) assert(err.empty?) end + + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Peptfilter.run(%w(-h)) + end + end + assert(out.include? 'show help for this command') + end end end From fcdfef1265ca6b580a2a023877c1aac862d6ffdb Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 16:08:55 +0200 Subject: [PATCH 16/64] refactor prot2pept --- bin/prot2pept | 49 ++----------------------------------- lib/prot2pept.rb | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 47 deletions(-) create mode 100644 lib/prot2pept.rb diff --git a/bin/prot2pept b/bin/prot2pept index 46b71295..d52cae66 100755 --- a/bin/prot2pept +++ b/bin/prot2pept @@ -1,52 +1,7 @@ #!/usr/bin/env ruby -require 'cri' +require_relative '../lib/prot2pept' Signal.trap('PIPE', 'EXIT') Signal.trap('INT', 'EXIT') -root_cmd = Cri::Command.define do - name 'prot2pept' - summary 'Split protein sequences into peptides.' - usage 'prot2pept [options]' - description <<-EOS - The prot2pept command splits each protein sequence into a list of peptides according to a given cleavage-pattern. The command expects a list of protein sequences that are passed - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way protein sequences are passed, in the order as listed above. Text files and standard input should have either one protein sequence per line or contain a FASTA formatted list of protein sequences. FASTA headers are preserved in the output, so that peptides can be bundled per protein sequence. - - EOS - required :p, :pattern, 'specify cleavage-pattern (regex) as the pattern after which the next peptide will be cleaved (default: ([KR])([^P]) for tryptic peptides).' - run do |opts, _args, _cmd| - pattern = opts.fetch(:pattern, '([KR])([^P])') - # decide if we have FASTA input - fasta_header = $stdin.gets - if fasta_header.start_with? '>' - # fasta input, need to join lines - until $stdin.eof? - prot = '' - # Sometimes you just got to accept this weird and ugly code - until $stdin.eof? || (line = gets).start_with?('>') - prot += line.chomp - end - puts fasta_header - puts prot.gsub(/#{pattern}/, "\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) - - fasta_header = line - end - else - # handle our already read line - puts fasta_header.gsub(/#{pattern}/, "\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) - - # we no longer have to join lines as input is now more sane - $stdin.each_line do |protein| - puts protein.gsub(/#{pattern}/, "\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) - end - end - end -end - -root_cmd.run(ARGV) +Unipept::Prot2pept.run(ARGV) diff --git a/lib/prot2pept.rb b/lib/prot2pept.rb new file mode 100644 index 00000000..9a83f21b --- /dev/null +++ b/lib/prot2pept.rb @@ -0,0 +1,63 @@ +require 'cri' + +module Unipept + class Prot2pept + attr_reader :root_command + attr_reader :valid_formats + + @root_command = Cri::Command.define do + name 'prot2pept' + summary 'Split protein sequences into peptides.' + usage 'prot2pept [options]' + description <<-EOS + The prot2pept command splits each protein sequence into a list of peptides according to a given cleavage-pattern. The command expects a list of protein sequences that are passed to standard input. + + The input should have either one protein sequence per line or contain a FASTA formatted list of protein sequences. FASTA headers are preserved in the output, so that peptides can be bundled per protein sequence. + + EOS + required :p, :pattern, 'specify cleavage-pattern (regex) as the pattern after which the next peptide will be cleaved (default: ([KR])([^P]) for tryptic peptides).' + flag :h, :help, 'show help for this command' do |_value, cmd| + puts cmd.help + exit 0 + end + run do |opts, _args, _cmd| + pattern = opts.fetch(:pattern, '([KR])([^P])') + + # decide if we have FASTA input + first_char = $stdin.getc + $stdin.ungetc(first_char) + if first_char == '>' + # fasta mode! + protein = '' + while (line = $stdin.gets) + if line.start_with? '>' + puts Prot2pept.split(protein, pattern) + protein = '' + puts line + else + protein += line.chomp + end + end + puts Prot2pept.split(protein, pattern) + else + $stdin.each_line do |prot| + puts Prot2pept.split(prot, pattern) + end + end + end + end + + def self.split(protein, pattern) + protein.gsub(/#{pattern}/, "\\1\n\\2").gsub(/#{pattern}/, "\\1\n\\2").split("\n").reject(&:empty?) + end + + # Invokes the uniprot command-line tool with the given arguments. + # + # @param [Array] args An array of command-line arguments + # + # @return [void] + def self.run(args) + @root_command.run(args) + end + end +end From 2e185fdbd141a70f7746b2b268c157d847442303 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 16:14:22 +0200 Subject: [PATCH 17/64] fix test --- test/test_uniprot.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_uniprot.rb b/test/test_uniprot.rb index 4f5615e7..611e60a3 100644 --- a/test/test_uniprot.rb +++ b/test/test_uniprot.rb @@ -127,7 +127,7 @@ def test_format_options def test_help out, _err = capture_io_while do assert_raises SystemExit do - Peptfilter.run(%w(-h)) + Uniprot.run(%w(-h)) end end assert(out.include? 'show help for this command') From ea58c0edbf39e8f415d666b76e427c197fd1c285 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 16:55:18 +0200 Subject: [PATCH 18/64] add tests for prot2pept --- test/test_prot2pept.rb | 82 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 test/test_prot2pept.rb diff --git a/test/test_prot2pept.rb b/test/test_prot2pept.rb new file mode 100644 index 00000000..e3e28212 --- /dev/null +++ b/test/test_prot2pept.rb @@ -0,0 +1,82 @@ +require_relative '../lib/prot2pept' + +module Unipept + class Prot2peptTestCase < Unipept::TestCase + def test_normal_input + out, _err = capture_io_with_input('AALTERAALTERPAALTER') do + Prot2pept.run(%w()) + end + assert_equal("AALTER\nAALTERPAALTER", out.chomp) + + out, _err = capture_io_with_input('KRKPR') do + Prot2pept.run(%w()) + end + assert_equal("K\nR\nKPR", out.chomp) + + out, _err = capture_io_with_input(%w(AALTERAALTERPAALTER AALTERAA)) do + Prot2pept.run(%w()) + end + assert_equal("AALTER\nAALTERPAALTER\nAALTER\nAA", out.chomp) + end + + def test_fasta_input + out, _err = capture_io_with_input(">AKA\nAALTERAALTERPAALTER") do + Prot2pept.run(%w()) + end + assert_equal(">AKA\nAALTER\nAALTERPAALTER", out.chomp) + + out, _err = capture_io_with_input(">AKA\nAAL\nT\nERAALTER\nP\nAALTER") do + Prot2pept.run(%w()) + end + assert_equal(">AKA\nAALTER\nAALTERPAALTER", out.chomp) + + out, _err = capture_io_with_input(">AKA\nAAL\nT\n>\nERAALTER\nP\nAALTER") do + Prot2pept.run(%w()) + end + assert_equal(">AKA\nAALT\n>\nER\nAALTERPAALTER", out.chomp) + end + + def test_default_pattern + default_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do + Prot2pept.run(%w()) + end + assert_equal("AALTER\nAALTERPAALTER", default_out.chomp) + + pattern_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do + Prot2pept.run(['-p', '([KR])([^P])']) + end + assert_equal(default_out, pattern_out) + + pattern_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do + Prot2pept.run(['--pattern', '([KR])([^P])']) + end + assert_equal(default_out, pattern_out) + end + + def test_pattern + out, _err = capture_io_with_input('AALTERAALTERPAALTER') do + Prot2pept.run(%w()) + end + assert_equal("AALTER\nAALTERPAALTER", out.chomp) + + out, _err = capture_io_with_input('AALTERAALTERPAALTER') do + Prot2pept.run(%w(-p ([KR])([^A]))) + end + assert_equal("AALTERAALTER\nPAALTER", out.chomp) + + out, _err = capture_io_with_input('AALTERAALTERPAALTER') do + Prot2pept.run(%w(--pattern ([KR])([^A]))) + end + assert_equal("AALTERAALTER\nPAALTER", out.chomp) + end + + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Prot2pept.run(%w(-h)) + end + end + assert(out.include? 'show help for this command') + end + end +end From b7ade4a24a311fa35bb4ee4e05261b6ec1c65f59 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 17:28:35 +0200 Subject: [PATCH 19/64] move files around --- bin/peptfilter | 4 +- bin/prot2pept | 4 +- bin/uniprot | 4 +- lib/commands.rb | 9 +++ lib/{ => commands}/peptfilter.rb | 4 +- lib/{ => commands}/prot2pept.rb | 4 +- lib/{ => commands}/uniprot.rb | 3 +- test/{ => commands}/test_peptfilter.rb | 86 +++++++++++++------------- test/{ => commands}/test_prot2pept.rb | 28 ++++----- test/{ => commands}/test_uniprot.rb | 44 ++++++------- 10 files changed, 97 insertions(+), 93 deletions(-) create mode 100644 lib/commands.rb rename lib/{ => commands}/peptfilter.rb (99%) rename lib/{ => commands}/prot2pept.rb (98%) rename lib/{ => commands}/uniprot.rb (98%) rename test/{ => commands}/test_peptfilter.rb (53%) rename test/{ => commands}/test_prot2pept.rb (76%) rename test/{ => commands}/test_uniprot.rb (71%) diff --git a/bin/peptfilter b/bin/peptfilter index ca2a93e6..b2ee6d90 100755 --- a/bin/peptfilter +++ b/bin/peptfilter @@ -1,7 +1,7 @@ #!/usr/bin/env ruby -require_relative '../lib/peptfilter' +require_relative '../lib/commands' Signal.trap('PIPE', 'EXIT') Signal.trap('INT', 'EXIT') -Unipept::Peptfilter.run(ARGV) +Unipept::Commands::Peptfilter.run(ARGV) diff --git a/bin/prot2pept b/bin/prot2pept index d52cae66..49174e5a 100755 --- a/bin/prot2pept +++ b/bin/prot2pept @@ -1,7 +1,7 @@ #!/usr/bin/env ruby -require_relative '../lib/prot2pept' +require_relative '../lib/commands' Signal.trap('PIPE', 'EXIT') Signal.trap('INT', 'EXIT') -Unipept::Prot2pept.run(ARGV) +Unipept::Commands::Prot2pept.run(ARGV) diff --git a/bin/uniprot b/bin/uniprot index 35c00ac8..0f070fcc 100755 --- a/bin/uniprot +++ b/bin/uniprot @@ -1,7 +1,7 @@ #!/usr/bin/env ruby -require_relative '../lib/uniprot' +require_relative '../lib/commands' Signal.trap('PIPE', 'EXIT') Signal.trap('INT', 'EXIT') -Unipept::Uniprot.run(ARGV) +Unipept::Commands::Uniprot.run(ARGV) diff --git a/lib/commands.rb b/lib/commands.rb new file mode 100644 index 00000000..3bd3226c --- /dev/null +++ b/lib/commands.rb @@ -0,0 +1,9 @@ +require 'cri' + +module Unipept + module Commands + require_relative 'commands/peptfilter' + require_relative 'commands/prot2pept' + require_relative 'commands/uniprot' + end +end diff --git a/lib/peptfilter.rb b/lib/commands/peptfilter.rb similarity index 99% rename from lib/peptfilter.rb rename to lib/commands/peptfilter.rb index afe3d4de..dfd4cd77 100644 --- a/lib/peptfilter.rb +++ b/lib/commands/peptfilter.rb @@ -1,6 +1,4 @@ -require 'cri' - -module Unipept +module Unipept::Commands class Peptfilter attr_reader :root_command diff --git a/lib/prot2pept.rb b/lib/commands/prot2pept.rb similarity index 98% rename from lib/prot2pept.rb rename to lib/commands/prot2pept.rb index 9a83f21b..eeb1bcf1 100644 --- a/lib/prot2pept.rb +++ b/lib/commands/prot2pept.rb @@ -1,6 +1,4 @@ -require 'cri' - -module Unipept +module Unipept::Commands class Prot2pept attr_reader :root_command attr_reader :valid_formats diff --git a/lib/uniprot.rb b/lib/commands/uniprot.rb similarity index 98% rename from lib/uniprot.rb rename to lib/commands/uniprot.rb index 6467358e..ec536186 100644 --- a/lib/uniprot.rb +++ b/lib/commands/uniprot.rb @@ -1,7 +1,6 @@ -require 'cri' require 'typhoeus' -module Unipept +module Unipept::Commands class Uniprot attr_reader :root_command attr_reader :valid_formats diff --git a/test/test_peptfilter.rb b/test/commands/test_peptfilter.rb similarity index 53% rename from test/test_peptfilter.rb rename to test/commands/test_peptfilter.rb index 1f71617f..1d61fce1 100644 --- a/test/test_peptfilter.rb +++ b/test/commands/test_peptfilter.rb @@ -1,159 +1,159 @@ -require_relative '../lib/peptfilter' +require_relative '../../lib/commands' module Unipept class PeptfilterTestCase < Unipept::TestCase def test_length_filter # min length - assert(Peptfilter.filter_length('AALER', 4, 10)) - assert(Peptfilter.filter_length('AALER', 5, 10)) - assert(!Peptfilter.filter_length('AALER', 6, 10)) + assert(Commands::Peptfilter.filter_length('AALER', 4, 10)) + assert(Commands::Peptfilter.filter_length('AALER', 5, 10)) + assert(!Commands::Peptfilter.filter_length('AALER', 6, 10)) # max length - assert(!Peptfilter.filter_length('AALER', 1, 4)) - assert(Peptfilter.filter_length('AALER', 1, 5)) - assert(Peptfilter.filter_length('AALER', 1, 6)) + assert(!Commands::Peptfilter.filter_length('AALER', 1, 4)) + assert(Commands::Peptfilter.filter_length('AALER', 1, 5)) + assert(Commands::Peptfilter.filter_length('AALER', 1, 6)) end def test_lacks_filter - assert(Peptfilter.filter_lacks('AALER', ''.chars.to_a)) - assert(Peptfilter.filter_lacks('AALER', 'BCD'.chars.to_a)) - assert(!Peptfilter.filter_lacks('AALER', 'A'.chars.to_a)) - assert(!Peptfilter.filter_lacks('AALER', 'AE'.chars.to_a)) + assert(Commands::Peptfilter.filter_lacks('AALER', ''.chars.to_a)) + assert(Commands::Peptfilter.filter_lacks('AALER', 'BCD'.chars.to_a)) + assert(!Commands::Peptfilter.filter_lacks('AALER', 'A'.chars.to_a)) + assert(!Commands::Peptfilter.filter_lacks('AALER', 'AE'.chars.to_a)) end def test_contains_filter - assert(Peptfilter.filter_contains('AALER', ''.chars.to_a)) - assert(Peptfilter.filter_contains('AALER', 'A'.chars.to_a)) - assert(Peptfilter.filter_contains('AALER', 'AE'.chars.to_a)) - assert(!Peptfilter.filter_contains('AALER', 'BCD'.chars.to_a)) - assert(!Peptfilter.filter_contains('AALER', 'AB'.chars.to_a)) + assert(Commands::Peptfilter.filter_contains('AALER', ''.chars.to_a)) + assert(Commands::Peptfilter.filter_contains('AALER', 'A'.chars.to_a)) + assert(Commands::Peptfilter.filter_contains('AALER', 'AE'.chars.to_a)) + assert(!Commands::Peptfilter.filter_contains('AALER', 'BCD'.chars.to_a)) + assert(!Commands::Peptfilter.filter_contains('AALER', 'AB'.chars.to_a)) end def test_filter - assert(Peptfilter.filter('AALTER', 4, 10, 'BCD'.chars.to_a, 'AL'.chars.to_a)) - assert(!Peptfilter.filter('AALTER', 7, 10, 'BCD.chars.to_a', 'AL'.chars.to_a)) - assert(!Peptfilter.filter('AALTER', 4, 5, 'BCD'.chars.to_a, 'AL'.chars.to_a)) - assert(!Peptfilter.filter('AALTER', 4, 10, 'ABC'.chars.to_a, 'AL'.chars.to_a)) - assert(!Peptfilter.filter('AALTER', 4, 10, 'BCD'.chars.to_a, 'ALC'.chars.to_a)) + assert(Commands::Peptfilter.filter('AALTER', 4, 10, 'BCD'.chars.to_a, 'AL'.chars.to_a)) + assert(!Commands::Peptfilter.filter('AALTER', 7, 10, 'BCD.chars.to_a', 'AL'.chars.to_a)) + assert(!Commands::Peptfilter.filter('AALTER', 4, 5, 'BCD'.chars.to_a, 'AL'.chars.to_a)) + assert(!Commands::Peptfilter.filter('AALTER', 4, 10, 'ABC'.chars.to_a, 'AL'.chars.to_a)) + assert(!Commands::Peptfilter.filter('AALTER', 4, 10, 'BCD'.chars.to_a, 'ALC'.chars.to_a)) end def test_default_min_length_argument out, _err = capture_io_with_input('A' * 6) do - Peptfilter.run(%w()) + Commands::Peptfilter.run(%w()) end assert_equal('A' * 6, out.chomp) out, _err = capture_io_with_input('A' * 5) do - Peptfilter.run(%w()) + Commands::Peptfilter.run(%w()) end assert_equal('A' * 5, out.chomp) out, _err = capture_io_with_input('A' * 4) do - Peptfilter.run(%w()) + Commands::Peptfilter.run(%w()) end assert_equal('', out.chomp) end def test_default_max_length_argument out, _err = capture_io_with_input('A' * 49) do - Peptfilter.run(%w()) + Commands::Peptfilter.run(%w()) end assert_equal('A' * 49, out.chomp) out, _err = capture_io_with_input('A' * 50) do - Peptfilter.run(%w()) + Commands::Peptfilter.run(%w()) end assert_equal('A' * 50, out.chomp) out, _err = capture_io_with_input('A' * 51) do - Peptfilter.run(%w()) + Commands::Peptfilter.run(%w()) end assert_equal('', out.chomp) end def test_with_min_argument out, _err = capture_io_with_input('A' * 6) do - Peptfilter.run(%w(--minlen 7)) + Commands::Peptfilter.run(%w(--minlen 7)) end assert_equal('', out.chomp) out, _err = capture_io_with_input('A' * 4) do - Peptfilter.run(%w(--minlen 3)) + Commands::Peptfilter.run(%w(--minlen 3)) end assert_equal('A' * 4, out.chomp) end def test_with_max_argument out, _err = capture_io_with_input('A' * 45) do - Peptfilter.run(%w(--maxlen 40)) + Commands::Peptfilter.run(%w(--maxlen 40)) end assert_equal('', out.chomp) out, _err = capture_io_with_input('A' * 55) do - Peptfilter.run(%w(--maxlen 60)) + Commands::Peptfilter.run(%w(--maxlen 60)) end assert_equal('A' * 55, out.chomp) end def test_with_lacks_argument out, _err = capture_io_with_input('A' * 10) do - Peptfilter.run(%w(--lacks B)) + Commands::Peptfilter.run(%w(--lacks B)) end assert_equal('A' * 10, out.chomp) out, _err = capture_io_with_input('A' * 10) do - Peptfilter.run(%w(-l B)) + Commands::Peptfilter.run(%w(-l B)) end assert_equal('A' * 10, out.chomp) out, _err = capture_io_with_input('A' * 10) do - Peptfilter.run(%w(--lacks A)) + Commands::Peptfilter.run(%w(--lacks A)) end assert_equal('', out.chomp) out, _err = capture_io_with_input('A' * 10) do - Peptfilter.run(%w(-l A)) + Commands::Peptfilter.run(%w(-l A)) end assert_equal('', out.chomp) end def test_with_contains_argument out, _err = capture_io_with_input('A' * 10) do - Peptfilter.run(%w(--contains A)) + Commands::Peptfilter.run(%w(--contains A)) end assert_equal('A' * 10, out.chomp) out, _err = capture_io_with_input('A' * 10) do - Peptfilter.run(%w(-c A)) + Commands::Peptfilter.run(%w(-c A)) end assert_equal('A' * 10, out.chomp) out, _err = capture_io_with_input('A' * 10) do - Peptfilter.run(%w(--contains B)) + Commands::Peptfilter.run(%w(--contains B)) end assert_equal('', out.chomp) out, _err = capture_io_with_input('A' * 10) do - Peptfilter.run(%w(-c B)) + Commands::Peptfilter.run(%w(-c B)) end assert_equal('', out.chomp) end def test_fasta_input out, _err = capture_io_with_input('>') do - Peptfilter.run(%w()) + Commands::Peptfilter.run(%w()) end assert_equal('>', out.chomp) out, _err = capture_io_with_input(['>', 'A', 'AALTER', '>']) do - Peptfilter.run(%w()) + Commands::Peptfilter.run(%w()) end assert_equal(">\nAALTER\n>", out.chomp) end def test_normal_input out, _err = capture_io_with_input(['A', 'A' * 11, 'AAAAB', 'BBBBB', 'CCCCC', 'CCCCCA']) do - Peptfilter.run(%w(--minlen 4 --maxlen 10 --lacks B --contains A)) + Commands::Peptfilter.run(%w(--minlen 4 --maxlen 10 --lacks B --contains A)) end assert_equal('CCCCCA', out.chomp) end @@ -161,7 +161,7 @@ def test_normal_input def test_help out, _err = capture_io_while do assert_raises SystemExit do - Peptfilter.run(%w(-h)) + Commands::Peptfilter.run(%w(-h)) end end assert(out.include? 'show help for this command') diff --git a/test/test_prot2pept.rb b/test/commands/test_prot2pept.rb similarity index 76% rename from test/test_prot2pept.rb rename to test/commands/test_prot2pept.rb index e3e28212..e6521c57 100644 --- a/test/test_prot2pept.rb +++ b/test/commands/test_prot2pept.rb @@ -1,71 +1,71 @@ -require_relative '../lib/prot2pept' +require_relative '../../lib/commands' module Unipept class Prot2peptTestCase < Unipept::TestCase def test_normal_input out, _err = capture_io_with_input('AALTERAALTERPAALTER') do - Prot2pept.run(%w()) + Commands::Prot2pept.run(%w()) end assert_equal("AALTER\nAALTERPAALTER", out.chomp) out, _err = capture_io_with_input('KRKPR') do - Prot2pept.run(%w()) + Commands::Prot2pept.run(%w()) end assert_equal("K\nR\nKPR", out.chomp) out, _err = capture_io_with_input(%w(AALTERAALTERPAALTER AALTERAA)) do - Prot2pept.run(%w()) + Commands::Prot2pept.run(%w()) end assert_equal("AALTER\nAALTERPAALTER\nAALTER\nAA", out.chomp) end def test_fasta_input out, _err = capture_io_with_input(">AKA\nAALTERAALTERPAALTER") do - Prot2pept.run(%w()) + Commands::Prot2pept.run(%w()) end assert_equal(">AKA\nAALTER\nAALTERPAALTER", out.chomp) out, _err = capture_io_with_input(">AKA\nAAL\nT\nERAALTER\nP\nAALTER") do - Prot2pept.run(%w()) + Commands::Prot2pept.run(%w()) end assert_equal(">AKA\nAALTER\nAALTERPAALTER", out.chomp) out, _err = capture_io_with_input(">AKA\nAAL\nT\n>\nERAALTER\nP\nAALTER") do - Prot2pept.run(%w()) + Commands::Prot2pept.run(%w()) end assert_equal(">AKA\nAALT\n>\nER\nAALTERPAALTER", out.chomp) end def test_default_pattern default_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do - Prot2pept.run(%w()) + Commands::Prot2pept.run(%w()) end assert_equal("AALTER\nAALTERPAALTER", default_out.chomp) pattern_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do - Prot2pept.run(['-p', '([KR])([^P])']) + Commands::Prot2pept.run(['-p', '([KR])([^P])']) end assert_equal(default_out, pattern_out) pattern_out, _err = capture_io_with_input('AALTERAALTERPAALTER') do - Prot2pept.run(['--pattern', '([KR])([^P])']) + Commands::Prot2pept.run(['--pattern', '([KR])([^P])']) end assert_equal(default_out, pattern_out) end def test_pattern out, _err = capture_io_with_input('AALTERAALTERPAALTER') do - Prot2pept.run(%w()) + Commands::Prot2pept.run(%w()) end assert_equal("AALTER\nAALTERPAALTER", out.chomp) out, _err = capture_io_with_input('AALTERAALTERPAALTER') do - Prot2pept.run(%w(-p ([KR])([^A]))) + Commands::Prot2pept.run(%w(-p ([KR])([^A]))) end assert_equal("AALTERAALTER\nPAALTER", out.chomp) out, _err = capture_io_with_input('AALTERAALTERPAALTER') do - Prot2pept.run(%w(--pattern ([KR])([^A]))) + Commands::Prot2pept.run(%w(--pattern ([KR])([^A]))) end assert_equal("AALTERAALTER\nPAALTER", out.chomp) end @@ -73,7 +73,7 @@ def test_pattern def test_help out, _err = capture_io_while do assert_raises SystemExit do - Prot2pept.run(%w(-h)) + Commands::Prot2pept.run(%w(-h)) end end assert(out.include? 'show help for this command') diff --git a/test/test_uniprot.rb b/test/commands/test_uniprot.rb similarity index 71% rename from test/test_uniprot.rb rename to test/commands/test_uniprot.rb index 611e60a3..1cbc6429 100644 --- a/test/test_uniprot.rb +++ b/test/commands/test_uniprot.rb @@ -1,59 +1,59 @@ -require_relative '../lib/uniprot' +require_relative '../../lib/commands' module Unipept class UniprotTestCase < Unipept::TestCase def test_argument_input out, _err = capture_io_while do - Uniprot.run(%w(Q6GZX3)) + Commands::Uniprot.run(%w(Q6GZX3)) end assert_equal(1, out.split(/\n/).length) out, _err = capture_io_while do - Uniprot.run(%w(Q6GZX3 Q6GZX4)) + Commands::Uniprot.run(%w(Q6GZX3 Q6GZX4)) end assert_equal(2, out.split(/\n/).length) out, _err = capture_io_while do - Uniprot.run(%w(-f fasta Q6GZX3 Q6GZX4)) + Commands::Uniprot.run(%w(-f fasta Q6GZX3 Q6GZX4)) end assert_equal(2, out.count('>')) out, _err = capture_io_while do - Uniprot.run(%w(--format fasta Q6GZX3 Q6GZX4)) + Commands::Uniprot.run(%w(--format fasta Q6GZX3 Q6GZX4)) end assert_equal(2, out.count('>')) end def test_stdin_input out, _err = capture_io_with_input('Q6GZX3') do - Uniprot.run(%w()) + Commands::Uniprot.run(%w()) end assert_equal(1, out.split(/\n/).length) out, _err = capture_io_with_input(%w(Q6GZX3 Q6GZX4)) do - Uniprot.run(%w()) + Commands::Uniprot.run(%w()) end assert_equal(2, out.split(/\n/).length) out, _err = capture_io_with_input(%w(Q6GZX3 Q6GZX4)) do - Uniprot.run(%w(-f fasta)) + Commands::Uniprot.run(%w(-f fasta)) end assert_equal(2, out.count('>')) out, _err = capture_io_with_input(%w(Q6GZX3 Q6GZX4)) do - Uniprot.run(%w(--format fasta)) + Commands::Uniprot.run(%w(--format fasta)) end assert_equal(2, out.count('>')) end def test_argument_input_priority out, _err = capture_io_with_input('Q6GZX3') do - Uniprot.run(%w(Q6GZX3 Q6GZX4)) + Commands::Uniprot.run(%w(Q6GZX3 Q6GZX4)) end assert_equal(2, out.split(/\n/).length) out, _err = capture_io_with_input(%w(Q6GZX3 Q6GZX4)) do - Uniprot.run(%w(Q6GZX3)) + Commands::Uniprot.run(%w(Q6GZX3)) end assert_equal(1, out.split(/\n/).length) end @@ -61,7 +61,7 @@ def test_argument_input_priority def test_invalid_format out, err = capture_io_while do assert_raises SystemExit do - Uniprot.run(%w(--format xxx)) + Commands::Uniprot.run(%w(--format xxx)) end end assert_equal('', out) @@ -70,17 +70,17 @@ def test_invalid_format def test_default_format out_default, _err = capture_io_while do - Uniprot.run(%w(Q6GZX3)) + Commands::Uniprot.run(%w(Q6GZX3)) end assert_equal(1, out_default.split(/\n/).length) out_sequence, _err = capture_io_while do - Uniprot.run(%w(-f sequence Q6GZX3)) + Commands::Uniprot.run(%w(-f sequence Q6GZX3)) end assert_equal(out_default, out_sequence) out_sequence, _err = capture_io_while do - Uniprot.run(%w(--format sequence Q6GZX3)) + Commands::Uniprot.run(%w(--format sequence Q6GZX3)) end assert_equal(out_default, out_sequence) end @@ -88,37 +88,37 @@ def test_default_format def test_format_options # fasta txt xml rdf gff sequence out, err = capture_io_while do - Uniprot.run(%w(-f fasta Q6GZX3)) + Commands::Uniprot.run(%w(-f fasta Q6GZX3)) end assert(!out.empty?) assert(err.empty?) out, err = capture_io_while do - Uniprot.run(%w(-f txt Q6GZX3)) + Commands::Uniprot.run(%w(-f txt Q6GZX3)) end assert(!out.empty?) assert(err.empty?) out, err = capture_io_while do - Uniprot.run(%w(-f xml Q6GZX3)) + Commands::Uniprot.run(%w(-f xml Q6GZX3)) end assert(!out.empty?) assert(err.empty?) out, err = capture_io_while do - Uniprot.run(%w(-f rdf Q6GZX3)) + Commands::Uniprot.run(%w(-f rdf Q6GZX3)) end assert(!out.empty?) assert(err.empty?) out, err = capture_io_while do - Uniprot.run(%w(-f gff Q6GZX3)) + Commands::Uniprot.run(%w(-f gff Q6GZX3)) end assert(!out.empty?) assert(err.empty?) out, err = capture_io_while do - Uniprot.run(%w(-f sequence Q6GZX3)) + Commands::Uniprot.run(%w(-f sequence Q6GZX3)) end assert(!out.empty?) assert(err.empty?) @@ -127,7 +127,7 @@ def test_format_options def test_help out, _err = capture_io_while do assert_raises SystemExit do - Uniprot.run(%w(-h)) + Commands::Uniprot.run(%w(-h)) end end assert(out.include? 'show help for this command') From a0cee2d7ea7e3445a8fafc823ecaf4bf772dc198 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 18:41:38 +0200 Subject: [PATCH 20/64] formatter tests, part 1 --- lib/{unipept => }/batch_order.rb | 0 lib/{unipept => }/configuration.rb | 0 lib/{unipept => }/formatters.rb | 17 +++++++++- lib/unipept.rb | 8 ++--- lib/unipept/version.rb | 3 -- lib/version.rb | 3 ++ test/test_formatters.rb | 51 ++++++++++++++++++++++++++++++ 7 files changed, 74 insertions(+), 8 deletions(-) rename lib/{unipept => }/batch_order.rb (100%) rename lib/{unipept => }/configuration.rb (100%) rename lib/{unipept => }/formatters.rb (95%) delete mode 100644 lib/unipept/version.rb create mode 100644 lib/version.rb create mode 100644 test/test_formatters.rb diff --git a/lib/unipept/batch_order.rb b/lib/batch_order.rb similarity index 100% rename from lib/unipept/batch_order.rb rename to lib/batch_order.rb diff --git a/lib/unipept/configuration.rb b/lib/configuration.rb similarity index 100% rename from lib/unipept/configuration.rb rename to lib/configuration.rb diff --git a/lib/unipept/formatters.rb b/lib/formatters.rb similarity index 95% rename from lib/unipept/formatters.rb rename to lib/formatters.rb index b1550383..4bc29b8d 100644 --- a/lib/unipept/formatters.rb +++ b/lib/formatters.rb @@ -22,6 +22,10 @@ def self.default 'csv' end + def type + '' + end + def header(_sample_data, _fasta_mapper = nil) '' end @@ -36,6 +40,10 @@ class JSONFormatter < Formatter require 'json' register :json + def type + 'json' + end + def format(data, _fasta_mapper = nil) # TODO: add fasta header based on fasta_mapper information data.to_json @@ -43,9 +51,12 @@ def format(data, _fasta_mapper = nil) end class CSVFormatter < Formatter require 'csv' - register :csv + def type + 'csv' + end + def header(data, fasta_input = nil) CSV.generate do |csv| first = data.first @@ -118,6 +129,10 @@ def to_xml(name = nil) register :xml + def type + 'xml' + end + def format(data, _fasta_mapper = nil) # TODO: add fasta header based on fasta_mapper information data.to_xml diff --git a/lib/unipept.rb b/lib/unipept.rb index 11c39837..d0a2af67 100644 --- a/lib/unipept.rb +++ b/lib/unipept.rb @@ -1,8 +1,8 @@ -require_relative 'unipept/formatters' -require_relative 'unipept/configuration' -require_relative 'unipept/batch_order' +require_relative 'formatters' +require_relative 'configuration' +require_relative 'batch_order' require_relative 'unipept/commands' -require_relative 'unipept/version' +require_relative 'version' module Unipept end diff --git a/lib/unipept/version.rb b/lib/unipept/version.rb deleted file mode 100644 index e8740866..00000000 --- a/lib/unipept/version.rb +++ /dev/null @@ -1,3 +0,0 @@ -module Unipept - VERSION = File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION')).strip -end diff --git a/lib/version.rb b/lib/version.rb new file mode 100644 index 00000000..1182c3bf --- /dev/null +++ b/lib/version.rb @@ -0,0 +1,3 @@ +module Unipept + VERSION = File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).strip +end diff --git a/test/test_formatters.rb b/test/test_formatters.rb new file mode 100644 index 00000000..e7235680 --- /dev/null +++ b/test/test_formatters.rb @@ -0,0 +1,51 @@ +require_relative '../lib/formatters' + +module Unipept + class FormattersTestCase < Unipept::TestCase + def test_available_formatters + assert_equal(%w(json csv xml).sort, Formatter.available.sort) + end + + def test_default_formatter + assert_equal('csv', Formatter.default) + end + + def test_formatter_registration + assert_equal(%w(json csv xml).sort, Formatter.available.sort) + Formatter.register(:test) + assert_equal(%w(json csv xml test).sort, Formatter.available.sort) + end + + def test_new_for_format + formatter = Formatter.new_for_format('json') + assert_equal('json', formatter.type) + + formatter = Formatter.new_for_format('xml') + assert_equal('xml', formatter.type) + + formatter = Formatter.new_for_format('csv') + assert_equal('csv', formatter.type) + + formatter = Formatter.new_for_format('blah') + assert_equal('csv', formatter.type) + end + end + + class JSONFormatterTestCase < Unipept::TestCase + def formatter + Formatter.new_for_format('json') + end + end + + class CSVFormatterTestCase < Unipept::TestCase + def formatter + Formatter.new_for_format('csv') + end + end + + class XMLFormatterTestCase < Unipept::TestCase + def formatter + Formatter.new_for_format('xml') + end + end +end From c1eb50db525773248cadc9027b063e5fdf4a5c78 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 23:02:24 +0200 Subject: [PATCH 21/64] fix formatter tests, part 1 --- test/test_formatters.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/test_formatters.rb b/test/test_formatters.rb index e7235680..e399e4e7 100644 --- a/test/test_formatters.rb +++ b/test/test_formatters.rb @@ -3,7 +3,10 @@ module Unipept class FormattersTestCase < Unipept::TestCase def test_available_formatters - assert_equal(%w(json csv xml).sort, Formatter.available.sort) + formatters = Formatter.available + assert(formatters.include? 'json') + assert(formatters.include? 'csv') + assert(formatters.include? 'xml') end def test_default_formatter @@ -11,9 +14,9 @@ def test_default_formatter end def test_formatter_registration - assert_equal(%w(json csv xml).sort, Formatter.available.sort) + assert(!(Formatter.available.include? 'test')) Formatter.register(:test) - assert_equal(%w(json csv xml test).sort, Formatter.available.sort) + assert(Formatter.available.include? 'test') end def test_new_for_format From 22d6fc5b69672716df6a8dfd9148d3386c498fd6 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Sun, 7 Jun 2015 23:12:20 +0200 Subject: [PATCH 22/64] slightly cleaner code --- lib/commands/uniprot.rb | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/commands/uniprot.rb b/lib/commands/uniprot.rb index ec536186..9d354ed4 100644 --- a/lib/commands/uniprot.rb +++ b/lib/commands/uniprot.rb @@ -58,10 +58,7 @@ def self.run(args) # @return [String] The requested Uniprot record in the requested format def self.get_uniprot_entry(accession, format) if format == 'sequence' - resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{accession}.fasta") - if resp.success? - resp.response_body.lines.map(&:chomp)[1..-1].join('') - end + get_uniprot_entry(accession, 'fasta').lines.map(&:chomp)[1..-1].join('') else # other format has been specified, just download and output resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{accession}.#{format}") From d866752b516fdd0d69e687880993036882c240bd Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 10:13:16 +0200 Subject: [PATCH 23/64] add tests for json formatter --- test/test_formatters.rb | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/test_formatters.rb b/test/test_formatters.rb index e399e4e7..3abc63c5 100644 --- a/test/test_formatters.rb +++ b/test/test_formatters.rb @@ -38,6 +38,22 @@ class JSONFormatterTestCase < Unipept::TestCase def formatter Formatter.new_for_format('json') end + + def object + "{list : ['a', 'b', 'c'], key : 'value'}" + end + + def test_header + assert_equal('', formatter.header(object)) + end + + def test_type + assert_equal('json', formatter.type) + end + + def test_format + assert_equal(object.to_json, formatter.format(object)) + end end class CSVFormatterTestCase < Unipept::TestCase From 38addcd6a384e5ba46fc9f77e615ee945502600a Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 10:18:42 +0200 Subject: [PATCH 24/64] add tests for xml formatter --- test/test_formatters.rb | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/test_formatters.rb b/test/test_formatters.rb index 3abc63c5..854e784d 100644 --- a/test/test_formatters.rb +++ b/test/test_formatters.rb @@ -32,6 +32,30 @@ def test_new_for_format formatter = Formatter.new_for_format('blah') assert_equal('csv', formatter.type) end + + def formatter + Formatter.new + end + + def object + "{list : ['a', 'b', 'c'], key : 'value'}" + end + + def object + "{list : ['a', 'b', 'c'], key : 'value'}" + end + + def test_header + assert_equal('', formatter.header(object)) + end + + def test_type + assert_equal('', formatter.type) + end + + def test_format + assert_equal(object, formatter.format(object)) + end end class JSONFormatterTestCase < Unipept::TestCase @@ -66,5 +90,21 @@ class XMLFormatterTestCase < Unipept::TestCase def formatter Formatter.new_for_format('xml') end + + def object + "{list : ['a', 'b', 'c'], key : 'value'}" + end + + def test_header + assert_equal('', formatter.header(object)) + end + + def test_type + assert_equal('xml', formatter.type) + end + + def test_format + assert_equal(object.to_xml, formatter.format(object)) + end end end From 7424c2fd8c6b09eb6b52f8058625ff49b03257c8 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 10:48:30 +0200 Subject: [PATCH 25/64] better test object --- test/test_formatters.rb | 42 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/test/test_formatters.rb b/test/test_formatters.rb index 854e784d..b3f8a3c5 100644 --- a/test/test_formatters.rb +++ b/test/test_formatters.rb @@ -37,16 +37,8 @@ def formatter Formatter.new end - def object - "{list : ['a', 'b', 'c'], key : 'value'}" - end - - def object - "{list : ['a', 'b', 'c'], key : 'value'}" - end - def test_header - assert_equal('', formatter.header(object)) + assert_equal('', formatter.header(TestObject.get_object)) end def test_type @@ -54,7 +46,7 @@ def test_type end def test_format - assert_equal(object, formatter.format(object)) + assert_equal(TestObject.get_object, formatter.format(TestObject.get_object)) end end @@ -63,12 +55,8 @@ def formatter Formatter.new_for_format('json') end - def object - "{list : ['a', 'b', 'c'], key : 'value'}" - end - def test_header - assert_equal('', formatter.header(object)) + assert_equal('', formatter.header(TestObject.get_object)) end def test_type @@ -76,7 +64,7 @@ def test_type end def test_format - assert_equal(object.to_json, formatter.format(object)) + assert_equal(TestObject.as_json, formatter.format(TestObject.get_object)) end end @@ -91,12 +79,8 @@ def formatter Formatter.new_for_format('xml') end - def object - "{list : ['a', 'b', 'c'], key : 'value'}" - end - def test_header - assert_equal('', formatter.header(object)) + assert_equal('', formatter.header(TestObject.get_object)) end def test_type @@ -104,7 +88,21 @@ def test_type end def test_format - assert_equal(object.to_xml, formatter.format(object)) + assert_equal(TestObject.as_xml, formatter.format(TestObject.get_object)) + end + end + + class TestObject + def self.get_object + JSON.parse('{"integer": 5, "string": "string", "list": ["a", 2, false]}') + end + + def self.as_json + '{"integer":5,"string":"string","list":["a",2,false]}' + end + + def self.as_xml + '5stringa2false' end end end From 7434f148a1c0cd502eebc66b3313c5db0aaab354 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 14:59:46 +0200 Subject: [PATCH 26/64] clean csv formatter + add comments --- lib/formatters.rb | 179 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 144 insertions(+), 35 deletions(-) diff --git a/lib/formatters.rb b/lib/formatters.rb index 4bc29b8d..4b35a705 100644 --- a/lib/formatters.rb +++ b/lib/formatters.rb @@ -1,36 +1,75 @@ module Unipept class Formatter + # The Hash of available formatters + # + # @return [Hash] A hash of the available formatters def self.formatters @@formatters ||= {} end + # Returns a new formatter of the given format. If the given format is not available, the + # default formatter is returned + # + # @param [String] format The type of the formatter we want + # + # @return [Formatter] The requested formatter def self.new_for_format(format) formatters[format].new rescue formatters[default].new end + # Adds a new formatter to the list of available formats + # + # @param [Symbol] format The type of the format we want to register def self.register(format) formatters[format.to_s] = self end + # Returns a list of the available formatters + # + # @return [Array] The list of available formatters def self.available formatters.keys end + # @return [String] The type of the default formatter: csv def self.default 'csv' end + # @return [String] The type of the current formatter def type '' end + # Returns the header row for the given sample_data and fasta_mapper. This + # row is output only once at the beginning of the output + # + # @param [Object] _sample_data The data that we will output after this + # header. Can be used to extract the keys. + # + # @param [Array>] _fasta_mapper Optional mapping between input + # data and corresponding fasta header. The data is represented as a list + # containing tuples where the first element is the fasta header and second + # element is the input data + # + # @return [String] The header row def header(_sample_data, _fasta_mapper = nil) '' end - # JSON formatted data goes in, something other comes out + # Converts the given input data and corresponding fasta headers to another + # format. + # + # @param [Array] data The data we wish to convert + # + # @param [Array>] _fasta_mapper Optional mapping between input + # data and corresponding fasta header. The data is represented as a list + # containing tuples where the first element is the fasta header and second + # element is the input data + # + # @return [String] The converted input data def format(data, _fasta_mapper = nil) data end @@ -40,69 +79,127 @@ class JSONFormatter < Formatter require 'json' register :json + # @return [String] The type of the current formatter: json def type 'json' end + # Converts the given input data and corresponding fasta headers to JSON. + # Currently ignores the fasta_mapper. + # + # @param [Array] data The data we wish to convert + # + # @param [Array>] _fasta_mapper Optional mapping between input + # data and corresponding fasta header. The data is represented as a list + # containing tuples where the first element is the fasta header and second + # element is the input data + # + # @return [String] The input data converted to the JSON format. def format(data, _fasta_mapper = nil) # TODO: add fasta header based on fasta_mapper information data.to_json end end + class CSVFormatter < Formatter require 'csv' register :csv + # @return [String] The type of the current formatter: csv def type 'csv' end - def header(data, fasta_input = nil) + # Returns the header row for the given data and fasta_mapper. This row + # contains all the keys of the first element of the data, preceded by + # 'fasta_header' if a fasta_mapper is given. + # + # @param [Array] data The data that we will use to extract the keys from. + # + # @param [Array>] fasta_mapper Optional mapping between input + # data and corresponding fasta header. The data is represented as a list + # containing tuples where the first element is the fasta header and second + # element is the input data If a fasta_mapper is given, the output will be + # preceded with 'fasta_header'. + # + # @return [String] The header row + def header(data, fasta_mapper = nil) CSV.generate do |csv| first = data.first - if first.is_a? Array - first = first.first - end - if fasta_input - csv << (['fasta_header'] + first.keys).map(&:to_s) if first - else - csv << first.keys.map(&:to_s) if first - end + keys = fasta_mapper ? ['fasta_header'] : [] + csv << (keys + first.keys).map(&:to_s) if first end end - def format(data, fasta_input = nil) + # Converts the given input data and corresponding fasta headers to the csv + # format + # + # @param [Array] data The data we wish to convert + # + # @param [Array>] fasta_mapper Optional mapping between input + # data and corresponding fasta header. The data is represented as a list + # containing tuples where the first element is the fasta header and second + # element is the input data + # + # @return [String] The converted input data into the csv format + def format(data, fasta_mapper = nil) CSV.generate do |csv| - if fasta_input - # Process the output from {key1: value1, key2: value2, ...} - # to {value => {key1: value1, key2: value2, ...}} - data_dict = {} - data.each do |d| - data_dict[d.values.first.to_s] ||= [] - data_dict[d.values.first.to_s] << d - end - - # Iterate over the input - fasta_input.each do |input_pair| - fasta_header, id = input_pair - - next if data_dict[id].nil? - - # Retrieve the corresponding API result (if any) - data_dict[id].each do |r| - csv << ([fasta_header] + r.values).map { |v| v == '' ? nil : v } - end - end - + if fasta_mapper + format_fasta(csv, data, fasta_mapper) else + format_normal(csv, data) + end + end + end - data.each do |o| - csv << o.values.map { |v| v == '' ? nil : v } - end + # Converts the given input data and corresponding fasta headers to the csv + # format + # + # @param [CSV] csv object we write the csv output to + # + # @param [Array] data The data we wish to convert + # + # @return [String] The converted input data into the csv format + def format_normal(csv, data) + data.each do |o| + csv << o.values.map { |v| v == '' ? nil : v } + end + end + # Converts the given input data and corresponding fasta headers to the csv + # format + # + # @param [CSV] csv object we write the csv output to + # + # @param [Array] data The data we wish to convert + # + # @param [Array>] fasta_mapper Optional mapping between input + # data and corresponding fasta header. The data is represented as a list + # containing tuples where the first element is the fasta header and second + # element is the input data + # + # @return [String] The converted input data into the csv format + def format_fasta(csv, data, fasta_mapper) + data_dict = group_by_first_key(data) + fasta_mapper.each do |fasta_header, key| + next if data_dict[key].nil? + + data_dict[key].each do |r| + csv << ([fasta_header] + r.values).map { |v| v == '' ? nil : v } end end end + + # Groups the data by the first key of each element, for example + # [{key1: v1, key2: v2},{key1: v1, key2: v3},{key1: v4, key2: v2}] + # to {v1 => [{key1: v1, key2: v2},{key1: v1, key2: v3}], v4 => [{key1: v4, key2: v2}]] + # + # @param [Array] data The data we wish to Groups + # + # @return [Hash] The input data grouped by the first key + def group_by_first_key(data) + data.group_by{|el| el.values.first.to_s} + end end class XMLFormatter < Formatter @@ -129,10 +226,22 @@ def to_xml(name = nil) register :xml + # @return [String] The type of the current formatter: xml def type 'xml' end + # Converts the given input data and corresponding fasta headers to XML. + # Currently ignores the fasta_mapper. + # + # @param [Array] data The data we wish to convert + # + # @param [Array>] _fasta_mapper Optional mapping between input + # data and corresponding fasta header. The data is represented as a list + # containing tuples where the first element is the fasta header and second + # element is the input data + # + # @return [String] The input data converted to the XML format. def format(data, _fasta_mapper = nil) # TODO: add fasta header based on fasta_mapper information data.to_xml From cd028e9a19b7c7a24799ad12700d965bf35afcd9 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 14:59:56 +0200 Subject: [PATCH 27/64] add csv formatter tests --- test/test_formatters.rb | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/test_formatters.rb b/test/test_formatters.rb index b3f8a3c5..0c348536 100644 --- a/test/test_formatters.rb +++ b/test/test_formatters.rb @@ -72,6 +72,30 @@ class CSVFormatterTestCase < Unipept::TestCase def formatter Formatter.new_for_format('csv') end + + def test_header + fasta = [["peptide", ">test"]] + object = [TestObject.get_object, TestObject.get_object] + assert_equal(TestObject.as_csv_header, formatter.header(object)) + assert_equal("fasta_header," + TestObject.as_csv_header, formatter.header(object, fasta)) + end + + def test_type + assert_equal('csv', formatter.type) + end + + def test_format + object = [TestObject.get_object, TestObject.get_object] + csv = [TestObject.as_csv, TestObject.as_csv, ''].join("\n") + assert_equal(csv, formatter.format(object)) + end + + def test_format_with_fasta + fasta = [[">test", '5']] + object = [TestObject.get_object, TestObject.get_object] + csv = ['>test,' + TestObject.as_csv, '>test,' + TestObject.as_csv, ''].join("\n") + assert_equal(csv, formatter.format(object, fasta)) + end end class XMLFormatterTestCase < Unipept::TestCase @@ -104,5 +128,13 @@ def self.as_json def self.as_xml '5stringa2false' end + + def self.as_csv + '5,string,"[""a"", 2, false]"' + end + + def self.as_csv_header + "integer,string,list\n" + end end end From 0898d16b45112d252dc2305f0e1d25a38d9b83f9 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 15:01:05 +0200 Subject: [PATCH 28/64] style tweaks --- lib/formatters.rb | 2 +- test/test_formatters.rb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/formatters.rb b/lib/formatters.rb index 4b35a705..9f9017f1 100644 --- a/lib/formatters.rb +++ b/lib/formatters.rb @@ -198,7 +198,7 @@ def format_fasta(csv, data, fasta_mapper) # # @return [Hash] The input data grouped by the first key def group_by_first_key(data) - data.group_by{|el| el.values.first.to_s} + data.group_by { |el| el.values.first.to_s } end end diff --git a/test/test_formatters.rb b/test/test_formatters.rb index 0c348536..985d9747 100644 --- a/test/test_formatters.rb +++ b/test/test_formatters.rb @@ -74,10 +74,10 @@ def formatter end def test_header - fasta = [["peptide", ">test"]] + fasta = [['peptide', '>test']] object = [TestObject.get_object, TestObject.get_object] assert_equal(TestObject.as_csv_header, formatter.header(object)) - assert_equal("fasta_header," + TestObject.as_csv_header, formatter.header(object, fasta)) + assert_equal('fasta_header,' + TestObject.as_csv_header, formatter.header(object, fasta)) end def test_type @@ -91,7 +91,7 @@ def test_format end def test_format_with_fasta - fasta = [[">test", '5']] + fasta = [['>test', '5']] object = [TestObject.get_object, TestObject.get_object] csv = ['>test,' + TestObject.as_csv, '>test,' + TestObject.as_csv, ''].join("\n") assert_equal(csv, formatter.format(object, fasta)) From f4de122e76a13fda87ace59650e04e9270a8682a Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 16:06:09 +0200 Subject: [PATCH 29/64] add optional file name to the config object --- lib/configuration.rb | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/configuration.rb b/lib/configuration.rb index 38705c4d..3efcdc69 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -3,8 +3,15 @@ class Configuration attr_reader :config attr_reader :file_name - def initialize - @file_name = File.join(Dir.home, '.unipeptrc') + # Creates a new config object, based on a given YAML file. If no filename + # given, '.unipeptrc' in the home dir of the user will be used. + # + # If the file doesn't exist, an empty config will be loaded. + # + # @param [String] file An optional file name of the YAML file to create the + # config from + def initialize(file = nil) + @file_name = file ? file : File.join(Dir.home, '.unipeptrc') if !File.exist? file_name @config = {} else @@ -12,14 +19,18 @@ def initialize end end + # Saves the config to disk. If the file doesn't exist yet, a new one will be + # created def save File.open(file_name, 'w') { |f| f.write config.to_yaml } end + # forwards [] to the internal config hash def [](*args) config.[](*args) end + # forwards =[] to the internal config hash def []=(*args) config.[]=(*args) end From 016ce886db366ebe5ff9ca0fbd88a17595cec7cc Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 16:06:41 +0200 Subject: [PATCH 30/64] execute every test in a new temp dir so we can create files --- test/helper.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/helper.rb b/test/helper.rb index 558dd3d7..c331e3b0 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -17,11 +17,20 @@ module Unipept class TestCase < Minitest::Test def setup + # Enter tmp + @tmp_dir = Dir.mktmpdir('unipept-test') + @orig_wd = FileUtils.pwd + FileUtils.cd(@tmp_dir) + @orig_io = capture_io end def teardown uncapture_io(*@orig_io) + + # Exit tmp + FileUtils.cd(@orig_wd) + FileUtils.rm_rf(@tmp_dir) end def capture_io_with_input(input, &block) From c7ac1f374c6f810a27ea9690f23a5196bcdaa153 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 16:07:36 +0200 Subject: [PATCH 31/64] add configuration tests --- test/test_configuration.rb | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 test/test_configuration.rb diff --git a/test/test_configuration.rb b/test/test_configuration.rb new file mode 100644 index 00000000..27b9f2f7 --- /dev/null +++ b/test/test_configuration.rb @@ -0,0 +1,35 @@ +require_relative '../lib/configuration' + +module Unipept + class ConfigurationTestCase < Unipept::TestCase + def test_load_without_file + config = Configuration.new('no_file') + assert_equal({}, config.config) + end + + def test_load_without_file + hash = { 'key' => 'value' } + File.open('new_file', 'w') { |f| f.write hash.to_yaml } + config = Configuration.new('new_file') + assert_equal(hash, config.config) + end + + def test_save + file_name = 'no_file' + assert(!(File.exist? file_name)) + config = Configuration.new(file_name) + config.config['key'] = 'value' + config.save + assert((File.exist? file_name)) + other_config = Configuration.new(file_name) + assert_equal('value', other_config.config['key']) + end + + def test_assign + config = Configuration.new('no_file') + config['key'] = 'value' + assert_equal('value', config.config['key']) + assert_equal('value', config['key']) + end + end +end From cfb9116d7f92e7f88384794307fa31adf9fdcc80 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 16:28:47 +0200 Subject: [PATCH 32/64] fix test --- test/test_configuration.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_configuration.rb b/test/test_configuration.rb index 27b9f2f7..0b76535c 100644 --- a/test/test_configuration.rb +++ b/test/test_configuration.rb @@ -7,7 +7,7 @@ def test_load_without_file assert_equal({}, config.config) end - def test_load_without_file + def test_load_with_file hash = { 'key' => 'value' } File.open('new_file', 'w') { |f| f.write hash.to_yaml } config = Configuration.new('new_file') From 78979239e340f7d73a44f6278771c83122ee45f9 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 16:45:18 +0200 Subject: [PATCH 33/64] add tests for batch order --- test/test_bach_order.rb | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 test/test_bach_order.rb diff --git a/test/test_bach_order.rb b/test/test_bach_order.rb new file mode 100644 index 00000000..acbd4afd --- /dev/null +++ b/test/test_bach_order.rb @@ -0,0 +1,57 @@ +require_relative '../lib/batch_order' + +module Unipept + class BatchOrderTestCase < Unipept::TestCase + def test_single_batch + order = BatchOrder.new + out, _err = capture_io_while do + run_batch(order, [0]) + end + assert_equal(['0', ''].join("\n"), out) + end + + def test_double_batch + order = BatchOrder.new + out, _err = capture_io_while do + run_batch(order, [0, 1]) + end + assert_equal(['0', '1', ''].join("\n"), out) + end + + def test_missing_batch + order = BatchOrder.new + out, _err = capture_io_while do + run_batch(order, [1, 2]) + end + assert_equal('', out) + end + + def test_out_order_batch + order = BatchOrder.new + out, _err = capture_io_while do + run_batch(order, [1, 0]) + end + assert_equal(['0', '1', ''].join("\n"), out) + end + + def test_gap_batch + order = BatchOrder.new + out, _err = capture_io_while do + run_batch(order, [1, 4, 0]) + end + assert_equal(['0', '1', ''].join("\n"), out) + out, _err = capture_io_while do + run_batch(order, [2, 3, 5]) + end + assert_equal(['2', '3', '4', '5', ''].join("\n"), out) + end + + def run_batch(order, list) + list.each do |i| + order.wait(i) do + puts i + end + end + end + end +end From 9f8322598cc5c057cd7460adea97554733ba0547 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Mon, 8 Jun 2015 17:00:32 +0200 Subject: [PATCH 34/64] refactor batch_order --- lib/batch_order.rb | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/lib/batch_order.rb b/lib/batch_order.rb index 824f673a..1110ee80 100644 --- a/lib/batch_order.rb +++ b/lib/batch_order.rb @@ -7,17 +7,13 @@ def initialize @current = 0 end + # Executes block if it's its turn, queues the block in the other case. def wait(i, &block) - if i == @current - # start writing + those who have been waiting as well - block.call + @order[i] = block + return unless i == @current + while order[@current] + order.delete(@current).call @current += 1 - while order[@current] - order.delete(@current).call - @current += 1 - end - else - @order[i] = block end end end From bd374a0e88451decff394fc45d6fcfe1feb26947 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 09:50:53 +0200 Subject: [PATCH 35/64] refactor the unipept command --- bin/unipept | 199 +--------------- lib/commands.rb | 1 + lib/commands/unipept.rb | 215 ++++++++++++++++++ .../unipept}/api_runner.rb | 4 +- .../commands => commands/unipept}/pept2lca.rb | 0 .../unipept}/pept2prot.rb | 0 .../unipept}/pept2taxa.rb | 0 .../commands => commands/unipept}/taxa2lca.rb | 0 .../commands => commands/unipept}/taxonomy.rb | 0 lib/configuration.rb | 2 + lib/formatters.rb | 2 + lib/unipept.rb | 8 - lib/unipept/commands.rb | 5 - 13 files changed, 224 insertions(+), 212 deletions(-) create mode 100644 lib/commands/unipept.rb rename lib/{unipept/commands => commands/unipept}/api_runner.rb (99%) rename lib/{unipept/commands => commands/unipept}/pept2lca.rb (100%) rename lib/{unipept/commands => commands/unipept}/pept2prot.rb (100%) rename lib/{unipept/commands => commands/unipept}/pept2taxa.rb (100%) rename lib/{unipept/commands => commands/unipept}/taxa2lca.rb (100%) rename lib/{unipept/commands => commands/unipept}/taxonomy.rb (100%) delete mode 100644 lib/unipept.rb delete mode 100644 lib/unipept/commands.rb diff --git a/bin/unipept b/bin/unipept index 230e9ca0..4f51dc2f 100755 --- a/bin/unipept +++ b/bin/unipept @@ -1,203 +1,8 @@ #!usr/bin/env ruby - -require 'cri' -require 'typhoeus' -require 'yaml' -require 'json' -require 'fileutils' - -require_relative '../lib/unipept' +require_relative '../lib/commands' # Prevent broken pipe errors Signal.trap('PIPE', 'EXIT') Signal.trap('INT', 'EXIT') -root_cmd = Cri::Command.new_basic_root.modify do - name 'unipept' - summary 'Command line interface to Unipept web services.' - usage 'unipept subcommand [options]' - description <<-EOS - The unipept subcommands are command line wrappers around the Unipept web services. - - Subcommands that start with pept expect a list of tryptic peptides as input. Subcommands that start with tax expect a list of NCBI Taxonomy Identifiers as input. Input is passed - - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way the input is passed, in the order as listed above. Text files and standard input should have one tryptic peptide or one NCBI Taxonomy Identifier per line. - EOS - flag :v, :version, 'displays the version' - flag :q, :quiet, 'disable service messages' - option :i, :input, 'read input from file', argument: :required - option :o, :output, 'write output to file', argument: :required - option :f, :format, "define the output format (available: #{Unipept::Formatter.available.join ', ' }) (default: #{Unipept::Formatter.default})", argument: :required - - # Configuration options - option nil, 'host', 'specify the server running the Unipept web service', argument: :required - - run do |opts, _args, _cmd| - if opts[:version] - puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) - else - root_cmd.run(['help']) - end - end -end - -root_cmd.define_command('config') do - summary 'Set configuration options.' - usage 'config option [value]' - description <<-EOS - Sets or shows the value for configuration options. All settings are stored in the .unipeptrc file in the home directory of the user. - - Running the command with a value will set that value for the given option, running it without will show the current value. - - These options are currently supported: - - - host: Set the default host for api calls. - - Example: "unipept config host http://api.unipept.ugent.be" will set the default host to the public unipept server. - EOS - - run do |_opts, args, _cmd| - config = Unipept::Configuration.new - if args.size > 1 - config[args.first] = args[1] - config.save - elsif args.size == 1 - puts config[args.first] - elsif args.size == 0 - root_cmd.run(['config', '-h']) - end - end -end - -root_cmd.define_command('pept2taxa') do - usage 'pept2taxa [options]' - aliases :pt - summary 'Fetch taxa of Uniprot records that match tryptic peptides.' - description <<-EOS - For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed - - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. - - The unipept pept2taxa subcommand yields NCBI Taxonomy records as output. - EOS - - flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' - flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - option :x, :xml, 'Download the matched records from the NCBI web service as an xml-formatted file (specify output filename)', argument: :required - - runner Unipept::Commands::Pept2taxa -end - -root_cmd.define_command('pept2lca') do - usage 'pept2lca [options]' - aliases :pl - summary 'Fetch taxonomic lowest common ancestor of Uniprot records that match tryptic peptides.' - description <<-EOS - For each tryptic peptide the unipept pept2lca command retrieves from Unipept the lowest common ancestor of the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed - - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. - - The unipept pept2lca subcommand yields an NCBI Taxonomy record as output. - EOS - - flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' - flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - - runner Unipept::Commands::Pept2lca -end - -root_cmd.define_command('taxa2lca') do - usage 'taxa2lca [options]' - aliases :tl - summary 'Compute taxonomic lowest common ancestor for given list of taxa.' - description <<-EOS - The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed - - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line. - - The unipept taxonomy subcommand yields NCBI Taxonomy records as output. - EOS - - flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - - runner Unipept::Commands::Taxa2lca -end - -root_cmd.define_command('pept2prot') do - usage 'pept2prot [options]' - aliases :pp - summary 'Fetch Uniprot records that match tryptic peptides.' - description <<-EOS - For each tryptic peptide the unipept pept2prot command retrieves from Unipept all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed - - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. - - The unipept pept2prot subcommand yields Uniprot records as output. - EOS - - flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' - flag :a, :all, 'report all information fields of Uniprot records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - option :x, :xml, 'download XML-formatted Uniprot records into the specified download-directory. ', argument: :required - - runner Unipept::Commands::Pept2prot -end - -root_cmd.define_command('taxonomy') do - usage 'taxonomy [options]' - aliases :tax - summary 'Fetch taxonomic information from Unipept Taxonomy.' - description <<-EOS - The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed - - - as separate command line arguments - - - in one or more text files that are passed as an argument to the -i option - - - to standard input - - The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line. - - The unipept taxonomy subcommand yields NCBI Taxonomy records as output. - EOS - - flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - - runner Unipept::Commands::Taxonomy -end - -root_cmd.run(ARGV) +Unipept::Commands::Unipept.run(ARGV) diff --git a/lib/commands.rb b/lib/commands.rb index 3bd3226c..bf87a4b6 100644 --- a/lib/commands.rb +++ b/lib/commands.rb @@ -5,5 +5,6 @@ module Commands require_relative 'commands/peptfilter' require_relative 'commands/prot2pept' require_relative 'commands/uniprot' + require_relative 'commands/unipept' end end diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb new file mode 100644 index 00000000..dcef42f7 --- /dev/null +++ b/lib/commands/unipept.rb @@ -0,0 +1,215 @@ +require 'typhoeus' + +require_relative '../formatters' +require_relative '../configuration' +require_relative '../batch_order' +require_relative '../version' + +require_relative 'unipept/pept2lca' +require_relative 'unipept/pept2prot' +require_relative 'unipept/pept2taxa' +require_relative 'unipept/taxa2lca' +require_relative 'unipept/taxonomy' + +module Unipept + class Commands::Unipept + attr_reader :root_command + + @root_command = Cri::Command.new_basic_root.modify do + name 'unipept' + summary 'Command line interface to Unipept web services.' + usage 'unipept subcommand [options]' + description <<-EOS + The unipept subcommands are command line wrappers around the Unipept web services. + + Subcommands that start with pept expect a list of tryptic peptides as input. Subcommands that start with tax expect a list of NCBI Taxonomy Identifiers as input. Input is passed + + - as separate command line arguments + + - in one or more text files that are passed as an argument to the -i option + + - to standard input + + The command will give priority to the first way the input is passed, in the order as listed above. Text files and standard input should have one tryptic peptide or one NCBI Taxonomy Identifier per line. + EOS + flag :v, :version, 'displays the version' + flag :q, :quiet, 'disable service messages' + option :i, :input, 'read input from file', argument: :required + option :o, :output, 'write output to file', argument: :required + option :f, :format, "define the output format (available: #{Unipept::Formatter.available.join ', ' }) (default: #{Unipept::Formatter.default})", argument: :required + + # Configuration options + option nil, 'host', 'specify the server running the Unipept web service', argument: :required + + run do |opts, _args, _cmd| + if opts[:version] + puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) + else + root_cmd.run(['help']) + end + end + end + + @root_command.define_command('config') do + summary 'Set configuration options.' + usage 'config option [value]' + description <<-EOS + Sets or shows the value for configuration options. All settings are stored in the .unipeptrc file in the home directory of the user. + + Running the command with a value will set that value for the given option, running it without will show the current value. + + These options are currently supported: + + - host: Set the default host for api calls. + + Example: "unipept config host http://api.unipept.ugent.be" will set the default host to the public unipept server. + EOS + + run do |_opts, args, _cmd| + config = Unipept::Configuration.new + if args.size > 1 + config[args.first] = args[1] + config.save + elsif args.size == 1 + puts config[args.first] + elsif args.size == 0 + root_cmd.run(['config', '-h']) + end + end + end + + @root_command.define_command('pept2taxa') do + usage 'pept2taxa [options]' + aliases :pt + summary 'Fetch taxa of Uniprot records that match tryptic peptides.' + description <<-EOS + For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + + - as separate command line arguments + + - in one or more text files that are passed as an argument to the -i option + + - to standard input + + The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. + + The unipept pept2taxa subcommand yields NCBI Taxonomy records as output. + EOS + + flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' + flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + option :x, :xml, 'Download the matched records from the NCBI web service as an xml-formatted file (specify output filename)', argument: :required + + runner Commands::Pept2taxa + end + + @root_command.define_command('pept2lca') do + usage 'pept2lca [options]' + aliases :pl + summary 'Fetch taxonomic lowest common ancestor of Uniprot records that match tryptic peptides.' + description <<-EOS + For each tryptic peptide the unipept pept2lca command retrieves from Unipept the lowest common ancestor of the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed + + - as separate command line arguments + + - in one or more text files that are passed as an argument to the -i option + + - to standard input + + The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. + + The unipept pept2lca subcommand yields an NCBI Taxonomy record as output. + EOS + + flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' + flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + + runner Commands::Pept2lca + end + + @root_command.define_command('taxa2lca') do + usage 'taxa2lca [options]' + aliases :tl + summary 'Compute taxonomic lowest common ancestor for given list of taxa.' + description <<-EOS + The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed + + - as separate command line arguments + + - in one or more text files that are passed as an argument to the -i option + + - to standard input + + The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line. + + The unipept taxonomy subcommand yields NCBI Taxonomy records as output. + EOS + + flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + + runner Commands::Taxa2lca + end + + @root_command.define_command('pept2prot') do + usage 'pept2prot [options]' + aliases :pp + summary 'Fetch Uniprot records that match tryptic peptides.' + description <<-EOS + For each tryptic peptide the unipept pept2prot command retrieves from Unipept all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + + - as separate command line arguments + + - in one or more text files that are passed as an argument to the -i option + + - to standard input + + The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. + + The unipept pept2prot subcommand yields Uniprot records as output. + EOS + + flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' + flag :a, :all, 'report all information fields of Uniprot records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + option :x, :xml, 'download XML-formatted Uniprot records into the specified download-directory. ', argument: :required + + runner Commands::Pept2prot + end + + @root_command.define_command('taxonomy') do + usage 'taxonomy [options]' + aliases :tax + summary 'Fetch taxonomic information from Unipept Taxonomy.' + description <<-EOS + The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed + + - as separate command line arguments + + - in one or more text files that are passed as an argument to the -i option + + - to standard input + + The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line. + + The unipept taxonomy subcommand yields NCBI Taxonomy records as output. + EOS + + flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + + runner Commands::Taxonomy + end + + # Invokes the unipept command-line tool with the given arguments. + # + # @param [Array] args An array of command-line arguments + # + # @return [void] + def self.run(args) + @root_command.run(args) + end + end +end diff --git a/lib/unipept/commands/api_runner.rb b/lib/commands/unipept/api_runner.rb similarity index 99% rename from lib/unipept/commands/api_runner.rb rename to lib/commands/unipept/api_runner.rb index f709a61c..1c092b09 100644 --- a/lib/unipept/commands/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -1,7 +1,7 @@ require 'set' -module Unipept::Commands - class ApiRunner < Cri::CommandRunner +module Unipept + class Commands::ApiRunner < Cri::CommandRunner def initialize(args, opts, cmd) super @configuration = Unipept::Configuration.new diff --git a/lib/unipept/commands/pept2lca.rb b/lib/commands/unipept/pept2lca.rb similarity index 100% rename from lib/unipept/commands/pept2lca.rb rename to lib/commands/unipept/pept2lca.rb diff --git a/lib/unipept/commands/pept2prot.rb b/lib/commands/unipept/pept2prot.rb similarity index 100% rename from lib/unipept/commands/pept2prot.rb rename to lib/commands/unipept/pept2prot.rb diff --git a/lib/unipept/commands/pept2taxa.rb b/lib/commands/unipept/pept2taxa.rb similarity index 100% rename from lib/unipept/commands/pept2taxa.rb rename to lib/commands/unipept/pept2taxa.rb diff --git a/lib/unipept/commands/taxa2lca.rb b/lib/commands/unipept/taxa2lca.rb similarity index 100% rename from lib/unipept/commands/taxa2lca.rb rename to lib/commands/unipept/taxa2lca.rb diff --git a/lib/unipept/commands/taxonomy.rb b/lib/commands/unipept/taxonomy.rb similarity index 100% rename from lib/unipept/commands/taxonomy.rb rename to lib/commands/unipept/taxonomy.rb diff --git a/lib/configuration.rb b/lib/configuration.rb index 3efcdc69..447d77f0 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -1,3 +1,5 @@ +require 'yaml' + module Unipept class Configuration attr_reader :config diff --git a/lib/formatters.rb b/lib/formatters.rb index 9f9017f1..58add798 100644 --- a/lib/formatters.rb +++ b/lib/formatters.rb @@ -1,3 +1,5 @@ +require 'json' + module Unipept class Formatter # The Hash of available formatters diff --git a/lib/unipept.rb b/lib/unipept.rb deleted file mode 100644 index d0a2af67..00000000 --- a/lib/unipept.rb +++ /dev/null @@ -1,8 +0,0 @@ -require_relative 'formatters' -require_relative 'configuration' -require_relative 'batch_order' -require_relative 'unipept/commands' -require_relative 'version' - -module Unipept -end diff --git a/lib/unipept/commands.rb b/lib/unipept/commands.rb deleted file mode 100644 index 0c08ddfd..00000000 --- a/lib/unipept/commands.rb +++ /dev/null @@ -1,5 +0,0 @@ -%w(pept2lca pept2taxa pept2prot taxa2lca taxonomy).each do |cmd| - require_relative File.join('commands', cmd) -end -module Unipept::Commands -end From 9cf70b00b42ca7907ef92f5231f7648c4fc6e000 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 11:00:15 +0200 Subject: [PATCH 36/64] more refactoring --- lib/commands/unipept.rb | 276 ++++++++++++++++++++++------------------ 1 file changed, 151 insertions(+), 125 deletions(-) diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index dcef42f7..f51c7199 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -13,194 +13,220 @@ module Unipept class Commands::Unipept - attr_reader :root_command + def initialize + @root_command = create_root_command + add_config_command + add_pept2taxa_command + add_pept2lca_command + add_taxa2lca_command + add_pept2prot_command + add_taxonomy_command + end - @root_command = Cri::Command.new_basic_root.modify do - name 'unipept' - summary 'Command line interface to Unipept web services.' - usage 'unipept subcommand [options]' - description <<-EOS - The unipept subcommands are command line wrappers around the Unipept web services. + def run(args) + @root_command.run(args) + end - Subcommands that start with pept expect a list of tryptic peptides as input. Subcommands that start with tax expect a list of NCBI Taxonomy Identifiers as input. Input is passed + def create_root_command + Cri::Command.new_basic_root.modify do + name 'unipept' + summary 'Command line interface to Unipept web services.' + usage 'unipept subcommand [options]' + description <<-EOS + The unipept subcommands are command line wrappers around the Unipept web services. - - as separate command line arguments + Subcommands that start with pept expect a list of tryptic peptides as input. Subcommands that start with tax expect a list of NCBI Taxonomy Identifiers as input. Input is passed - - in one or more text files that are passed as an argument to the -i option + - as separate command line arguments - - to standard input + - in one or more text files that are passed as an argument to the -i option - The command will give priority to the first way the input is passed, in the order as listed above. Text files and standard input should have one tryptic peptide or one NCBI Taxonomy Identifier per line. - EOS - flag :v, :version, 'displays the version' - flag :q, :quiet, 'disable service messages' - option :i, :input, 'read input from file', argument: :required - option :o, :output, 'write output to file', argument: :required - option :f, :format, "define the output format (available: #{Unipept::Formatter.available.join ', ' }) (default: #{Unipept::Formatter.default})", argument: :required + - to standard input - # Configuration options - option nil, 'host', 'specify the server running the Unipept web service', argument: :required + The command will give priority to the first way the input is passed, in the order as listed above. Text files and standard input should have one tryptic peptide or one NCBI Taxonomy Identifier per line. + EOS + flag :v, :version, 'displays the version' + flag :q, :quiet, 'disable service messages' + option :i, :input, 'read input from file', argument: :required + option :o, :output, 'write output to file', argument: :required + option :f, :format, "define the output format (available: #{Unipept::Formatter.available.join ', ' }) (default: #{Unipept::Formatter.default})", argument: :required - run do |opts, _args, _cmd| - if opts[:version] - puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) - else - root_cmd.run(['help']) + # Configuration options + option nil, 'host', 'specify the server running the Unipept web service', argument: :required + + run do |opts, _args, _cmd| + if opts[:version] + puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) + else + root_cmd.run(['help']) + end end end end - @root_command.define_command('config') do - summary 'Set configuration options.' - usage 'config option [value]' - description <<-EOS - Sets or shows the value for configuration options. All settings are stored in the .unipeptrc file in the home directory of the user. + def add_config_command + @root_command.define_command('config') do + summary 'Set configuration options.' + usage 'config option [value]' + description <<-EOS + Sets or shows the value for configuration options. All settings are stored in the .unipeptrc file in the home directory of the user. - Running the command with a value will set that value for the given option, running it without will show the current value. + Running the command with a value will set that value for the given option, running it without will show the current value. - These options are currently supported: + These options are currently supported: - - host: Set the default host for api calls. + - host: Set the default host for api calls. - Example: "unipept config host http://api.unipept.ugent.be" will set the default host to the public unipept server. - EOS + Example: "unipept config host http://api.unipept.ugent.be" will set the default host to the public unipept server. + EOS - run do |_opts, args, _cmd| - config = Unipept::Configuration.new - if args.size > 1 - config[args.first] = args[1] - config.save - elsif args.size == 1 - puts config[args.first] - elsif args.size == 0 - root_cmd.run(['config', '-h']) + run do |_opts, args, _cmd| + config = Unipept::Configuration.new + if args.size > 1 + config[args.first] = args[1] + config.save + elsif args.size == 1 + puts config[args.first] + elsif args.size == 0 + root_cmd.run(['config', '-h']) + end end end end - @root_command.define_command('pept2taxa') do - usage 'pept2taxa [options]' - aliases :pt - summary 'Fetch taxa of Uniprot records that match tryptic peptides.' - description <<-EOS - For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + def add_pept2taxa_command + @root_command.define_command('pept2taxa') do + usage 'pept2taxa [options]' + aliases :pt + summary 'Fetch taxa of Uniprot records that match tryptic peptides.' + description <<-EOS + For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed - - as separate command line arguments + - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in one or more text files that are passed as an argument to the -i option - - to standard input + - to standard input - The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. + The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. - The unipept pept2taxa subcommand yields NCBI Taxonomy records as output. - EOS + The unipept pept2taxa subcommand yields NCBI Taxonomy records as output. + EOS - flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' - flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - option :x, :xml, 'Download the matched records from the NCBI web service as an xml-formatted file (specify output filename)', argument: :required + flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' + flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + option :x, :xml, 'Download the matched records from the NCBI web service as an xml-formatted file (specify output filename)', argument: :required - runner Commands::Pept2taxa + runner Commands::Pept2taxa + end end - @root_command.define_command('pept2lca') do - usage 'pept2lca [options]' - aliases :pl - summary 'Fetch taxonomic lowest common ancestor of Uniprot records that match tryptic peptides.' - description <<-EOS - For each tryptic peptide the unipept pept2lca command retrieves from Unipept the lowest common ancestor of the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed + def add_pept2lca_command + @root_command.define_command('pept2lca') do + usage 'pept2lca [options]' + aliases :pl + summary 'Fetch taxonomic lowest common ancestor of Uniprot records that match tryptic peptides.' + description <<-EOS + For each tryptic peptide the unipept pept2lca command retrieves from Unipept the lowest common ancestor of the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed - - as separate command line arguments + - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in one or more text files that are passed as an argument to the -i option - - to standard input + - to standard input - The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. + The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. - The unipept pept2lca subcommand yields an NCBI Taxonomy record as output. - EOS + The unipept pept2lca subcommand yields an NCBI Taxonomy record as output. + EOS - flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' - flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' + flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - runner Commands::Pept2lca + runner Commands::Pept2lca + end end - @root_command.define_command('taxa2lca') do - usage 'taxa2lca [options]' - aliases :tl - summary 'Compute taxonomic lowest common ancestor for given list of taxa.' - description <<-EOS - The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed + def add_taxa2lca_command + @root_command.define_command('taxa2lca') do + usage 'taxa2lca [options]' + aliases :tl + summary 'Compute taxonomic lowest common ancestor for given list of taxa.' + description <<-EOS + The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed - - as separate command line arguments + - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in one or more text files that are passed as an argument to the -i option - - to standard input + - to standard input - The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line. + The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line. - The unipept taxonomy subcommand yields NCBI Taxonomy records as output. - EOS + The unipept taxonomy subcommand yields NCBI Taxonomy records as output. + EOS - flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - runner Commands::Taxa2lca + runner Commands::Taxa2lca + end end - @root_command.define_command('pept2prot') do - usage 'pept2prot [options]' - aliases :pp - summary 'Fetch Uniprot records that match tryptic peptides.' - description <<-EOS - For each tryptic peptide the unipept pept2prot command retrieves from Unipept all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed + def add_pept2prot_command + @root_command.define_command('pept2prot') do + usage 'pept2prot [options]' + aliases :pp + summary 'Fetch Uniprot records that match tryptic peptides.' + description <<-EOS + For each tryptic peptide the unipept pept2prot command retrieves from Unipept all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed - - as separate command line arguments + - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in one or more text files that are passed as an argument to the -i option - - to standard input + - to standard input - The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. + The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line. - The unipept pept2prot subcommand yields Uniprot records as output. - EOS + The unipept pept2prot subcommand yields Uniprot records as output. + EOS - flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' - flag :a, :all, 'report all information fields of Uniprot records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - option :x, :xml, 'download XML-formatted Uniprot records into the specified download-directory. ', argument: :required + flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' + flag :a, :all, 'report all information fields of Uniprot records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + option :x, :xml, 'download XML-formatted Uniprot records into the specified download-directory. ', argument: :required - runner Commands::Pept2prot + runner Commands::Pept2prot + end end - @root_command.define_command('taxonomy') do - usage 'taxonomy [options]' - aliases :tax - summary 'Fetch taxonomic information from Unipept Taxonomy.' - description <<-EOS - The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed + def add_taxonomy_command + @root_command.define_command('taxonomy') do + usage 'taxonomy [options]' + aliases :tax + summary 'Fetch taxonomic information from Unipept Taxonomy.' + description <<-EOS + The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed - - as separate command line arguments + - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in one or more text files that are passed as an argument to the -i option - - to standard input + - to standard input - The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line. + The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line. - The unipept taxonomy subcommand yields NCBI Taxonomy records as output. - EOS + The unipept taxonomy subcommand yields NCBI Taxonomy records as output. + EOS - flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' - option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true + flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' + option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - runner Commands::Taxonomy + runner Commands::Taxonomy + end end # Invokes the unipept command-line tool with the given arguments. @@ -209,7 +235,7 @@ class Commands::Unipept # # @return [void] def self.run(args) - @root_command.run(args) + new.run(args) end end end From ab74789affff2fb1733f00f5451e1f3b68f5ec2a Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 11:14:04 +0200 Subject: [PATCH 37/64] remove aliases --- lib/commands/unipept.rb | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index f51c7199..a1a7650c 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -97,7 +97,6 @@ def add_config_command def add_pept2taxa_command @root_command.define_command('pept2taxa') do usage 'pept2taxa [options]' - aliases :pt summary 'Fetch taxa of Uniprot records that match tryptic peptides.' description <<-EOS For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed @@ -125,7 +124,6 @@ def add_pept2taxa_command def add_pept2lca_command @root_command.define_command('pept2lca') do usage 'pept2lca [options]' - aliases :pl summary 'Fetch taxonomic lowest common ancestor of Uniprot records that match tryptic peptides.' description <<-EOS For each tryptic peptide the unipept pept2lca command retrieves from Unipept the lowest common ancestor of the set of taxa from all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed @@ -152,7 +150,6 @@ def add_pept2lca_command def add_taxa2lca_command @root_command.define_command('taxa2lca') do usage 'taxa2lca [options]' - aliases :tl summary 'Compute taxonomic lowest common ancestor for given list of taxa.' description <<-EOS The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed @@ -178,7 +175,6 @@ def add_taxa2lca_command def add_pept2prot_command @root_command.define_command('pept2prot') do usage 'pept2prot [options]' - aliases :pp summary 'Fetch Uniprot records that match tryptic peptides.' description <<-EOS For each tryptic peptide the unipept pept2prot command retrieves from Unipept all Uniprot records whose protein sequence contains an exact matches to the tryptic peptide. The command expects a list of tryptic peptides that are passed @@ -206,7 +202,6 @@ def add_pept2prot_command def add_taxonomy_command @root_command.define_command('taxonomy') do usage 'taxonomy [options]' - aliases :tax summary 'Fetch taxonomic information from Unipept Taxonomy.' description <<-EOS The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed From 6df0e316952730e1a1e23c3b09795d138191e3b7 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 11:27:35 +0200 Subject: [PATCH 38/64] fix help --- lib/commands/unipept.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index a1a7650c..16608359 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -58,7 +58,7 @@ def create_root_command if opts[:version] puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) else - root_cmd.run(['help']) + Commands::Unipept.run(['help']) end end end @@ -88,7 +88,7 @@ def add_config_command elsif args.size == 1 puts config[args.first] elsif args.size == 0 - root_cmd.run(['config', '-h']) + Commands::Unipept.run(['config', '-h']) end end end From c0e27c605894ca98790b9b02aa6c8330b563d264 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 11:43:56 +0200 Subject: [PATCH 39/64] refactor the config command --- lib/commands/unipept.rb | 17 ++++------------- lib/commands/unipept/config.rb | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 lib/commands/unipept/config.rb diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index 16608359..3170db02 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -5,6 +5,7 @@ require_relative '../batch_order' require_relative '../version' +require_relative 'unipept/config' require_relative 'unipept/pept2lca' require_relative 'unipept/pept2prot' require_relative 'unipept/pept2taxa' @@ -54,11 +55,11 @@ def create_root_command # Configuration options option nil, 'host', 'specify the server running the Unipept web service', argument: :required - run do |opts, _args, _cmd| + run do |opts, _args, cmd| if opts[:version] puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) else - Commands::Unipept.run(['help']) + puts cmd.help end end end @@ -80,17 +81,7 @@ def add_config_command Example: "unipept config host http://api.unipept.ugent.be" will set the default host to the public unipept server. EOS - run do |_opts, args, _cmd| - config = Unipept::Configuration.new - if args.size > 1 - config[args.first] = args[1] - config.save - elsif args.size == 1 - puts config[args.first] - elsif args.size == 0 - Commands::Unipept.run(['config', '-h']) - end - end + runner Commands::Config end end diff --git a/lib/commands/unipept/config.rb b/lib/commands/unipept/config.rb new file mode 100644 index 00000000..aaf55111 --- /dev/null +++ b/lib/commands/unipept/config.rb @@ -0,0 +1,18 @@ +module Unipept + class Commands::Config < Cri::CommandRunner + def run + config = Unipept::Configuration.new + key = arguments[0] + value = arguments[1] + if arguments.size == 2 + config[key] = value + config.save + puts key + ' was set to ' + value + elsif arguments.size == 1 + puts config[key] + else + puts command.help + end + end + end +end From 45d6a7bd74a4005b02f6bc23db59f7a76441ebed Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 13:28:14 +0200 Subject: [PATCH 40/64] refactor config --- lib/commands/unipept.rb | 2 +- lib/commands/unipept/config.rb | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index 3170db02..bf332aa5 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -59,7 +59,7 @@ def create_root_command if opts[:version] puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) else - puts cmd.help + abort cmd.help end end end diff --git a/lib/commands/unipept/config.rb b/lib/commands/unipept/config.rb index aaf55111..90dacaf3 100644 --- a/lib/commands/unipept/config.rb +++ b/lib/commands/unipept/config.rb @@ -1,18 +1,31 @@ module Unipept class Commands::Config < Cri::CommandRunner def run - config = Unipept::Configuration.new - key = arguments[0] - value = arguments[1] + if arguments.size == 0 || arguments.size > 2 + abort command.help + end + + key, value = *arguments + if arguments.size == 2 - config[key] = value - config.save + set_config(key, value) puts key + ' was set to ' + value - elsif arguments.size == 1 - puts config[key] else - puts command.help + puts get_config(key) end end + + def config + Unipept::Configuration.new + end + + def set_config(key, value) + config[key] = value + config.save + end + + def get_config(key) + config[key] + end end end From 04e6019eca9fd552cd969afe8e6b5d4763f1bdfc Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 13:28:31 +0200 Subject: [PATCH 41/64] add tests for unipept --- test/commands/test_unipept.rb | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 test/commands/test_unipept.rb diff --git a/test/commands/test_unipept.rb b/test/commands/test_unipept.rb new file mode 100644 index 00000000..232dc538 --- /dev/null +++ b/test/commands/test_unipept.rb @@ -0,0 +1,30 @@ +require_relative '../../lib/commands' + +module Unipept + class UnipeptTestCase < Unipept::TestCase + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(-h)) + end + end + assert(out.include? 'show help for this command') + + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(--help)) + end + end + assert(out.include? 'show help for this command') + end + + def test_no_valid_subcommand + _out, err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w()) + end + end + assert(err.include? 'show help for this command') + end + end +end From 3b21929abe933d4e032fc3556b3533805f0c1767 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 14:09:29 +0200 Subject: [PATCH 42/64] add tests for unipept config --- lib/commands/unipept/config.rb | 3 +- lib/configuration.rb | 5 +++ test/commands/unipept/test_config.rb | 64 ++++++++++++++++++++++++++++ test/test_configuration.rb | 8 ++++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 test/commands/unipept/test_config.rb diff --git a/lib/commands/unipept/config.rb b/lib/commands/unipept/config.rb index 90dacaf3..f942d36c 100644 --- a/lib/commands/unipept/config.rb +++ b/lib/commands/unipept/config.rb @@ -16,7 +16,8 @@ def run end def config - Unipept::Configuration.new + @config = Unipept::Configuration.new unless @config + @config end def set_config(key, value) diff --git a/lib/configuration.rb b/lib/configuration.rb index 447d77f0..e72dffa6 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -27,6 +27,11 @@ def save File.open(file_name, 'w') { |f| f.write config.to_yaml } end + # Deletes a key + def delete(key) + config.delete(key) + end + # forwards [] to the internal config hash def [](*args) config.[](*args) diff --git a/test/commands/unipept/test_config.rb b/test/commands/unipept/test_config.rb new file mode 100644 index 00000000..97c009b0 --- /dev/null +++ b/test/commands/unipept/test_config.rb @@ -0,0 +1,64 @@ +require_relative '../../../lib/commands' + +module Unipept + class UnipeptConfigTestCase < Unipept::TestCase + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(config -h)) + end + end + assert(out.include? 'show help for this command') + + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(config --help)) + end + end + assert(out.include? 'show help for this command') + end + + def test_no_args + _out, err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(config)) + end + end + assert(err.include? 'show help for this command') + end + + def test_too_many_args + _out, err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(config a b c)) + end + end + assert(err.include? 'show help for this command') + end + + def test_setting_config + value = Random.rand.to_s + config = Unipept::Configuration.new + config.delete('test') + config.save + out, _err = capture_io_while do + Commands::Unipept.run(['config', 'test', value]) + end + assert_equal('test was set to ' + value, out.chomp) + assert_equal(value, Unipept::Configuration.new['test']) + end + + def test_getting_config + value = Random.rand.to_s + config = Unipept::Configuration.new + config['test'] = value + config.save + out, _err = capture_io_while do + Commands::Unipept.run(%w(config test)) + end + config.delete('test') + config.save + assert_equal(value, out.chomp) + end + end +end diff --git a/test/test_configuration.rb b/test/test_configuration.rb index 0b76535c..ec912aa7 100644 --- a/test/test_configuration.rb +++ b/test/test_configuration.rb @@ -31,5 +31,13 @@ def test_assign assert_equal('value', config.config['key']) assert_equal('value', config['key']) end + + def test_delete + config = Configuration.new('no_file') + config['key'] = 'value' + assert_equal('value', config['key']) + config.delete('key') + assert_equal(nil, config['key']) + end end end From a8643eef27dc1694d98a656b0983f88af1081401 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 14:40:42 +0200 Subject: [PATCH 43/64] test version --- lib/commands/unipept.rb | 2 +- lib/commands/unipept/config.rb | 3 +-- lib/version.rb | 2 +- test/commands/test_unipept.rb | 7 +++++++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index bf332aa5..1876a099 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -57,7 +57,7 @@ def create_root_command run do |opts, _args, cmd| if opts[:version] - puts File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')) + puts Unipept::VERSION else abort cmd.help end diff --git a/lib/commands/unipept/config.rb b/lib/commands/unipept/config.rb index f942d36c..e547f0a6 100644 --- a/lib/commands/unipept/config.rb +++ b/lib/commands/unipept/config.rb @@ -16,8 +16,7 @@ def run end def config - @config = Unipept::Configuration.new unless @config - @config + @config ||= Unipept::Configuration.new end def set_config(key, value) diff --git a/lib/version.rb b/lib/version.rb index 1182c3bf..2986a649 100644 --- a/lib/version.rb +++ b/lib/version.rb @@ -1,3 +1,3 @@ module Unipept - VERSION = File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).strip + VERSION = File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).chomp end diff --git a/test/commands/test_unipept.rb b/test/commands/test_unipept.rb index 232dc538..992aa73b 100644 --- a/test/commands/test_unipept.rb +++ b/test/commands/test_unipept.rb @@ -26,5 +26,12 @@ def test_no_valid_subcommand end assert(err.include? 'show help for this command') end + + def test_version + out, _err = capture_io_while do + Commands::Unipept.run(%w(-v)) + end + assert_equal(VERSION, out.chomp) + end end end From 60288cebce0735c48727e7d7a98fdc76d8211887 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 17:59:16 +0200 Subject: [PATCH 44/64] refactor API runner --- lib/commands/unipept/api_runner.rb | 198 ++++++++++++----------------- 1 file changed, 82 insertions(+), 116 deletions(-) diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index 1c092b09..adc493b9 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -7,163 +7,141 @@ def initialize(args, opts, cmd) @configuration = Unipept::Configuration.new set_configuration - @user_agent = 'Unipept CLI - unipept ' + Unipept::VERSION - @url = "#{@host}/api/v1/#{cmd.name}.json" @message_url = "#{@host}/api/v1/messages.json" end + # Sets the configurable options of the command line app: + # - the host + # - the user agent def set_configuration + @host = get_host + @user_agent = 'Unipept CLI - unipept ' + Unipept::VERSION + end + + # Returns the host. If a value is defined by both an option and the config + # file, the value of the option is used. + def get_host # find host in opts first - if options[:host] - host = options[:host] - else - host = @configuration['host'] - end + host = options[:host] ? options[:host] : @configuration['host'] # No host has been set? if host.nil? || host.empty? - puts 'WARNING: no host has been set, you can set the host with `unipept config host http://localhost:3000/`' - exit 1 - end - unless host.start_with? 'http://' - host = "http://#{host}" + abort 'WARNING: no host has been set, you can set the host with `unipept config host http://api.unipept.ugent.be:3000/`' end - @host = host - end - - def input_iterator - # Argument over file input over stdin - if !arguments.empty? - arguments.each + # add http:// if needed + if host.start_with?('http://') || host.start_with?('https://') + host else - if options[:input] - IO.foreach(options[:input]) - else - STDIN.each_line - end + "http://#{host}" end end + # Returns an input iterator to use for the request. + # - if arguments are given, uses arguments + # - if the input file option is given, uses file input + # - if none of the previous are given, uses stdin + def get_input_iterator + return arguments.each unless arguments.empty? + return IO.foreach(options[:input]) if options[:input] + STDIN.each_line + end + + # Returns the default batch_size of a command. def batch_size 100 end - def url_options(sub_part) - filter = options[:select] ? options[:select] : [] - if filter.empty? - names = true - else - names = filter.any? { |f| /.*name.*/.match f } - end - { input: sub_part, + # Constructs a request body (a Hash) for set of input strings, using the + # options supplied by the user. + def get_request_body(input, selected_fields) + names = selected_fields.empty? || selected_fields.any? { |f| /.*name.*/.match f } + { input: input, equate_il: options[:equate], extra: options[:all], - names: names + names: options[:all] && names } end - def get_server_message + # Checks if the server has a message and prints it if not empty. + # We will only check this once a day and won't print anything if the quiet + # option is set or if we output to a file. + def print_server_message return if options[:quiet] return unless STDOUT.tty? - last_fetched = @configuration['last_fetch_date'] - return unless last_fetched.nil? || (last_fetched + 60 * 60 * 24) < Time.now - version = Unipept::VERSION - resp = Typhoeus.get(@message_url, params: { version: version }) - puts resp.body unless resp.body.chomp.empty? + return if recently_fetched? @configuration['last_fetch_date'] = Time.now @configuration.save + resp = Typhoeus.get(@message_url, params: { version: Unipept::VERSION }).body.chomp + puts resp unless resp.empty? end - def run - get_server_message + # Returns true if the last check for a server message was less than a day + # ago. + def recently_fetched? + last_fetched = @configuration['last_fetch_date'] + !last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now + end + def run + print_server_message + hydra = Typhoeus::Hydra.new(max_concurrency: 10) formatter = Unipept::Formatter.new_for_format(options[:format]) - peptides = input_iterator - - filter_list = options[:select] ? options[:select] : [] - # Parse filter list: convert to regex and split on commas - filter_list = filter_list.map { |f| f.include?(',') ? f.split(',') : f }.flatten.map { |f| glob_to_regex(f) } - batch_order = Unipept::BatchOrder.new - printed_header = false - result = [] - - hydra = Typhoeus::Hydra.new(max_concurrency: 10) - num_req = 0 + input = get_input_iterator + selected_fields = options[:select] ? options[:select] : [] + selected_fields = selected_fields.map { |f| f.include?(',') ? f.split(',') : f }.flatten.map { |f| glob_to_regex(f) } - peptide_iterator(peptides) do |sub_division, i, fasta_input| + line_iterator(input) do |input_slice, batch_id, fasta_input| request = Typhoeus::Request.new( @url, method: :post, - body: url_options(sub_division), + body: get_request_body(input_slice, selected_fields), accept_encoding: 'gzip', headers: { 'User-Agent' => @user_agent } ) request.on_complete do |resp| if resp.success? - # if JSON parsing goes wrong - sub_result = JSON[resp.response_body] rescue [] - sub_result = [sub_result] unless sub_result.is_a? Array - - sub_result.map! { |r| r.select! { |k, _v| filter_list.any? { |f| f.match k } } } unless filter_list.empty? - - if options[:xml] - result << sub_result - end + result = JSON[resp.response_body] rescue [] + result = [result] unless result.is_a? Array + result.map! { |r| r.select! { |k, _v| selected_fields.any? { |f| f.match k } } } unless selected_fields.empty? # wait till it's our turn to write - batch_order.wait(i) do - unless sub_result.empty? - unless printed_header - write_to_output formatter.header(sub_result, fasta_input) - printed_header = true - end - write_to_output formatter.format(sub_result, fasta_input) + batch_order.wait(batch_id) do + unless result.empty? + write_to_output formatter.header(result, fasta_input) if batch_id == 0 + write_to_output formatter.format(result, fasta_input) end end elsif resp.timed_out? - - batch_order.wait(i) do + batch_order.wait(batch_id) do $stderr.puts 'request timed out, continuing anyway, but results might be incomplete' save_error('request timed out, continuing anyway, but results might be incomplete') end - elsif resp.code == 0 - - batch_order.wait(i) do + batch_order.wait(batch_id) do $stderr.puts 'could not get an http response, continuing anyway, but results might be incomplete' save_error(resp.return_message) end - else - - batch_order.wait(i) do + batch_order.wait(batch_id) do $stderr.puts "received a non-successful http response #{resp.code}, continuing anyway, but results might be incomplete" save_error("Got #{resp.code}: #{resp.response_body}\nRequest headers: #{resp.request.options}\nRequest body:\n#{resp.request.encoded_body}\n\n") end - end end hydra.queue request - num_req += 1 - if num_req % 200 == 0 + if batch_id % 200 == 0 hydra.run end end hydra.run - - begin - download_xml(result) - rescue - STDERR.puts 'Something went wrong while downloading xml information! please check the output' - end end def save_error(message) @@ -185,46 +163,34 @@ def write_to_output(string) end end - def download_xml(result) - return unless options[:xml] - File.open(options[:xml] + '.xml', 'wb') do |f| - f.write Typhoeus.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=#{result.first.map { |h| h['taxon_id'] }.join(',')}&retmode=xml").response_body - end - end - - def peptide_iterator(peptides, &block) - first = peptides.next rescue return - if first.start_with? '>' - # FASTA MODE ENGAGED - fasta_header = first.chomp - peptides.each_slice(batch_size).with_index do |sub, i| - fasta_input = [] - # Use a set so we don't ask data twice - newsub = Set.new - - # Iterate to find fasta headers - sub.each do |s| - s.chomp! - if s.start_with? '>' - # Save the FASTA header when found - fasta_header = s + def line_iterator(lines, &block) + first_line = lines.next rescue return + if first_line.start_with? '>' + current_fasta_header = first_line.chomp + lines.each_slice(batch_size).with_index do |slice, i| + fasta_mapper = [] + input_set = Set.new + + slice.each do |line| + line.chomp! + if line.start_with? '>' + current_fasta_header = line else - # Add the input pair to our input list - fasta_input << [fasta_header, s] - newsub << s + fasta_mapper << [current_fasta_header, line] + input_set << line end end - block.call(newsub.to_a, i, fasta_input) + block.call(input_set.to_a, i, fasta_mapper) end else - # shame we have to be this explicit, but it appears to be the only way Enumerator.new do |y| y << first loop do - y << peptides.next + y << lines.next end end.each_slice(batch_size).with_index(&block) + end end From 46bf7eb4e4b75bf1bf6cc2d4fdd3036c636eccd8 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 18:52:28 +0200 Subject: [PATCH 45/64] remove xml download option --- lib/commands/unipept.rb | 2 -- lib/commands/unipept/api_runner.rb | 7 +++---- lib/commands/unipept/pept2prot.rb | 10 ---------- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index 1876a099..a524ae57 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -106,7 +106,6 @@ def add_pept2taxa_command flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' flag :a, :all, 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.' option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - option :x, :xml, 'Download the matched records from the NCBI web service as an xml-formatted file (specify output filename)', argument: :required runner Commands::Pept2taxa end @@ -184,7 +183,6 @@ def add_pept2prot_command flag :e, :equate, 'equate isoleucine (I) and leucine (L) when matching peptides' flag :a, :all, 'report all information fields of Uniprot records available in Unipept. Note that this may have a performance penalty.' option :s, :select, 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.', argument: :required, multiple: true - option :x, :xml, 'download XML-formatted Uniprot records into the specified download-directory. ', argument: :required runner Commands::Pept2prot end diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index adc493b9..7426c548 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -94,7 +94,7 @@ def run selected_fields = options[:select] ? options[:select] : [] selected_fields = selected_fields.map { |f| f.include?(',') ? f.split(',') : f }.flatten.map { |f| glob_to_regex(f) } - line_iterator(input) do |input_slice, batch_id, fasta_input| + line_iterator(input) do |input_slice, batch_id, fasta_mapper| request = Typhoeus::Request.new( @url, method: :post, @@ -111,8 +111,8 @@ def run # wait till it's our turn to write batch_order.wait(batch_id) do unless result.empty? - write_to_output formatter.header(result, fasta_input) if batch_id == 0 - write_to_output formatter.format(result, fasta_input) + write_to_output formatter.header(result, fasta_mapper) if batch_id == 0 + write_to_output formatter.format(result, fasta_mapper) end end @@ -190,7 +190,6 @@ def line_iterator(lines, &block) y << lines.next end end.each_slice(batch_size).with_index(&block) - end end diff --git a/lib/commands/unipept/pept2prot.rb b/lib/commands/unipept/pept2prot.rb index 9d5b62a2..4e73c9de 100644 --- a/lib/commands/unipept/pept2prot.rb +++ b/lib/commands/unipept/pept2prot.rb @@ -2,16 +2,6 @@ module Unipept::Commands class Pept2prot < ApiRunner - def download_xml(result) - return unless options[:xml] - FileUtils.mkdir_p(options[:xml]) - result.first.each do |prot| - File.open(options[:xml] + "/#{prot['uniprot_id']}.xml", 'wb') do |f| - f.write Typhoeus.get("http://www.uniprot.org/uniprot/#{prot['uniprot_id']}.xml").response_body - end - end - end - def batch_size 10 end From 80f687c8dc2a521b2702e56aba7f61cc17b47690 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 19:23:50 +0200 Subject: [PATCH 46/64] add bach sizes for all commands --- lib/commands/unipept/pept2lca.rb | 7 +++++++ lib/commands/unipept/pept2prot.rb | 6 +++++- lib/commands/unipept/pept2taxa.rb | 7 +++++++ lib/commands/unipept/taxonomy.rb | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/commands/unipept/pept2lca.rb b/lib/commands/unipept/pept2lca.rb index 385dcbe3..7fe15f59 100644 --- a/lib/commands/unipept/pept2lca.rb +++ b/lib/commands/unipept/pept2lca.rb @@ -1,5 +1,12 @@ require_relative 'api_runner' module Unipept::Commands class Pept2lca < ApiRunner + def batch_size + if options[:all] + 100 + else + 1000 + end + end end end diff --git a/lib/commands/unipept/pept2prot.rb b/lib/commands/unipept/pept2prot.rb index 4e73c9de..efb5af78 100644 --- a/lib/commands/unipept/pept2prot.rb +++ b/lib/commands/unipept/pept2prot.rb @@ -3,7 +3,11 @@ module Unipept::Commands class Pept2prot < ApiRunner def batch_size - 10 + if options[:all] + 5 + else + 10 + end end end end diff --git a/lib/commands/unipept/pept2taxa.rb b/lib/commands/unipept/pept2taxa.rb index b5863836..93697bfa 100644 --- a/lib/commands/unipept/pept2taxa.rb +++ b/lib/commands/unipept/pept2taxa.rb @@ -1,5 +1,12 @@ require_relative 'api_runner' module Unipept::Commands class Pept2taxa < ApiRunner + def batch_size + if options[:all] + 5 + else + 10 + end + end end end diff --git a/lib/commands/unipept/taxonomy.rb b/lib/commands/unipept/taxonomy.rb index da371a67..98168478 100644 --- a/lib/commands/unipept/taxonomy.rb +++ b/lib/commands/unipept/taxonomy.rb @@ -1,5 +1,8 @@ require_relative 'api_runner' module Unipept::Commands class Taxonomy < ApiRunner + def batch_size + 100 + end end end From c430437e845361ef2a620babb82e4c6781c3d268 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 21:05:12 +0200 Subject: [PATCH 47/64] more comments for api runner --- lib/commands/unipept/api_runner.rb | 83 ++++++++++++++++++------------ 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index 7426c548..77bb5c28 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -45,7 +45,7 @@ def get_host def get_input_iterator return arguments.each unless arguments.empty? return IO.foreach(options[:input]) if options[:input] - STDIN.each_line + $stdin.each_line end # Returns the default batch_size of a command. @@ -69,7 +69,7 @@ def get_request_body(input, selected_fields) # option is set or if we output to a file. def print_server_message return if options[:quiet] - return unless STDOUT.tty? + return unless $stdout.tty? return if recently_fetched? @configuration['last_fetch_date'] = Time.now @configuration.save @@ -84,6 +84,7 @@ def recently_fetched? !last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now end + # Runs the command def run print_server_message hydra = Typhoeus::Hydra.new(max_concurrency: 10) @@ -118,17 +119,14 @@ def run elsif resp.timed_out? batch_order.wait(batch_id) do - $stderr.puts 'request timed out, continuing anyway, but results might be incomplete' save_error('request timed out, continuing anyway, but results might be incomplete') end elsif resp.code == 0 batch_order.wait(batch_id) do - $stderr.puts 'could not get an http response, continuing anyway, but results might be incomplete' - save_error(resp.return_message) + save_error('could not get an http response, continuing anyway, but results might be incomplete' + resp.return_message) end else batch_order.wait(batch_id) do - $stderr.puts "received a non-successful http response #{resp.code}, continuing anyway, but results might be incomplete" save_error("Got #{resp.code}: #{resp.response_body}\nRequest headers: #{resp.request.options}\nRequest body:\n#{resp.request.encoded_body}\n\n") end end @@ -144,55 +142,72 @@ def run hydra.run end + # Saves an error to a new file in the .unipept directory in the users home + # directory. def save_error(message) path = File.expand_path(File.join(Dir.home, '.unipept', "unipept-#{Time.now.strftime('%F-%T')}.log")) FileUtils.mkdir_p File.dirname(path) - File.open(path, 'w') do |f| - f.write message - end + File.open(path, 'w') { |f| f.write message } $stderr.puts "API request failed! log can be found in #{path}" end + # Write a string to the output defined by the command. If a file is given, + # write it to the file. If not, write to stdout def write_to_output(string) if options[:output] - File.open(options[:output], 'a') do |f| - f.write string - end + File.open(options[:output], 'a') { |f| f.write string } else puts string end end + # Splits the input lines into slices, based on the batch_size of the current + # command. Executes the given block for each of the batches. + # + # Supports both normal input and input in the fasta format. def line_iterator(lines, &block) first_line = lines.next rescue return if first_line.start_with? '>' - current_fasta_header = first_line.chomp - lines.each_slice(batch_size).with_index do |slice, i| - fasta_mapper = [] - input_set = Set.new - - slice.each do |line| - line.chomp! - if line.start_with? '>' - current_fasta_header = line - else - fasta_mapper << [current_fasta_header, line] - input_set << line - end - end - - block.call(input_set.to_a, i, fasta_mapper) - end + fasta_iterator(first_line, lines, &block) else - Enumerator.new do |y| - y << first - loop do - y << lines.next + normal_iterator(first_line, lines, &block) + end + end + + # Splits the input lines in fasta format into slices, based on the + # batch_size of the current command. Executes the given block for each of + # the batches. + def fasta_iterator(first_line, next_lines, &block) + current_fasta_header = first_line.chomp + next_lines.each_slice(batch_size).with_index do |slice, i| + fasta_mapper = [] + input_set = Set.new + + slice.each do |line| + line.chomp! + if line.start_with? '>' + current_fasta_header = line + else + fasta_mapper << [current_fasta_header, line] + input_set << line end - end.each_slice(batch_size).with_index(&block) + end + + block.call(input_set.to_a, i, fasta_mapper) end end + # Splits the input lines into slices, based on the batch_size of the current + # command. Executes the given block for each of the batches. + def normal_iterator(first_line, next_lines, &block) + Enumerator.new do |y| + y << first_line + loop do + y << next_lines.next + end + end.each_slice(batch_size).with_index(&block) + end + private def glob_to_regex(string) From 2a945c1eb456d3f3a0752eebb1743b40e690af79 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 21:27:57 +0200 Subject: [PATCH 48/64] more api runner refactoring --- lib/commands/unipept/api_runner.rb | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index 77bb5c28..21705f84 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -42,7 +42,7 @@ def get_host # - if arguments are given, uses arguments # - if the input file option is given, uses file input # - if none of the previous are given, uses stdin - def get_input_iterator + def input_iterator return arguments.each unless arguments.empty? return IO.foreach(options[:input]) if options[:input] $stdin.each_line @@ -56,7 +56,7 @@ def batch_size # Constructs a request body (a Hash) for set of input strings, using the # options supplied by the user. def get_request_body(input, selected_fields) - names = selected_fields.empty? || selected_fields.any? { |f| /.*name.*/.match f } + names = selected_fields.empty? || selected_fields.any? { |f| /name/.match f.to_s } { input: input, equate_il: options[:equate], extra: options[:all], @@ -64,6 +64,11 @@ def get_request_body(input, selected_fields) } end + # Returns an array of regular expressions containing all the selected fields + def selected_fields + @selected_fields ||= [*options[:select]].map { |f| f.include?(',') ? f.split(',') : f }.flatten.map { |f| glob_to_regex(f) } + end + # Checks if the server has a message and prints it if not empty. # We will only check this once a day and won't print anything if the quiet # option is set or if we output to a file. @@ -91,11 +96,7 @@ def run formatter = Unipept::Formatter.new_for_format(options[:format]) batch_order = Unipept::BatchOrder.new - input = get_input_iterator - selected_fields = options[:select] ? options[:select] : [] - selected_fields = selected_fields.map { |f| f.include?(',') ? f.split(',') : f }.flatten.map { |f| glob_to_regex(f) } - - line_iterator(input) do |input_slice, batch_id, fasta_mapper| + line_iterator(input_iterator) do |input_slice, batch_id, fasta_mapper| request = Typhoeus::Request.new( @url, method: :post, @@ -133,10 +134,7 @@ def run end hydra.queue request - - if batch_id % 200 == 0 - hydra.run - end + hydra.run if batch_id % 200 == 0 end hydra.run From cbe23dc80a4fc51510be22eec441c3d5f97e65e3 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Tue, 9 Jun 2015 21:51:43 +0200 Subject: [PATCH 49/64] move batch iterator to separate file --- lib/batch_iterator.rb | 58 ++++++++++++++++++++++++++++++ lib/commands/unipept.rb | 1 + lib/commands/unipept/api_runner.rb | 54 ++-------------------------- 3 files changed, 62 insertions(+), 51 deletions(-) create mode 100644 lib/batch_iterator.rb diff --git a/lib/batch_iterator.rb b/lib/batch_iterator.rb new file mode 100644 index 00000000..f31722c4 --- /dev/null +++ b/lib/batch_iterator.rb @@ -0,0 +1,58 @@ +require 'set' + +module Unipept + class BatchIterator + def initialize(batch_size) + @batch_size = batch_size + end + + # Splits the input lines into slices, based on the batch_size of the current + # command. Executes the given block for each of the batches. + # + # Supports both normal input and input in the fasta format. + def iterate(lines, &block) + first_line = lines.next rescue return + if first_line.start_with? '>' + fasta_iterator(first_line, lines, &block) + else + normal_iterator(first_line, lines, &block) + end + end + + private + + # Splits the input lines in fasta format into slices, based on the + # batch_size of the current command. Executes the given block for each of + # the batches. + def fasta_iterator(first_line, next_lines, &block) + current_fasta_header = first_line.chomp + next_lines.each_slice(@batch_size).with_index do |slice, i| + fasta_mapper = [] + input_set = Set.new + + slice.each do |line| + line.chomp! + if line.start_with? '>' + current_fasta_header = line + else + fasta_mapper << [current_fasta_header, line] + input_set << line + end + end + + block.call(input_set.to_a, i, fasta_mapper) + end + end + + # Splits the input lines into slices, based on the batch_size of the current + # command. Executes the given block for each of the batches. + def normal_iterator(first_line, next_lines, &block) + Enumerator.new do |y| + y << first_line + loop do + y << next_lines.next + end + end.each_slice(@batch_size).with_index(&block) + end + end +end diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index a524ae57..0549f6c2 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -3,6 +3,7 @@ require_relative '../formatters' require_relative '../configuration' require_relative '../batch_order' +require_relative '../batch_iterator' require_relative '../version' require_relative 'unipept/config' diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index 21705f84..2f5b2067 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -1,5 +1,3 @@ -require 'set' - module Unipept class Commands::ApiRunner < Cri::CommandRunner def initialize(args, opts, cmd) @@ -66,7 +64,7 @@ def get_request_body(input, selected_fields) # Returns an array of regular expressions containing all the selected fields def selected_fields - @selected_fields ||= [*options[:select]].map { |f| f.include?(',') ? f.split(',') : f }.flatten.map { |f| glob_to_regex(f) } + @selected_fields ||= [*options[:select]].map { |f| f.split(',') }.flatten.map { |f| glob_to_regex(f) } end # Checks if the server has a message and prints it if not empty. @@ -95,8 +93,9 @@ def run hydra = Typhoeus::Hydra.new(max_concurrency: 10) formatter = Unipept::Formatter.new_for_format(options[:format]) batch_order = Unipept::BatchOrder.new + batch_iterator = Unipept::BatchIterator.new(batch_size) - line_iterator(input_iterator) do |input_slice, batch_id, fasta_mapper| + batch_iterator.iterate(input_iterator) do |input_slice, batch_id, fasta_mapper| request = Typhoeus::Request.new( @url, method: :post, @@ -159,53 +158,6 @@ def write_to_output(string) end end - # Splits the input lines into slices, based on the batch_size of the current - # command. Executes the given block for each of the batches. - # - # Supports both normal input and input in the fasta format. - def line_iterator(lines, &block) - first_line = lines.next rescue return - if first_line.start_with? '>' - fasta_iterator(first_line, lines, &block) - else - normal_iterator(first_line, lines, &block) - end - end - - # Splits the input lines in fasta format into slices, based on the - # batch_size of the current command. Executes the given block for each of - # the batches. - def fasta_iterator(first_line, next_lines, &block) - current_fasta_header = first_line.chomp - next_lines.each_slice(batch_size).with_index do |slice, i| - fasta_mapper = [] - input_set = Set.new - - slice.each do |line| - line.chomp! - if line.start_with? '>' - current_fasta_header = line - else - fasta_mapper << [current_fasta_header, line] - input_set << line - end - end - - block.call(input_set.to_a, i, fasta_mapper) - end - end - - # Splits the input lines into slices, based on the batch_size of the current - # command. Executes the given block for each of the batches. - def normal_iterator(first_line, next_lines, &block) - Enumerator.new do |y| - y << first_line - loop do - y << next_lines.next - end - end.each_slice(batch_size).with_index(&block) - end - private def glob_to_regex(string) From bc295633d7890f72a726904d519797e0fdcc6d45 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 10 Jun 2015 10:55:48 +0200 Subject: [PATCH 50/64] more api runner refactoring --- lib/commands/unipept/api_runner.rb | 54 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index 2f5b2067..b613cfe9 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -91,7 +91,7 @@ def recently_fetched? def run print_server_message hydra = Typhoeus::Hydra.new(max_concurrency: 10) - formatter = Unipept::Formatter.new_for_format(options[:format]) + @formatter = Unipept::Formatter.new_for_format(options[:format]) batch_order = Unipept::BatchOrder.new batch_iterator = Unipept::BatchIterator.new(batch_size) @@ -103,33 +103,10 @@ def run accept_encoding: 'gzip', headers: { 'User-Agent' => @user_agent } ) + request.on_complete do |resp| - if resp.success? - result = JSON[resp.response_body] rescue [] - result = [result] unless result.is_a? Array - result.map! { |r| r.select! { |k, _v| selected_fields.any? { |f| f.match k } } } unless selected_fields.empty? - - # wait till it's our turn to write - batch_order.wait(batch_id) do - unless result.empty? - write_to_output formatter.header(result, fasta_mapper) if batch_id == 0 - write_to_output formatter.format(result, fasta_mapper) - end - end - - elsif resp.timed_out? - batch_order.wait(batch_id) do - save_error('request timed out, continuing anyway, but results might be incomplete') - end - elsif resp.code == 0 - batch_order.wait(batch_id) do - save_error('could not get an http response, continuing anyway, but results might be incomplete' + resp.return_message) - end - else - batch_order.wait(batch_id) do - save_error("Got #{resp.code}: #{resp.response_body}\nRequest headers: #{resp.request.options}\nRequest body:\n#{resp.request.encoded_body}\n\n") - end - end + block = handle_response(resp, batch_id, fasta_mapper) + batch_order.wait(batch_id, &block) end hydra.queue request @@ -160,6 +137,29 @@ def write_to_output(string) private + # Handles the response of an API request. + # Returns a block to execute. + def handle_response(response, batch_id, fasta_mapper) + if response.success? + result = JSON[response.response_body] rescue [] + result = [result] unless result.is_a? Array + result.map! { |r| r.select! { |k, _v| selected_fields.any? { |f| f.match k } } } unless selected_fields.empty? + + lambda do + unless result.empty? + write_to_output @formatter.header(result, fasta_mapper) if batch_id == 0 + write_to_output @formatter.format(result, fasta_mapper) + end + end + elsif response.timed_out? + -> { save_error('request timed out, continuing anyway, but results might be incomplete') } + elsif response.code == 0 + -> { save_error('could not get an http response, continuing anyway, but results might be incomplete' + response.return_message) } + else + -> { save_error("Got #{response.code}: #{response.response_body}\nRequest headers: #{response.request.options}\nRequest body:\n#{response.request.encoded_body}\n\n") } + end + end + def glob_to_regex(string) /^#{string.gsub('*', '.*')}$/ end From c93f8f910f880cebd4175b378e49a1692adf33ef Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 10 Jun 2015 15:15:20 +0200 Subject: [PATCH 51/64] test batch iterator --- lib/batch_iterator.rb | 23 ++++++-- lib/commands/unipept/api_runner.rb | 14 +++-- test/test_batch_iterator.rb | 87 ++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 9 deletions(-) create mode 100644 test/test_batch_iterator.rb diff --git a/lib/batch_iterator.rb b/lib/batch_iterator.rb index f31722c4..1d1f668f 100644 --- a/lib/batch_iterator.rb +++ b/lib/batch_iterator.rb @@ -2,6 +2,8 @@ module Unipept class BatchIterator + attr_reader :batch_size + def initialize(batch_size) @batch_size = batch_size end @@ -10,15 +12,28 @@ def initialize(batch_size) # command. Executes the given block for each of the batches. # # Supports both normal input and input in the fasta format. + # + # @input [Iterator] lines An iterator containing the input lines + # + # @input [lambda] block The code to execute on the slices def iterate(lines, &block) first_line = lines.next rescue return - if first_line.start_with? '>' + if fasta? first_line fasta_iterator(first_line, lines, &block) else normal_iterator(first_line, lines, &block) end end + # Checks if the geven line is a fasta header. + # + # @param [String] line The input line + # + # @return [Boolean] Whether te input is a fasta header + def fasta?(line) + line.start_with? '>' + end + private # Splits the input lines in fasta format into slices, based on the @@ -26,13 +41,13 @@ def iterate(lines, &block) # the batches. def fasta_iterator(first_line, next_lines, &block) current_fasta_header = first_line.chomp - next_lines.each_slice(@batch_size).with_index do |slice, i| + next_lines.each_slice(batch_size).with_index do |slice, i| fasta_mapper = [] input_set = Set.new slice.each do |line| line.chomp! - if line.start_with? '>' + if fasta? line current_fasta_header = line else fasta_mapper << [current_fasta_header, line] @@ -52,7 +67,7 @@ def normal_iterator(first_line, next_lines, &block) loop do y << next_lines.next end - end.each_slice(@batch_size).with_index(&block) + end.each_slice(batch_size).with_index(&block) end end end diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index b613cfe9..e1f5c28e 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -53,7 +53,7 @@ def batch_size # Constructs a request body (a Hash) for set of input strings, using the # options supplied by the user. - def get_request_body(input, selected_fields) + def construct_request_body(input, selected_fields) names = selected_fields.empty? || selected_fields.any? { |f| /name/.match f.to_s } { input: input, equate_il: options[:equate], @@ -67,6 +67,11 @@ def selected_fields @selected_fields ||= [*options[:select]].map { |f| f.split(',') }.flatten.map { |f| glob_to_regex(f) } end + # Returns a formatter, based on the format specified in the options + def formatter + @formatter ||= Unipept::Formatter.new_for_format(options[:format]) + end + # Checks if the server has a message and prints it if not empty. # We will only check this once a day and won't print anything if the quiet # option is set or if we output to a file. @@ -91,7 +96,6 @@ def recently_fetched? def run print_server_message hydra = Typhoeus::Hydra.new(max_concurrency: 10) - @formatter = Unipept::Formatter.new_for_format(options[:format]) batch_order = Unipept::BatchOrder.new batch_iterator = Unipept::BatchIterator.new(batch_size) @@ -99,7 +103,7 @@ def run request = Typhoeus::Request.new( @url, method: :post, - body: get_request_body(input_slice, selected_fields), + body: construct_request_body(input_slice, selected_fields), accept_encoding: 'gzip', headers: { 'User-Agent' => @user_agent } ) @@ -147,8 +151,8 @@ def handle_response(response, batch_id, fasta_mapper) lambda do unless result.empty? - write_to_output @formatter.header(result, fasta_mapper) if batch_id == 0 - write_to_output @formatter.format(result, fasta_mapper) + write_to_output formatter.header(result, fasta_mapper) if batch_id == 0 + write_to_output formatter.format(result, fasta_mapper) end end elsif response.timed_out? diff --git a/test/test_batch_iterator.rb b/test/test_batch_iterator.rb new file mode 100644 index 00000000..66b44eac --- /dev/null +++ b/test/test_batch_iterator.rb @@ -0,0 +1,87 @@ +require_relative '../lib/batch_iterator' + +module Unipept + class BatchIteratorTestCase < Unipept::TestCase + def test_batch_size + iterator = BatchIterator.new(50) + assert_equal(50, iterator.batch_size) + end + + def test_fasta + iterator = BatchIterator.new(50) + assert(iterator.fasta? '> test') + assert(!(iterator.fasta? '< test')) + assert(!(iterator.fasta? 'test')) + end + + def test_normal_iterator + iterator = BatchIterator.new(2) + data = %w(a b c d e) + out, _err = capture_io_while do + iterator.iterate(data.each) do |batch, batch_id, fasta_mapper| + assert_nil(fasta_mapper) + puts batch_id + puts batch.to_s + end + end + assert_equal(['0', '["a", "b"]', '1', '["c", "d"]', '2', '["e"]', ''].join("\n"), out) + end + + def test_fasta_iterator_single_header + iterator = BatchIterator.new(2) + data = %w(>h1 a b c d e) + mappings = [] + out, _err = capture_io_while do + iterator.iterate(data.each) do |batch, batch_id, fasta_mapper| + assert(!fasta_mapper.nil?) + mappings << fasta_mapper + puts batch_id + puts batch.to_s + end + end + assert_equal(['0', '["a"]', '1', '["b", "c"]', '2', '["d", "e"]', ''].join("\n"), out) + mappings.flatten!(1) + data.shift + data.each { |element| assert(mappings.include?(['>h1', element])) } + end + + def test_fasta_iterator_double_header_single_batch + iterator = BatchIterator.new(3) + data = %w(>h1 a >h2 b c d e) + mappings = [] + out, _err = capture_io_while do + iterator.iterate(data.each) do |batch, batch_id, fasta_mapper| + assert(!fasta_mapper.nil?) + mappings << fasta_mapper + puts batch_id + puts batch.to_s + end + end + assert_equal(['0', '["a"]', '1', '["b", "c", "d"]', '2', '["e"]', ''].join("\n"), out) + mappings.flatten!(1) + assert(mappings.include?(['>h1', 'a'])) + assert(mappings.include?(['>h2', 'b'])) + assert(mappings.include?(['>h2', 'c'])) + assert(mappings.include?(['>h2', 'd'])) + assert(mappings.include?(['>h2', 'e'])) + end + + def test_fasta_iterator_multiple_values + iterator = BatchIterator.new(4) + data = %w(>h1 a >h2 a a) + mappings = [] + out, _err = capture_io_while do + iterator.iterate(data.each) do |batch, batch_id, fasta_mapper| + assert(!fasta_mapper.nil?) + mappings << fasta_mapper + puts batch_id + puts batch.to_s + end + end + assert_equal(['0', '["a"]', '1', '["a"]', ''].join("\n"), out) + assert(mappings[0].include?(['>h1', 'a'])) + assert(mappings[0].include?(['>h2', 'a'])) + assert(mappings[1].include?(['>h2', 'a'])) + end + end +end From 626a22baed40f9825faf5e47d32e4f9ec4e245b7 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Wed, 10 Jun 2015 17:41:14 +0200 Subject: [PATCH 52/64] first part of the api runner tests --- lib/commands/unipept.rb | 12 +-- lib/commands/unipept/api_runner.rb | 11 ++- test/commands/unipept/test_api_runner.rb | 111 +++++++++++++++++++++++ 3 files changed, 127 insertions(+), 7 deletions(-) create mode 100644 test/commands/unipept/test_api_runner.rb diff --git a/lib/commands/unipept.rb b/lib/commands/unipept.rb index 0549f6c2..e0879b80 100644 --- a/lib/commands/unipept.rb +++ b/lib/commands/unipept.rb @@ -41,7 +41,7 @@ def create_root_command - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in a text file that is passed as an argument to the -i option - to standard input @@ -95,7 +95,7 @@ def add_pept2taxa_command - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in a text file that is passed as an argument to the -i option - to standard input @@ -121,7 +121,7 @@ def add_pept2lca_command - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in a text file that is passed as an argument to the -i option - to standard input @@ -147,7 +147,7 @@ def add_taxa2lca_command - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in a text file that is passed as an argument to the -i option - to standard input @@ -172,7 +172,7 @@ def add_pept2prot_command - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in a text file that is passed as an argument to the -i option - to standard input @@ -198,7 +198,7 @@ def add_taxonomy_command - as separate command line arguments - - in one or more text files that are passed as an argument to the -i option + - in a text file that is passed as an argument to the -i option - to standard input diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index e1f5c28e..3931ae94 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -1,5 +1,13 @@ module Unipept class Commands::ApiRunner < Cri::CommandRunner + attr_reader :configuration + + attr_reader :url + + attr_reader :message_url + + attr_reader :user_agent + def initialize(args, opts, cmd) super @configuration = Unipept::Configuration.new @@ -20,12 +28,13 @@ def set_configuration # Returns the host. If a value is defined by both an option and the config # file, the value of the option is used. def get_host + puts options # find host in opts first host = options[:host] ? options[:host] : @configuration['host'] # No host has been set? if host.nil? || host.empty? - abort 'WARNING: no host has been set, you can set the host with `unipept config host http://api.unipept.ugent.be:3000/`' + abort 'WARNING: no host has been set, you can set the host with `unipept config host http://api.unipept.ugent.be/`' end # add http:// if needed diff --git a/test/commands/unipept/test_api_runner.rb b/test/commands/unipept/test_api_runner.rb new file mode 100644 index 00000000..5b88a57a --- /dev/null +++ b/test/commands/unipept/test_api_runner.rb @@ -0,0 +1,111 @@ +require_relative '../../../lib/commands/unipept/api_runner' + +module Unipept + class UnipeptAPIRunnerTestCase < Unipept::TestCase + def test_init + runner = new_runner('test', { host: 'test_host' }, %w(a b c)) + assert_equal('test', runner.command.name) + assert_equal('test_host', runner.options[:host]) + assert_equal(%w(a b c), runner.arguments) + assert(!runner.configuration.nil?) + assert_equal('http://test_host/api/v1/test.json', runner.url) + assert_equal('http://test_host/api/v1/messages.json', runner.message_url) + assert(/Unipept CLI - unipept [0-9]\.[0-9]\.[0-9]/.match runner.user_agent) + end + + def test_config_host + runner = new_runner('test', { host: 'http://param_host' }, %w(a b c)) + runner.options.delete(:host) + runner.configuration['host'] = 'http://config_host' + host = runner.get_host + assert_equal('http://config_host', host) + end + + def test_param_host + runner = new_runner('test', { host: 'http://param_host' }, %w(a b c)) + runner.configuration.delete('host') + host = runner.get_host + assert_equal('http://param_host', host) + end + + def test_no_host + runner = new_runner('test', { host: 'param_host' }, %w(a b c)) + runner.configuration.delete('host') + runner.options.delete(:host) + _out, err = capture_io_while do + assert_raises SystemExit do + runner.get_host + end + end + assert(err.start_with? 'WARNING: no host has been set') + end + + def test_host_priority + runner = new_runner('test', { host: 'http://param_host' }, %w(a b c)) + runner.configuration['host'] = 'http://config_host' + host = runner.get_host + assert_equal('http://param_host', host) + end + + def test_http_host + runner = new_runner('test', { host: 'param_host' }, %w(a b c)) + host = runner.get_host + assert_equal('http://param_host', host) + end + + def test_https_host + runner = new_runner('test', { host: 'https://param_host' }, %w(a b c)) + host = runner.get_host + assert_equal('https://param_host', host) + end + + def test_input_iterator_args + runner = new_runner('test', { host: 'https://param_host' }, %w(a b c)) + output = [] + runner.input_iterator.each { |el| output << el.chomp } + assert_equal(%w(a b c), output) + end + + def test_input_iterator_file + File.open('input_file', 'w') { |file| file.write(%w(a b c).join("\n")) } + runner = new_runner('test', host: 'https://param_host', input: 'input_file') + output = [] + runner.input_iterator.each { |el| output << el.chomp } + assert_equal(%w(a b c), output) + end + + def test_input_iterator_stdin + runner = new_runner('test', host: 'https://param_host') + output = [] + _out, _err = capture_io_with_input(%w(a b c)) do + runner.input_iterator.each { |el| output << el.chomp } + end + assert_equal(%w(a b c), output) + end + + def test_input_iterator_arguments_priority + File.open('input_file', 'w') { |file| file.write(%w(1 2 3).join("\n")) } + runner = new_runner('test', { host: 'https://param_host', input: 'input_file' }, %w(a b c)) + output = [] + _out, _err = capture_io_with_input(%w(1 2 3)) do + runner.input_iterator.each { |el| output << el.chomp } + end + assert_equal(%w(a b c), output) + end + + def test_input_iterator_file_priority + File.open('input_file', 'w') { |file| file.write(%w(a b c).join("\n")) } + runner = new_runner('test', host: 'https://param_host', input: 'input_file') + output = [] + _out, _err = capture_io_with_input(%w(1 2 3)) do + runner.input_iterator.each { |el| output << el.chomp } + end + assert_equal(%w(a b c), output) + end + + def new_runner(command_name = 'test', options = { host: 'http://param_host' }, arguments = []) + command = Cri::Command.define { name command_name } + Commands::ApiRunner.new(options, arguments, command) + end + end +end From 39fb966f4bac1ee12a4c1e3e78ca8994b1729fa8 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 13:44:23 +0200 Subject: [PATCH 53/64] api runner tests part 2 --- lib/commands/unipept/api_runner.rb | 17 ++- test/commands/unipept/test_api_runner.rb | 187 +++++++++++++++++++++++ 2 files changed, 198 insertions(+), 6 deletions(-) diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index 3931ae94..d08228b5 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -62,12 +62,12 @@ def batch_size # Constructs a request body (a Hash) for set of input strings, using the # options supplied by the user. - def construct_request_body(input, selected_fields) + def construct_request_body(input) names = selected_fields.empty? || selected_fields.any? { |f| /name/.match f.to_s } { input: input, - equate_il: options[:equate], - extra: options[:all], - names: options[:all] && names + equate_il: options[:equate] == true, + extra: options[:all] == true, + names: options[:all] == true && names } end @@ -90,10 +90,15 @@ def print_server_message return if recently_fetched? @configuration['last_fetch_date'] = Time.now @configuration.save - resp = Typhoeus.get(@message_url, params: { version: Unipept::VERSION }).body.chomp + resp = fetch_server_message puts resp unless resp.empty? end + # Fetches a message from the server and returns it + def fetch_server_message + Typhoeus.get(@message_url, params: { version: Unipept::VERSION }).body.chomp + end + # Returns true if the last check for a server message was less than a day # ago. def recently_fetched? @@ -112,7 +117,7 @@ def run request = Typhoeus::Request.new( @url, method: :post, - body: construct_request_body(input_slice, selected_fields), + body: construct_request_body(input_slice), accept_encoding: 'gzip', headers: { 'User-Agent' => @user_agent } ) diff --git a/test/commands/unipept/test_api_runner.rb b/test/commands/unipept/test_api_runner.rb index 5b88a57a..b57ab0fa 100644 --- a/test/commands/unipept/test_api_runner.rb +++ b/test/commands/unipept/test_api_runner.rb @@ -103,6 +103,193 @@ def test_input_iterator_file_priority assert_equal(%w(a b c), output) end + def test_batch_size + assert_equal(100, new_runner.batch_size) + end + + def test_default_formatter + runner = new_runner + assert_equal('csv', runner.formatter.type) + end + + def test_param_formatter + runner = new_runner('test', host: 'http://param_host', format: 'json') + assert_equal('json', runner.formatter.type) + end + + def test_no_selected_fields + runner = new_runner + assert_equal([], runner.selected_fields) + end + + def test_single_selected_fields + runner = new_runner('test', host: 'http://param_host', select: 'field') + assert_equal([/^field$/], runner.selected_fields) + end + + def test_comma_selected_fields + runner = new_runner('test', host: 'http://param_host', select: 'field1,field2') + assert_equal([/^field1$/, /^field2$/], runner.selected_fields) + end + + def test_multiple_selected_fields + runner = new_runner('test', host: 'http://param_host', select: %w(field1 field2)) + assert_equal([/^field1$/, /^field2$/], runner.selected_fields) + end + + def test_combined_selected_fields + runner = new_runner('test', host: 'http://param_host', select: ['field1', 'field2,field3']) + assert_equal([/^field1$/, /^field2$/, /^field3$/], runner.selected_fields) + end + + def test_wildcard_selected_fields + runner = new_runner('test', host: 'http://param_host', select: 'field*') + assert_equal([/^field.*$/], runner.selected_fields) + end + + def test_never_recently_fetched + runner = new_runner + runner.configuration.delete('last_fetch_date') + assert(!runner.recently_fetched?) + end + + def test_old_recently_fetched + runner = new_runner + runner.configuration['last_fetch_date'] = Time.now - 60 * 60 * 25 + assert(!runner.recently_fetched?) + end + + def test_recently_recently_fetched + runner = new_runner + runner.configuration['last_fetch_date'] = Time.now - 60 * 60 * 1 + assert(runner.recently_fetched?) + end + + def test_basic_construct_request_body + runner = new_runner('test', host: 'http://param_host') + body = runner.construct_request_body('test') + assert_equal('test', body[:input]) + assert_equal(false, body[:equate_il]) + assert_equal(false, body[:extra]) + assert_equal(false, body[:names]) + end + + def test_equate_construct_request_body + runner = new_runner('test', host: 'http://param_host', equate: true) + body = runner.construct_request_body('test') + assert_equal('test', body[:input]) + assert_equal(true, body[:equate_il]) + assert_equal(false, body[:extra]) + assert_equal(false, body[:names]) + end + + def test_all_no_select_construct_request_body + runner = new_runner('test', host: 'http://param_host', all: true) + body = runner.construct_request_body('test') + assert_equal('test', body[:input]) + assert_equal(false, body[:equate_il]) + assert_equal(true, body[:extra]) + assert_equal(true, body[:names]) + end + + def test_all_names_select_construct_request_body + runner = new_runner('test', host: 'http://param_host', all: true, select: 'test,names') + body = runner.construct_request_body('test') + assert_equal('test', body[:input]) + assert_equal(false, body[:equate_il]) + assert_equal(true, body[:extra]) + assert_equal(true, body[:names]) + end + + def test_all_no_names_select_construct_request_body + runner = new_runner('test', host: 'http://param_host', all: true, select: 'test') + body = runner.construct_request_body('test') + assert_equal('test', body[:input]) + assert_equal(false, body[:equate_il]) + assert_equal(true, body[:extra]) + assert_equal(false, body[:names]) + end + + def test_print_server_message + runner = new_runner + runner.stub(:recently_fetched?, false) do + runner.stub(:fetch_server_message, 'message') do + out, _err = capture_io_while do + def $stdout.tty? + true + end + runner.print_server_message + end + assert_equal('message', out.chomp) + end + end + end + + def test_quiet_print_server_message + runner = new_runner('test', host: 'bla', quiet: true) + runner.stub(:recently_fetched?, false) do + runner.stub(:fetch_server_message, 'message') do + out, _err = capture_io_while do + def $stdout.tty? + true + end + runner.print_server_message + end + assert_equal('', out) + end + end + end + + def test_no_tty_print_server_message + runner = new_runner + runner.stub(:recently_fetched?, false) do + runner.stub(:fetch_server_message, 'message') do + out, _err = capture_io_while do + def $stdout.tty? + false + end + runner.print_server_message + end + assert_equal('', out) + end + end + end + + def test_recent_print_server_message + runner = new_runner + runner.stub(:recently_fetched?, true) do + runner.stub(:fetch_server_message, 'message') do + out, _err = capture_io_while do + def $stdout.tty? + true + end + runner.print_server_message + end + assert_equal('', out) + end + end + end + + def test_empty_print_server_message + runner = new_runner + runner.stub(:recently_fetched?, false) do + runner.stub(:fetch_server_message, '') do + out, _err = capture_io_while do + def $stdout.tty? + true + end + runner.print_server_message + end + assert_equal('', out) + end + end + end + + def test_fetch_server_message + runner = new_runner('test', host: 'http://api.unipept.ugent.be') + assert(!runner.fetch_server_message.nil?) + end + def new_runner(command_name = 'test', options = { host: 'http://param_host' }, arguments = []) command = Cri::Command.define { name command_name } Commands::ApiRunner.new(options, arguments, command) From 71d373ff9428a6397e1e6319c0278aeca953cb4b Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 15:09:48 +0200 Subject: [PATCH 54/64] api runner tests part 3 --- lib/commands/unipept/api_runner.rb | 21 ++- test/commands/unipept/test_api_runner.rb | 165 +++++++++++++++++++++++ 2 files changed, 181 insertions(+), 5 deletions(-) diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index d08228b5..e9f41797 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -63,7 +63,7 @@ def batch_size # Constructs a request body (a Hash) for set of input strings, using the # options supplied by the user. def construct_request_body(input) - names = selected_fields.empty? || selected_fields.any? { |f| /name/.match f.to_s } + names = selected_fields.empty? || selected_fields.any? { |f| f.to_s.include? 'name' } { input: input, equate_il: options[:equate] == true, extra: options[:all] == true, @@ -137,7 +137,7 @@ def run # Saves an error to a new file in the .unipept directory in the users home # directory. def save_error(message) - path = File.expand_path(File.join(Dir.home, '.unipept', "unipept-#{Time.now.strftime('%F-%T')}.log")) + path = error_file_path FileUtils.mkdir_p File.dirname(path) File.open(path, 'w') { |f| f.write message } $stderr.puts "API request failed! log can be found in #{path}" @@ -155,13 +155,15 @@ def write_to_output(string) private + def error_file_path + File.expand_path(File.join(Dir.home, '.unipept', "unipept-#{Time.now.strftime('%F-%T')}.log")) + end + # Handles the response of an API request. # Returns a block to execute. def handle_response(response, batch_id, fasta_mapper) if response.success? - result = JSON[response.response_body] rescue [] - result = [result] unless result.is_a? Array - result.map! { |r| r.select! { |k, _v| selected_fields.any? { |f| f.match k } } } unless selected_fields.empty? + result = filter_result(response.response_body) lambda do unless result.empty? @@ -178,6 +180,15 @@ def handle_response(response, batch_id, fasta_mapper) end end + # Parses the json_response, wraps it in an array if needed and filters the + # fields based on the selected_fields + def filter_result(json_response) + result = JSON[json_response] rescue [] + result = [result] unless result.is_a? Array + result.map! { |r| r.select! { |k, _v| selected_fields.any? { |f| f.match k } } } unless selected_fields.empty? + result + end + def glob_to_regex(string) /^#{string.gsub('*', '.*')}$/ end diff --git a/test/commands/unipept/test_api_runner.rb b/test/commands/unipept/test_api_runner.rb index b57ab0fa..598c39b1 100644 --- a/test/commands/unipept/test_api_runner.rb +++ b/test/commands/unipept/test_api_runner.rb @@ -1,6 +1,11 @@ require_relative '../../../lib/commands/unipept/api_runner' module Unipept + # make methods public to test them + class Commands::ApiRunner + public :glob_to_regex, :handle_response, :error_file_path, :filter_result + end + class UnipeptAPIRunnerTestCase < Unipept::TestCase def test_init runner = new_runner('test', { host: 'test_host' }, %w(a b c)) @@ -233,6 +238,7 @@ def test_quiet_print_server_message def $stdout.tty? true end + $stdout.tty? runner.print_server_message end assert_equal('', out) @@ -290,9 +296,168 @@ def test_fetch_server_message assert(!runner.fetch_server_message.nil?) end + def test_stdout_write_to_output + runner = new_runner + out, _err = capture_io_while do + runner.write_to_output('hello world') + end + assert_equal('hello world', out.chomp) + end + + def test_file_write_to_output + runner = new_runner('test', host: 'test', output: 'output_file') + out, _err = capture_io_while do + runner.write_to_output('hello world') + end + assert_equal('', out) + assert_equal('hello world', IO.foreach('output_file').next.chomp) + end + + def test_glob_to_regex + runner = new_runner + assert(/^simple$/, runner.glob_to_regex('simple')) + assert(/^.*simple.*$/, runner.glob_to_regex('*simple*')) + end + + def test_save_error + runner = new_runner + runner.stub(:error_file_path, 'errordir/error.log') do + _out, err = capture_io_while do + runner.save_error('error message') + end + assert(err.start_with? 'API request failed! log can be found in') + assert_equal('error message', IO.foreach('errordir/error.log').next.chomp) + end + end + + def test_error_file_path + runner = new_runner + assert(runner.error_file_path.include? '/.unipept/') + end + + def test_invalid_filter_result + runner = new_runner + assert_equal([], runner.filter_result('{"key":"value')) + end + + def test_array_wrap_filter_result + runner = new_runner + assert_equal([{ 'key' => 'value' }], runner.filter_result('{"key":"value"}')) + end + + def test_filter_filter_result + runner = new_runner('test', host: 'test', select: 'key1') + result = runner.filter_result('[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]') + assert_equal([{ 'key1' => 'value1' }, { 'key1' => 'value2' }], result) + end + + def test_success_header_handle_response + runner = new_runner + response = new_response(success: true, response_body: '[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]') + lambda = runner.handle_response(response, 0, nil) + assert(lambda.lambda?) + out, err = capture_io_while(&lambda) + lines = out.each_line + assert_equal('', err) + assert_equal('key1,key2', lines.next.chomp) + assert_equal('value1,value1', lines.next.chomp) + assert_equal('value2,value2', lines.next.chomp) + end + + def test_success_no_header_handle_response + runner = new_runner + response = new_response(success: true, response_body: '[{"key1":"value1","key2":"value1"},{"key1":"value2","key2":"value2"}]') + lambda = runner.handle_response(response, 1, nil) + assert(lambda.lambda?) + out, err = capture_io_while(&lambda) + lines = out.each_line + assert_equal('', err) + assert_equal('value1,value1', lines.next.chomp) + assert_equal('value2,value2', lines.next.chomp) + end + + def test_time_out_handle_response + runner = new_runner + response = new_response(success: false, timed_out: true) + lambda = runner.handle_response(response, 0, nil) + assert(lambda.lambda?) + def runner.save_error(input) + $stderr.puts(input) + end + out, err = capture_io_while(&lambda) + assert_equal('', out) + assert(err.chomp.start_with? 'request timed out') + end + + def test_code_0_handle_response + runner = new_runner + response = new_response(success: false, timed_out: false, code: 0) + lambda = runner.handle_response(response, 0, nil) + assert(lambda.lambda?) + def runner.save_error(input) + $stderr.puts(input) + end + out, err = capture_io_while(&lambda) + assert_equal('', out) + assert(err.chomp.start_with? 'could not get an http') + end + + def test_failed_handle_response + runner = new_runner + response = new_response(success: false, timed_out: false, code: 10) + lambda = runner.handle_response(response, 0, nil) + assert(lambda.lambda?) + def runner.save_error(input) + $stderr.puts(input) + end + out, err = capture_io_while(&lambda) + assert_equal('', out) + assert(err.chomp.start_with? 'Got 10') + end + def new_runner(command_name = 'test', options = { host: 'http://param_host' }, arguments = []) command = Cri::Command.define { name command_name } Commands::ApiRunner.new(options, arguments, command) end + + def new_response(values) + response = Class.new do + def initialize(values) + @values = values + end + + def success? + @values[:success] + end + + def timed_out? + @values[:timed_out] + end + + def code + @values[:code] + end + + def response_body + @values[:response_body] + end + + def return_message + '' + end + + def request + o = Object.new + def o.options + '' + end + def o.encoded_body + '' + end + o + end + end + response.new(values) + end end end From 431d201043c5319fb13c6305b3b11d47014f2058 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 15:33:31 +0200 Subject: [PATCH 55/64] final api runner tests --- test/commands/unipept/test_api_runner.rb | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/commands/unipept/test_api_runner.rb b/test/commands/unipept/test_api_runner.rb index 598c39b1..071485a9 100644 --- a/test/commands/unipept/test_api_runner.rb +++ b/test/commands/unipept/test_api_runner.rb @@ -415,6 +415,29 @@ def runner.save_error(input) assert(err.chomp.start_with? 'Got 10') end + def test_run + runner = new_runner('taxonomy', host: 'http://api.unipept.ugent.be') + out, err = capture_io_while do + def runner.print_server_message + puts 'server message' + end + def runner.input_iterator + %w(0 1 2).each + end + def runner.batch_size + 2 + end + runner.run + end + lines = out.each_line + assert_equal('', err) + assert_equal('server message', lines.next.chomp) + assert(lines.next.start_with? 'taxon_id') + assert(lines.next.start_with? '1,root') + assert(lines.next.start_with? '2,Bacteria') + assert_raises(StopIteration) { lines.next } + end + def new_runner(command_name = 'test', options = { host: 'http://param_host' }, arguments = []) command = Cri::Command.define { name command_name } Commands::ApiRunner.new(options, arguments, command) From 796bcc0b012186982c79de6b4060018063e9a145 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 16:47:15 +0200 Subject: [PATCH 56/64] add pept2lca tests --- lib/commands/unipept/api_runner.rb | 1 - test/commands/unipept/test_pept2lca.rb | 40 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 test/commands/unipept/test_pept2lca.rb diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index e9f41797..13f86af7 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -28,7 +28,6 @@ def set_configuration # Returns the host. If a value is defined by both an option and the config # file, the value of the option is used. def get_host - puts options # find host in opts first host = options[:host] ? options[:host] : @configuration['host'] diff --git a/test/commands/unipept/test_pept2lca.rb b/test/commands/unipept/test_pept2lca.rb new file mode 100644 index 00000000..ba209c36 --- /dev/null +++ b/test/commands/unipept/test_pept2lca.rb @@ -0,0 +1,40 @@ +require_relative '../../../lib/commands' + +module Unipept + class UnipeptPept2lcaTestCase < Unipept::TestCase + def test_batch_size + command = Cri::Command.define { name 'pept2lca' } + pept2lca = Commands::Pept2lca.new({ host: 'http://api.unipept.ugent.be' }, [], command) + assert_equal(1000, pept2lca.batch_size) + pept2lca.options[:all] = true + assert_equal(100, pept2lca.batch_size) + end + + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(pept2lca -h)) + end + end + assert(out.include? 'show help for this command') + + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(pept2lca --help)) + end + end + assert(out.include? 'show help for this command') + end + + def test_run + out, err = capture_io_while do + Commands::Unipept.run(%w(pept2lca --host http://api.unipept.ugent.be AALTER)) + end + lines = out.each_line + assert_equal('', err) + assert(lines.next.start_with? 'peptide,taxon_id') + assert(lines.next.start_with? 'AALTER,1,root,no rank') + assert_raises(StopIteration) { lines.next } + end + end +end From 8f1d1d805cf8a6597425318dc546e0a8bc8be0ab Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 16:47:25 +0200 Subject: [PATCH 57/64] add pept2prot tests --- test/commands/unipept/test_pept2prot.rb | 39 +++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 test/commands/unipept/test_pept2prot.rb diff --git a/test/commands/unipept/test_pept2prot.rb b/test/commands/unipept/test_pept2prot.rb new file mode 100644 index 00000000..89b56eb1 --- /dev/null +++ b/test/commands/unipept/test_pept2prot.rb @@ -0,0 +1,39 @@ +require_relative '../../../lib/commands' + +module Unipept + class UnipeptPept2protTestCase < Unipept::TestCase + def test_batch_size + command = Cri::Command.define { name 'pept2lca' } + pept2lca = Commands::Pept2prot.new({ host: 'http://api.unipept.ugent.be' }, [], command) + assert_equal(10, pept2lca.batch_size) + pept2lca.options[:all] = true + assert_equal(5, pept2lca.batch_size) + end + + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(pept2prot -h)) + end + end + assert(out.include? 'show help for this command') + + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(pept2prot --help)) + end + end + assert(out.include? 'show help for this command') + end + + def test_run + out, err = capture_io_while do + Commands::Unipept.run(%w(pept2prot --host http://api.unipept.ugent.be ENFVYIAK)) + end + lines = out.each_line + assert_equal('', err) + assert(lines.next.start_with? 'peptide,uniprot_id,taxon_id') + assert(lines.next.start_with? 'ENFVYIAK,') + end + end +end From 27cfce1ce3bb999db1c607ebaa9c446cee01833f Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 16:49:46 +0200 Subject: [PATCH 58/64] add pept2taxa tests --- test/commands/unipept/test_pept2prot.rb | 10 +++---- test/commands/unipept/test_pept2taxa.rb | 39 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 test/commands/unipept/test_pept2taxa.rb diff --git a/test/commands/unipept/test_pept2prot.rb b/test/commands/unipept/test_pept2prot.rb index 89b56eb1..f76a897a 100644 --- a/test/commands/unipept/test_pept2prot.rb +++ b/test/commands/unipept/test_pept2prot.rb @@ -3,11 +3,11 @@ module Unipept class UnipeptPept2protTestCase < Unipept::TestCase def test_batch_size - command = Cri::Command.define { name 'pept2lca' } - pept2lca = Commands::Pept2prot.new({ host: 'http://api.unipept.ugent.be' }, [], command) - assert_equal(10, pept2lca.batch_size) - pept2lca.options[:all] = true - assert_equal(5, pept2lca.batch_size) + command = Cri::Command.define { name 'pept2prot' } + pept2prot = Commands::Pept2prot.new({ host: 'http://api.unipept.ugent.be' }, [], command) + assert_equal(10, pept2prot.batch_size) + pept2prot.options[:all] = true + assert_equal(5, pept2prot.batch_size) end def test_help diff --git a/test/commands/unipept/test_pept2taxa.rb b/test/commands/unipept/test_pept2taxa.rb new file mode 100644 index 00000000..d31ef60a --- /dev/null +++ b/test/commands/unipept/test_pept2taxa.rb @@ -0,0 +1,39 @@ +require_relative '../../../lib/commands' + +module Unipept + class UnipeptPept2taxaTestCase < Unipept::TestCase + def test_batch_size + command = Cri::Command.define { name 'pept2taxa' } + pept2taxa = Commands::Pept2taxa.new({ host: 'http://api.unipept.ugent.be' }, [], command) + assert_equal(10, pept2taxa.batch_size) + pept2taxa.options[:all] = true + assert_equal(5, pept2taxa.batch_size) + end + + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(pept2taxa -h)) + end + end + assert(out.include? 'show help for this command') + + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(pept2taxa --help)) + end + end + assert(out.include? 'show help for this command') + end + + def test_run + out, err = capture_io_while do + Commands::Unipept.run(%w(pept2taxa --host http://api.unipept.ugent.be ENFVYIAK)) + end + lines = out.each_line + assert_equal('', err) + assert(lines.next.start_with? 'peptide,taxon_id,taxon_name,taxon_rank') + assert(lines.next.start_with? 'ENFVYIAK,') + end + end +end From b75932ad7439b3fe1582f9f92e0458f071d65964 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 17:16:11 +0200 Subject: [PATCH 59/64] add taxa2lca tests --- lib/commands/unipept/api_runner.rb | 6 +++- lib/commands/unipept/taxa2lca.rb | 10 +++++-- test/commands/unipept/test_taxa2lca.rb | 39 ++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 test/commands/unipept/test_taxa2lca.rb diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index 13f86af7..9ce74233 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -105,12 +105,16 @@ def recently_fetched? !last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now end + # Returns a new batch_iterator based on the batch_size + def batch_iterator + Unipept::BatchIterator.new(batch_size) + end + # Runs the command def run print_server_message hydra = Typhoeus::Hydra.new(max_concurrency: 10) batch_order = Unipept::BatchOrder.new - batch_iterator = Unipept::BatchIterator.new(batch_size) batch_iterator.iterate(input_iterator) do |input_slice, batch_id, fasta_mapper| request = Typhoeus::Request.new( diff --git a/lib/commands/unipept/taxa2lca.rb b/lib/commands/unipept/taxa2lca.rb index 8aebccb5..b7560935 100644 --- a/lib/commands/unipept/taxa2lca.rb +++ b/lib/commands/unipept/taxa2lca.rb @@ -1,12 +1,18 @@ require_relative 'api_runner' module Unipept::Commands class Taxa2lca < ApiRunner - def peptide_iterator(peptides, &block) - block.call(peptides.to_a, 0) + def batch_iterator + SimpleBatchIterator.new end def batch_size fail 'NOT NEEDED FOR TAXA2LCA' end end + + class SimpleBatchIterator + def iterate(input, &block) + block.call(input.to_a, 0) + end + end end diff --git a/test/commands/unipept/test_taxa2lca.rb b/test/commands/unipept/test_taxa2lca.rb new file mode 100644 index 00000000..3ee9b491 --- /dev/null +++ b/test/commands/unipept/test_taxa2lca.rb @@ -0,0 +1,39 @@ +require_relative '../../../lib/commands' + +module Unipept + class UnipeptTaxa2lcaTestCase < Unipept::TestCase + def test_batch_size + command = Cri::Command.define { name 'taxa2lca' } + taxa2lca = Commands::Taxa2lca.new({ host: 'http://api.unipept.ugent.be' }, [], command) + assert_raises RuntimeError do + taxa2lca.batch_size + end + end + + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(taxa2lca -h)) + end + end + assert(out.include? 'show help for this command') + + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(taxa2lca --help)) + end + end + assert(out.include? 'show help for this command') + end + + def test_run + out, err = capture_io_while do + Commands::Unipept.run(%w(taxa2lca --host http://api.unipept.ugent.be 216816 1680)) + end + lines = out.each_line + assert_equal('', err) + assert(lines.next.start_with? 'taxon_id,taxon_name,taxon_rank') + assert(lines.next.start_with? '1678,Bifidobacterium,genus') + end + end +end From 3a67f970f6d8919a1368921b90b02fa3d10127db Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 17:18:43 +0200 Subject: [PATCH 60/64] add taxonomy tests --- test/commands/unipept/text_taxonomy.rb | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/commands/unipept/text_taxonomy.rb diff --git a/test/commands/unipept/text_taxonomy.rb b/test/commands/unipept/text_taxonomy.rb new file mode 100644 index 00000000..73812918 --- /dev/null +++ b/test/commands/unipept/text_taxonomy.rb @@ -0,0 +1,37 @@ +require_relative '../../../lib/commands' + +module Unipept + class UnipeptTaxonomyTestCase < Unipept::TestCase + def test_batch_size + command = Cri::Command.define { name 'taxonomy' } + taxonomy = Commands::Taxonomy.new({ host: 'http://api.unipept.ugent.be' }, [], command) + assert_equal(100, taxonomy.batch_size) + end + + def test_help + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(taxonomy -h)) + end + end + assert(out.include? 'show help for this command') + + out, _err = capture_io_while do + assert_raises SystemExit do + Commands::Unipept.run(%w(taxonomy --help)) + end + end + assert(out.include? 'show help for this command') + end + + def test_run + out, err = capture_io_while do + Commands::Unipept.run(%w(taxonomy --host http://api.unipept.ugent.be 1)) + end + lines = out.each_line + assert_equal('', err) + assert(lines.next.start_with? 'taxon_id,taxon_name,taxon_rank') + assert(lines.next.start_with? '1,root,no rank,') + end + end +end From dc400a7b826e8ef680e466f3ad77ce171b2f9b9e Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Thu, 11 Jun 2015 17:27:21 +0200 Subject: [PATCH 61/64] fix misnamed file --- test/commands/unipept/{text_taxonomy.rb => test_taxonomy.rb} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename test/commands/unipept/{text_taxonomy.rb => test_taxonomy.rb} (95%) diff --git a/test/commands/unipept/text_taxonomy.rb b/test/commands/unipept/test_taxonomy.rb similarity index 95% rename from test/commands/unipept/text_taxonomy.rb rename to test/commands/unipept/test_taxonomy.rb index 73812918..6820649c 100644 --- a/test/commands/unipept/text_taxonomy.rb +++ b/test/commands/unipept/test_taxonomy.rb @@ -31,7 +31,7 @@ def test_run lines = out.each_line assert_equal('', err) assert(lines.next.start_with? 'taxon_id,taxon_name,taxon_rank') - assert(lines.next.start_with? '1,root,no rank,') + assert(lines.next.start_with? '1,root,no rank') end end end From a603b5d6bbf399d2424001c4fb8430a22acd54e3 Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Fri, 12 Jun 2015 15:10:30 +0200 Subject: [PATCH 62/64] don't use get in accessor names --- .rubocop.yml | 2 -- lib/commands/unipept/api_runner.rb | 4 ++-- test/commands/unipept/test_api_runner.rb | 12 ++++++------ test/test_formatters.rb | 20 ++++++++++---------- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index e3590e55..ff8d785e 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -10,8 +10,6 @@ Style/ClassAndModuleChildren: Enabled: false # disable for now -Style/AccessorMethodName: - Enabled: false Style/ClassVars: Enabled: false Style/Documentation: diff --git a/lib/commands/unipept/api_runner.rb b/lib/commands/unipept/api_runner.rb index 9ce74233..27c2662d 100644 --- a/lib/commands/unipept/api_runner.rb +++ b/lib/commands/unipept/api_runner.rb @@ -21,13 +21,13 @@ def initialize(args, opts, cmd) # - the host # - the user agent def set_configuration - @host = get_host + @host = host @user_agent = 'Unipept CLI - unipept ' + Unipept::VERSION end # Returns the host. If a value is defined by both an option and the config # file, the value of the option is used. - def get_host + def host # find host in opts first host = options[:host] ? options[:host] : @configuration['host'] diff --git a/test/commands/unipept/test_api_runner.rb b/test/commands/unipept/test_api_runner.rb index 071485a9..7a185f3c 100644 --- a/test/commands/unipept/test_api_runner.rb +++ b/test/commands/unipept/test_api_runner.rb @@ -22,14 +22,14 @@ def test_config_host runner = new_runner('test', { host: 'http://param_host' }, %w(a b c)) runner.options.delete(:host) runner.configuration['host'] = 'http://config_host' - host = runner.get_host + host = runner.host assert_equal('http://config_host', host) end def test_param_host runner = new_runner('test', { host: 'http://param_host' }, %w(a b c)) runner.configuration.delete('host') - host = runner.get_host + host = runner.host assert_equal('http://param_host', host) end @@ -39,7 +39,7 @@ def test_no_host runner.options.delete(:host) _out, err = capture_io_while do assert_raises SystemExit do - runner.get_host + runner.host end end assert(err.start_with? 'WARNING: no host has been set') @@ -48,19 +48,19 @@ def test_no_host def test_host_priority runner = new_runner('test', { host: 'http://param_host' }, %w(a b c)) runner.configuration['host'] = 'http://config_host' - host = runner.get_host + host = runner.host assert_equal('http://param_host', host) end def test_http_host runner = new_runner('test', { host: 'param_host' }, %w(a b c)) - host = runner.get_host + host = runner.host assert_equal('http://param_host', host) end def test_https_host runner = new_runner('test', { host: 'https://param_host' }, %w(a b c)) - host = runner.get_host + host = runner.host assert_equal('https://param_host', host) end diff --git a/test/test_formatters.rb b/test/test_formatters.rb index 985d9747..36e4efd3 100644 --- a/test/test_formatters.rb +++ b/test/test_formatters.rb @@ -38,7 +38,7 @@ def formatter end def test_header - assert_equal('', formatter.header(TestObject.get_object)) + assert_equal('', formatter.header(TestObject.test_object)) end def test_type @@ -46,7 +46,7 @@ def test_type end def test_format - assert_equal(TestObject.get_object, formatter.format(TestObject.get_object)) + assert_equal(TestObject.test_object, formatter.format(TestObject.test_object)) end end @@ -56,7 +56,7 @@ def formatter end def test_header - assert_equal('', formatter.header(TestObject.get_object)) + assert_equal('', formatter.header(TestObject.test_object)) end def test_type @@ -64,7 +64,7 @@ def test_type end def test_format - assert_equal(TestObject.as_json, formatter.format(TestObject.get_object)) + assert_equal(TestObject.as_json, formatter.format(TestObject.test_object)) end end @@ -75,7 +75,7 @@ def formatter def test_header fasta = [['peptide', '>test']] - object = [TestObject.get_object, TestObject.get_object] + object = [TestObject.test_object, TestObject.test_object] assert_equal(TestObject.as_csv_header, formatter.header(object)) assert_equal('fasta_header,' + TestObject.as_csv_header, formatter.header(object, fasta)) end @@ -85,14 +85,14 @@ def test_type end def test_format - object = [TestObject.get_object, TestObject.get_object] + object = [TestObject.test_object, TestObject.test_object] csv = [TestObject.as_csv, TestObject.as_csv, ''].join("\n") assert_equal(csv, formatter.format(object)) end def test_format_with_fasta fasta = [['>test', '5']] - object = [TestObject.get_object, TestObject.get_object] + object = [TestObject.test_object, TestObject.test_object] csv = ['>test,' + TestObject.as_csv, '>test,' + TestObject.as_csv, ''].join("\n") assert_equal(csv, formatter.format(object, fasta)) end @@ -104,7 +104,7 @@ def formatter end def test_header - assert_equal('', formatter.header(TestObject.get_object)) + assert_equal('', formatter.header(TestObject.test_object)) end def test_type @@ -112,12 +112,12 @@ def test_type end def test_format - assert_equal(TestObject.as_xml, formatter.format(TestObject.get_object)) + assert_equal(TestObject.as_xml, formatter.format(TestObject.test_object)) end end class TestObject - def self.get_object + def self.test_object JSON.parse('{"integer": 5, "string": "string", "list": ["a", 2, false]}') end From a56f133f176bd6af90d912931a7e3c89dcb58a7c Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Fri, 12 Jun 2015 15:18:37 +0200 Subject: [PATCH 63/64] use modifier if for single line body --- .rubocop.yml | 2 -- lib/commands/peptfilter.rb | 4 +--- lib/commands/unipept/config.rb | 4 +--- lib/commands/uniprot.rb | 4 +--- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index ff8d785e..8dacae2e 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -14,8 +14,6 @@ Style/ClassVars: Enabled: false Style/Documentation: Enabled: false -Style/IfUnlessModifier: - Enabled: false Style/RescueModifier: Enabled: false Metrics/AbcSize: diff --git a/lib/commands/peptfilter.rb b/lib/commands/peptfilter.rb index dfd4cd77..b638079c 100644 --- a/lib/commands/peptfilter.rb +++ b/lib/commands/peptfilter.rb @@ -33,9 +33,7 @@ class Peptfilter end pept = pept.chomp - if Peptfilter.filter(pept, minlen, maxlen, lacks, contains) - puts pept - end + puts pept if Peptfilter.filter(pept, minlen, maxlen, lacks, contains) end end end diff --git a/lib/commands/unipept/config.rb b/lib/commands/unipept/config.rb index e547f0a6..51a1989c 100644 --- a/lib/commands/unipept/config.rb +++ b/lib/commands/unipept/config.rb @@ -1,9 +1,7 @@ module Unipept class Commands::Config < Cri::CommandRunner def run - if arguments.size == 0 || arguments.size > 2 - abort command.help - end + abort command.help if arguments.size == 0 || arguments.size > 2 key, value = *arguments diff --git a/lib/commands/uniprot.rb b/lib/commands/uniprot.rb index 9d354ed4..cae8c18b 100644 --- a/lib/commands/uniprot.rb +++ b/lib/commands/uniprot.rb @@ -62,9 +62,7 @@ def self.get_uniprot_entry(accession, format) else # other format has been specified, just download and output resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{accession}.#{format}") - if resp.success? - resp.response_body - end + resp.response_body if resp.success? end end end From a31d5706f8509cd465398da9d724425ccf4f5e1e Mon Sep 17 00:00:00 2001 From: Bart Mesuere Date: Fri, 12 Jun 2015 15:27:28 +0200 Subject: [PATCH 64/64] reenable extra rubocop checks --- .rubocop.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 8dacae2e..f91664ec 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -20,11 +20,7 @@ Metrics/AbcSize: Enabled: false Metrics/ClassLength: Enabled: false -Metrics/CyclomaticComplexity: - Enabled: false Metrics/LineLength: Enabled: false Metrics/MethodLength: Enabled: false -Metrics/PerceivedComplexity: - Enabled: false