Permalink
Browse files

Loosen dependencies; fix incorrect ordering of the output header columns

  • Loading branch information...
Ben J. Woodcroft
Ben J. Woodcroft committed Mar 27, 2013
1 parent bd2f2df commit edd1b34dd73e8907631b07bb140ac5fd1a47cf5a
Showing with 32 additions and 21 deletions.
  1. +3 −3 Gemfile
  2. +7 −7 bin/kmer_counter.rb
  3. +2 −0 test/data/100random.fa
  4. +2 −0 test/helper.rb
  5. +18 −11 test/test_bio-kmer_counter.rb
View
@@ -11,8 +11,8 @@ gem 'bio-logger', '>=1.0.1'
# Include everything needed to run rake, tests, features, etc.
group :development do
gem "shoulda", ">= 0"
- gem "rdoc", "~> 3.12"
- gem "jeweler", "~> 1.8.3"
+ gem "rdoc", ">= 3.12"
+ gem "jeweler",">= 1.8.3"
gem "bundler", ">= 1.0.21"
- gem "rdoc", "~> 3.12"
+ gem "rdoc", ">= 3.12"
end
View
@@ -64,8 +64,8 @@
opts.on("-l", "--window-length", "print the length of the window in the output [default #{options[:sequence_length]}]") do |v|
options[:sequence_length] = true
end
-
-
+
+
# logger options
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") do |q|
Bio::Log::CLI.trace('error')
@@ -90,7 +90,7 @@
# Print headers
print "ID\t"
-print Bio::Sequence::Kmer.merge_down_to_lowest_lexigraphical_form(Bio::Sequence::Kmer.empty_full_kmer_hash(options[:kmer])).keys.join("\t")
+print Bio::Sequence::Kmer.merge_down_to_lowest_lexigraphical_form(Bio::Sequence::Kmer.empty_full_kmer_hash(options[:kmer])).keys.sort.join("\t")
print "\tWindowLength" if options[:sequence_length]
print "\tcontig" if options[:contig_name]
puts
@@ -99,18 +99,18 @@
process_window = lambda do |window,kmer,sequence_name,contig_name|
counts = orig.dup
num_kmers_counted = 0
-
+
window.window_search(options[:kmer],1) do |tetranucleotide|
str = tetranucleotide.to_s
next unless str.gsub(/[ATGC]+/,'') == ''
num_kmers_counted += 1
counts[str]+=1
#counts[Bio::Sequence::NA.new(tetranucleotide).lowest_lexigraphical_form.to_s.upcase] += 1
end
-
+
# Merge everything into lowest lexigraphical form
new_counts = Bio::Sequence::Kmer.merge_down_to_lowest_lexigraphical_form counts
-
+
if num_kmers_counted == 0
log.warn "Skipping window #{sequence_name} because few/none ATGC's were detected (was it all N's?)"
else
@@ -127,7 +127,7 @@
fasta_filename = ARGV[0]
progress = nil
progress = ProgressBar.new('kmer_counter', `grep -c '>' '#{fasta_filename}'`.to_i) if options[:progressbar]
-ff = Bio::FlatFile.open(fasta_filename)
+ff = Bio::FlatFile.open(fasta_filename)
ff.each do |sequence|
window_counter = 0
View
@@ -0,0 +1,2 @@
+>random
+GCAGAGCACCTCCGCGTGACATTCTATTATGGAATTGAAGTCCAGTCAGACCAGTACCCTTGCACAGGCAATACATTGGAACTGGATCAGAACTTCCTAC
View
@@ -14,5 +14,7 @@
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'bio-kmer_counter'
+TEST_DATA_DIR = File.join(File.dirname(__FILE__), 'data')
+
class Test::Unit::TestCase
end
@@ -8,24 +8,24 @@ class TestBioKmerCounter < Test::Unit::TestCase
assert_equal Bio::Sequence::NA.new('AA'), Bio::Sequence::NA.new('TT').lowest_lexigraphical_form
assert_equal Bio::Sequence::NA.new('AG'), Bio::Sequence::NA.new('CT').lowest_lexigraphical_form
end
-
+
should 'test_empty_full_kmer_hash' do
answer = {}; %w(A C G T).each{|k| answer[k] = 0}
assert_equal answer, Bio::Sequence::Kmer.empty_full_kmer_hash(1)
end
-
+
should 'test merge down' do
answer = {}; %w(A C).each{|k| answer[k] = 0}
full = Bio::Sequence::Kmer.empty_full_kmer_hash(1)
assert_equal answer, Bio::Sequence::Kmer.merge_down_to_lowest_lexigraphical_form(full)
full = Bio::Sequence::Kmer.empty_full_kmer_hash #defaults to kmer hash length 4
assert_equal 136, Bio::Sequence::Kmer.merge_down_to_lowest_lexigraphical_form(full).length
end
-
+
def script_path
File.join(File.dirname(__FILE__),'..','bin','kmer_counter.rb')
end
-
+
should 'test_running1' do
Tempfile.open('one') do |tempfile|
tempfile.puts '>one'
@@ -35,7 +35,7 @@ def script_path
assert_equal "ID\tA\tC\none_0\t0.6\t0.4\n", `#{script_path} -w 5 -k 1 #{tempfile.path}`
end
end
-
+
should 'not whack out when there isnt any sequence to count' do
Tempfile.open('one') do |tempfile|
tempfile.puts '>one'
@@ -45,13 +45,13 @@ def script_path
assert_equal "ID\tA\tC\n", `#{script_path} -w 5 -k 1 #{tempfile.path}`
end
end
-
+
should 'give correct increments in window numbering' do
Tempfile.open('one') do |tempfile|
tempfile.puts '>one'
tempfile.puts 'ATGCATGCAT' #10 letters long
tempfile.close
-
+
expected = "ID\tA\tC\n"+
"one_0\t0.5\t0.5\n"+
"one_1\t0.5\t0.5\n"+
@@ -60,14 +60,14 @@ def script_path
assert_equal expected, `#{script_path} -w 4 -k 1 -m 2 #{tempfile.path}`
end
end
-
+
should "print help when no arguments are given" do
command = "#{script_path}"
Open3.popen3(command) do |stdin, stdout, stderr|
assert stderr.readlines[0].match(/^Usage: kmer_counter/)
end
end
-
+
should 'work with lowercase' do
Tempfile.open('one') do |tempfile|
tempfile.puts '>one'
@@ -77,7 +77,7 @@ def script_path
assert_equal "ID\tA\tC\none_0\t0.6\t0.4\n", `#{script_path} -w 5 -k 1 #{tempfile.path}`
end
end
-
+
should 'by default count contigs greater than 2kb but less than 5kb' do
Tempfile.open('one') do |tempfile|
tempfile.puts '>one'
@@ -87,7 +87,7 @@ def script_path
assert_equal "ID\tA\tC\none_leftover_0\t1.0\t0.0\n", `#{script_path} -k 1 #{tempfile.path}`
end
end
-
+
should 'by default count contigs greater than 2kb but less than 5kb' do
Tempfile.open('one') do |tempfile|
tempfile.puts '>one'
@@ -97,4 +97,11 @@ def script_path
assert_equal "ID\tA\tC\none_0\t1.0\t0.0\none_leftover_1\t1.0\t0.0\n", `#{script_path} -k 1 #{tempfile.path}`
end
end
+
+ should 'work simulated example with kmer length = 2' do
+ expected = %w(ID AA AC AG AT CA CC CG GA GC TA).join("\t")+"\n"+
+ %w(random_leftover_0 0.1111111111111111 0.13131313131313133 0.1414141414141414 0.0707070707070707 0.1717171717171717 0.1111111111111111 0.020202020202020204 0.1414141414141414 0.050505050505050504 0.050505050505050504).join("\t")+"\n"
+
+ assert_equal expected, `#{script_path} -k 2 -m 1 #{File.join(TEST_DATA_DIR,'100random.fa')}`
+ end
end

0 comments on commit edd1b34

Please sign in to comment.