Skip to content

Commit

Permalink
less buggy handling of paired-end sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
wwood committed Jan 5, 2015
1 parent db6f084 commit af4449b
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 5 deletions.
19 changes: 18 additions & 1 deletion lib/bio-velvet_underground/binary_sequence_store.rb
Expand Up @@ -24,6 +24,18 @@ def length
@readset[:readCount]
end

# Report whether the given sequence belongs to a paired category.
#
# The category is read as a signed 8-bit value at index sequence_id-1 of
# the readset's categories array.
#
# Returns false for category 0 and true for category 1.
# Raises a RuntimeError for any other category value.
def paired?(sequence_id)
  categories = FFI::Pointer.new(:int8, @readset[:categories])
  category = categories[sequence_id - 1].read_int8

  case category
  when 0 then false
  when 1 then true
  else
    raise "Unexpected velvet sequence category found: #{category}"
  end
end

# Returns true if the sequence ID refers to the
# second in a pair of sequences.
def is_second_in_pair?(sequence_id)
Expand All @@ -33,8 +45,10 @@ def is_second_in_pair?(sequence_id)
Bio::Velvet::Underground.isSecondInPair @readset, sequence_id-1
end

# Returns the ID of the given sequence_id's pair, or nil if it is not a
# paired sequence
def pair_id(sequence_id)
return nil unless paired?(sequence_id)
if is_second_in_pair?(sequence_id)
sequence_id-1
else
Expand Down Expand Up @@ -80,6 +94,9 @@ def self.attach_binary_sequence_functions
# IDnum position);
attach_function :getTightStringInArray, [:pointer, :int32], :pointer

# int pairedCategories(ReadSet * reads);
attach_function :pairedCategories, [:pointer], :int

# boolean isSecondInPair(ReadSet * reads, IDnum index);
attach_function :isSecondInPair, [:pointer, :int32], :bool
end
Expand Down
9 changes: 5 additions & 4 deletions lib/bio-velvet_underground/graph.rb
Expand Up @@ -119,14 +119,16 @@ def twin
end

# Returns an Array of NodedRead objects, one per ShortReadMarker that
# velvet reports for this node.
#
# The result is memoised in @short_reads, so the underlying library is
# only queried on the first call.
def fwd_short_reads
  return @short_reads unless @short_reads.nil?

  array_start_pointer = Bio::Velvet::Underground.getNodeReads(
    @internal_node_struct, @graph.internal_graph_struct
  )
  num_short_reads = Bio::Velvet::Underground.getNodeReadCount(
    @internal_node_struct, @graph.internal_graph_struct
  )
  # Calculate once rather than on every iteration, for performance.
  struct_size = Bio::Velvet::Underground::ShortReadMarker.size

  @short_reads = (0...num_short_reads).map do |i|
    # Use the fact that FFI pointers can do pointer arithmetic: the i-th
    # marker starts i*struct_size bytes past the start of the array.
    pointer = array_start_pointer + (i * struct_size)
    # NOTE(review): the second argument is presumably the "forward
    # direction" flag of NodedRead - confirm against NodedRead#initialize.
    NodedRead.new Bio::Velvet::Underground::ShortReadMarker.new(pointer), true
  end
  @short_reads
end

def rev_short_reads
Expand All @@ -141,7 +143,6 @@ def short_reads
end
return reads
end

end

# TODO: this class is currently unimplemented.
Expand Down
9 changes: 9 additions & 0 deletions spec/binary_sequence_store_spec.rb
Expand Up @@ -36,4 +36,13 @@
seqs.pair_id(5).should == 6
seqs.pair_id(6).should == 5
end

it 'should be able to understand non-mates and mates in the same run' do
  store_path = File.join(TEST_DATA_DIR, '5_singles_and_pairs', 'CnyUnifiedSeq')
  store = Bio::Velvet::Underground::BinarySequenceStore.new(store_path)

  # sequence ID => expected pair ID (nil where no pair is expected)
  expected_pairs = [
    [1, nil],
    [50000, nil],
    [50001, 50002],
    [100000, 99999]
  ]

  expected_pairs.each do |sequence_id, mate_id|
    store.pair_id(sequence_id).should == mate_id
  end
end
end

0 comments on commit af4449b

Please sign in to comment.