Skip to content

Commit

Permalink
Merge pull request #679 from Lazy-poet/issue/669
Browse files Browse the repository at this point in the history
add method for computing number of sequences in a query
  • Loading branch information
yannickwurm committed Feb 21, 2024
2 parents 89fd7a0 + eb08a87 commit 61d96fa
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 5 deletions.
23 changes: 20 additions & 3 deletions lib/sequenceserver/blast/job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,32 @@ def initialize(params)
else
validate params
super do
@method = params[:method]
@method = params[:method]
@query = params[:sequence]
@qfile = store('query.fa', params[:sequence])
@databases = Database[params[:databases]]
@advanced = params[:advanced].to_s.strip
@options = @advanced + defaults
# The following params are for analytics only
@num_threads = config[:num_threads]
@query_length = calculate_query_size
@number_of_sequences = calculate_number_of_sequences
@databases_ncharacters_total = calculate_databases_ncharacters_total
end
end
end

# :nodoc:
# Attributes used by us - should be considered private.
attr_reader :advanced
attr_reader :databases, :databases_ncharacters_total, :method, :num_threads, :options, :qfile, :query_length
attr_reader :advanced,
:databases,
:databases_ncharacters_total,
:method,
:num_threads,
:options,
:qfile,
:query_length,
:number_of_sequences

# :nodoc:
# Deprecated; see Report#extract_params
Expand Down Expand Up @@ -92,6 +101,14 @@ def calculate_query_size
size
end

# Counts the sequences contained in the submitted query text (@query).
#
# The query is split wherever a ">" starts a line (leading whitespace on that
# line is tolerated). Every piece holding something other than whitespace is
# one sequence, so text placed before the first ">" header counts as a
# sequence of its own.
#
# Pieces made up solely of whitespace are ignored. Without this, a blank
# first line such as "\r\n" or "  \n" ahead of the first ">" header would be
# counted as an extra sequence.
#
# Returns an Integer (0 for an empty query).
def calculate_number_of_sequences
  @query.split(/\n\s*>\s*+/).count { |piece| !piece.strip.empty? }
end

def validate(params)
validate_method params[:method]
validate_sequences params[:sequence]
Expand Down
67 changes: 65 additions & 2 deletions spec/job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,34 @@ module SequenceServer
"corrected\n YUOP POKJ M NAHAA JJHFGF YERTQ "
end

# Query fixture: raw multi-line sequence text with no ">" header line.
# The string literal is kept verbatim because its exact content is the test input.
let(:query_without_symbol) do
"MNTLWLSLWDYPGKLPLNFMVFDTKDDLQAAYWRDPYSIPLAVIFEDPQPISQRLIYEIR
TNPSYTLPPPPTKLYSAPISCRKNKTGHWMDDILSIKTGESCPVNNYLHSGFLALQMITD
ITKIKLENSDVTIPDIKLIMFPKEPYTADWMLAFRVVIPLYMVLALSQFITYLLILIVGE
KENKIKEGMKMMGLNDSVF"
end

# Query fixture: a single record whose very first character is the ">"
# header marker, followed by its sequence lines.
let(:query_with_symbol) do
'>SI2.2.0_13722 locus=Si_gnF.scaffold06207[1925625..1928536].pep_1 quality=100.00
MSANRLNVLVTLMLAVALLVTESGNAQVDGYLQFNPKRSAVSSPQKYCGKKLSNALQIIC
DGVYNSMFKKSGQDFPPQNKRHIAHRINGNEEESFTTLKSNFLNWCVEVYHRHYRFVFVS
EMEMADYPLAYDISPYLPPFLSRARARGMLDGRFAGRRYRRESRGIHEECCINGCTINEL
TSYCGP'
end

# Query fixture: sequence text with no ">" header, followed by a second
# record introduced by a ">" header line. The string ends with a newline.
let(:query_with_and_without_symbol) do
"MNTLWLSLWDYPGKLPLNFMVFDTKDDLQAAYWRDPYSIPLAVIFEDPQPISQRLIYEIR
TNPSYTLPPPPTKLYSAPISCRKNKTGHWMDDILSIKTGESCPVNNYLHSGFLALQMITD
ITKIKLENSDVTIPDIKLIMFPKEPYTADWMLAFRVVIPLYMVLALSQFITYLLILIVGE
KENKIKEGMKMMGLNDSVF
>SI2.2.0_13722 locus=Si_gnF.scaffold06207[1925625..1928536].pep_1 quality=100.00
MSANRLNVLVTLMLAVALLVTESGNAQVDGYLQFNPKRSAVSSPQKYCGKKLSNALQIIC
DGVYNSMFKKSGQDFPPQNKRHIAHRINGNEEESFTTLKSNFLNWCVEVYHRHYRFVFVS
EMEMADYPLAYDISPYLPPFLSRARARGMLDGRFAGRRYRRESRGIHEECCINGCTINEL
TSYCGP
"
end

# all databases used here are v5 databases included in
# SequenceServer.init(database_dir: "#{__dir__}/database")
# [15] = funky ids (nucleotide)
Expand Down Expand Up @@ -85,8 +113,43 @@ module SequenceServer
databases: [Database.ids[15], Database.ids[16], Database.ids[19]],
method: 'blastn'
}
end

@params_query_without_symbol ={
sequence: query_without_symbol,
databases: [Database.ids[17]],
method: 'blastp'
}

@params_query_with_symbol ={
sequence: query_with_symbol,
databases: [Database.ids[17]],
method: 'blastp'
}
@params_query_with_and_without_symbol ={
sequence: query_with_and_without_symbol,
databases: [Database.ids[17]],
method: 'blastp'
}
end
context 'queries not containing a >' do
  # Text without any ">" header is expected to count as one sequence.
  let(:headerless_job) do
    Job.create(@params_query_without_symbol)
  end

  it 'should accurately compute number of sequences' do
    expect(headerless_job.number_of_sequences).to eq 1
  end
end
context 'queries containing a > at the start' do
  # A query opening with a ">" header is expected to count as one sequence.
  let(:headed_job) do
    Job.create(@params_query_with_symbol)
  end

  it 'should accurately compute number of sequences' do
    expect(headed_job.number_of_sequences).to eq 1
  end
end
context 'queries containing a > but not at the start' do
  # Headerless text followed by a ">"-headed record is expected to count as
  # two sequences.
  let(:mixed_job) do
    Job.create(@params_query_with_and_without_symbol)
  end

  it 'should accurately compute number of sequences' do
    expect(mixed_job.number_of_sequences).to eq 2
  end
end

context 'with one protein database' do
let(:test_job1) { Job.create(@params_prot_1db) }

Expand All @@ -105,7 +168,7 @@ module SequenceServer
it 'should accurately compute total characters of databases used' do
expect(test_job2.databases_ncharacters_total).to eql(280_685)
end

it 'should accurately compute query length' do
expect(test_job2.query_length).to eq(64)
end
Expand Down

0 comments on commit 61d96fa

Please sign in to comment.