Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben J. Woodcroft committed Mar 1, 2018
1 parent 2c25cf5 commit 5e97625
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 14 deletions.
6 changes: 3 additions & 3 deletions coverage_regionwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@ def finish_a_contig(contig_lengths, refname, last_position, interval, last_count
current_total_pairs = 0
interval = args.interval
cmd = "samtools view -f2 -F3852 '%s'" % args.bam_file
(out, err) = subprocess.Popen(['bash','-c',cmd], stdout=subprocess.PIPE).communicate() #TODO worry about stderr
proc = subprocess.Popen(['bash','-c',cmd], stdout=subprocess.PIPE) #TODO worry about stderr

for line in out.splitlines():
for line in proc.stdout:
splits = line.split("\t") #TODO: use csv for faster
logging.debug("Interrogating line %s" % str(splits))
#logging.debug("Interrogating line %s" % str(splits))
if len(splits) <= 11: raise Exception("unexpected number of fields in sam line %s" % line)
ref = splits[2]
start = int(splits[3])
Expand Down
37 changes: 26 additions & 11 deletions fastq_split.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
:fwd_read_grep => ' 1:',
:rev_read_grep => ' 2:',
:file_type => 'fastq',
:awk => false,
}
o = OptionParser.new do |opts|
opts.banner = "
Expand All @@ -35,6 +36,9 @@
# Number of processes passed to `pigz -p` when compressing the split output.
# The help strings are double-quoted so the default is interpolated into the
# usage text instead of being printed as a literal "#{...}".
# NOTE(review): assumes options[:num_processes] has a default in the options
# hash, which is not visible here - confirm.
opts.on('--threads NUM', Integer, "number of threads to use for pigz compression [default: #{options[:num_processes]}]") do |i|
  options[:num_processes] = i
end
# Select the awk-based split (by line position) instead of grepping read names.
opts.on('--awk', "split file up with awk, not grep [default: #{options[:awk]}]") do
  options[:awk] = true
end

# logger options
opts.separator "\nVerbosity:\n\n"
Expand All @@ -49,7 +53,6 @@
# Setup logging. bio-logger defaults to STDERR not STDOUT, I disagree
Bio::Log::CLI.logger(options[:logger]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)


ARGV.each do |fastq_file|
# Fail early when an input path does not exist. File.exist? is used because
# the File.exists? alias was deprecated and then removed in Ruby 3.2.
raise "not fastq file found: #{fastq_file}" unless File.exist?(fastq_file)
base = File.basename fastq_file
Expand All @@ -66,17 +69,29 @@
end
end

num_following_lines = {'fastq' => '3', 'fasta' => '1'}[options[:file_type]]
log.debug("Using #{num_following_lines} lines following")
if options[:awk]
log.info "Creating #{fq1} .."
`zcat '#{fastq_file}' | awk 'NR%8<5 && NR%8>0' |pigz -p #{options[:num_processes]} >#{fq1}`
fq1_size = File.size fq1
log.info "#{fq1} size #{fq1_size}"

log.info "Creating #{fq1} .."
`zcat '#{fastq_file}' | grep -A#{num_following_lines} '#{options[:fwd_read_grep]}' |grep -v '^--$' |pigz -p #{options[:num_processes]} >#{fq1}`
fq1_size = File.size fq1
log.info "#{fq1} size #{fq1_size}"
log.info "Creating #{fq2} .."
`zcat '#{fastq_file}' | awk 'NR%8>4 || NR%8==0' |pigz -p #{options[:num_processes]} >#{fq2}`
fq2_size = File.size fq2
log.info "#{fq2} size #{fq2_size}"
else
num_following_lines = {'fastq' => '3', 'fasta' => '1'}[options[:file_type]]
log.debug("Using #{num_following_lines} lines following")

log.info "Creating #{fq2} .."
`zcat '#{fastq_file}' | grep -A#{num_following_lines} '#{options[:rev_read_grep]}' |grep -v '^--$' |pigz -p #{options[:num_processes]} >#{fq2}`
fq2_size = File.size fq2
log.info "#{fq2} size #{fq2_size}"
log.info "Creating #{fq1} .."
`zcat '#{fastq_file}' | grep -A#{num_following_lines} '#{options[:fwd_read_grep]}' |grep -v '^--$' |pigz -p #{options[:num_processes]} >#{fq1}`
fq1_size = File.size fq1
log.info "#{fq1} size #{fq1_size}"

log.info "Creating #{fq2} .."
`zcat '#{fastq_file}' | grep -A#{num_following_lines} '#{options[:rev_read_grep]}' |grep -v '^--$' |pigz -p #{options[:num_processes]} >#{fq2}`
fq2_size = File.size fq2
log.info "#{fq2} size #{fq2_size}"
end
end
end #end if running as a script

0 comments on commit 5e97625

Please sign in to comment.