Skip to content

Commit

Permalink
Update to noodles 0.63.0
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Feb 8, 2024
1 parent 8298149 commit b6650f9
Show file tree
Hide file tree
Showing 13 changed files with 176 additions and 139 deletions.
41 changes: 27 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ codegen-units = 1

[dependencies]
anyhow = "1.0.32"
bstr = { version = "1.9.0", default-features = false, features = ["std"] }
clap = { version = "4.0.10", features = ["derive", "string"] }
crossbeam-channel = "0.5.6"
flate2 = "1.0.14"
git-testament = "0.2.0"
interval-tree = { git = "https://github.com/zaeleus/interval-tree.git", rev = "e303d7254d53de5c418d6079d4b66c30c10958d4" }
mimalloc = { version = "0.1.26", default-features = false }
noodles = { version = "0.59.0", features = ["bam", "bgzf", "core", "gff", "sam"] }
noodles-bgzf = { version = "0.25.0", features = ["libdeflate"] }
noodles = { version = "0.63.0", features = ["bam", "bgzf", "core", "gff", "sam"] }
noodles-bgzf = { version = "0.26.0", features = ["libdeflate"] }
thiserror = "1.0.40"
tracing = "0.1.25"
tracing-subscriber = "0.3.3"
10 changes: 8 additions & 2 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{num::NonZeroUsize, path::PathBuf};

use clap::{Parser, Subcommand};
use git_testament::{git_testament, render_testament};
use noodles::sam::record::MappingQuality;
use noodles::sam::alignment::record::MappingQuality;

use crate::{normalization::Method, StrandSpecificationOption};

Expand Down Expand Up @@ -71,7 +71,7 @@ pub struct Quantify {
#[arg(short = 'i', long, default_value = "gene_id")]
pub id: String,

#[arg(long, default_value = "10")]
#[arg(long, value_parser = parse_mapping_quality, default_value = "10")]
pub min_mapping_quality: MappingQuality,

/// Output destination for feature counts.
Expand All @@ -89,3 +89,9 @@ pub struct Quantify {
/// Input alignment file.
pub src: PathBuf,
}

fn parse_mapping_quality(s: &str) -> Result<MappingQuality, &'static str> {
s.parse::<u8>()
.map_err(|_| "invalid input")
.and_then(|n| MappingQuality::new(n).ok_or("missing mapping quality"))
}
2 changes: 1 addition & 1 deletion src/commands/quantify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ where
let mut reader = bgzf::reader::Builder::default()
.set_worker_count(worker_count)
.build_from_path(bam_src.as_ref())
.map(bam::Reader::from)
.map(bam::io::Reader::from)
.with_context(|| format!("Could not open {}", bam_src.as_ref().display()))?;

let header = reader.read_header()?;
Expand Down
55 changes: 31 additions & 24 deletions src/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,15 @@ use std::{
thread,
};

use bstr::{BStr, ByteSlice};
use interval_tree::IntervalTree;
use noodles::{
bam,
core::Position,
gff,
sam::{
header::{
record::value::{map::ReferenceSequence, Map},
ReferenceSequences,
},
record::ReferenceSequenceName,
sam::header::{
record::value::{map::ReferenceSequence, Map},
ReferenceSequences,
},
};

Expand All @@ -33,7 +31,7 @@ use self::context::Event;
const CHUNK_SIZE: usize = 8192;

pub fn count_single_end_records<R>(
mut reader: bam::Reader<R>,
mut reader: bam::io::Reader<R>,
features: &Features,
reference_sequences: &ReferenceSequences,
filter: &Filter,
Expand All @@ -47,7 +45,7 @@ where
let (tx, rx) = crossbeam_channel::bounded(worker_count.get());

scope.spawn(move || {
let mut records = reader.lazy_records();
let mut records = reader.records();

loop {
let mut chunk = Vec::with_capacity(CHUNK_SIZE);
Expand Down Expand Up @@ -110,7 +108,7 @@ pub fn count_single_end_record(
reference_sequences: &ReferenceSequences,
filter: &Filter,
strand_specification: StrandSpecification,
record: &bam::lazy::Record,
record: &bam::Record,
) -> io::Result<Event> {
if let Some(event) = filter.filter(record)? {
return Ok(event);
Expand All @@ -119,14 +117,14 @@ pub fn count_single_end_record(
let tree = match get_tree(
features,
reference_sequences,
record.reference_sequence_id()?,
record.reference_sequence_id().transpose()?,
)? {
Some(t) => t,
None => return Ok(Event::NoFeature),
};

let cigar = record.cigar();
let start = record.alignment_start()?.expect("missing alignment start");
let start = record.alignment_start().expect("missing alignment start")?;
let intervals = MatchIntervals::new(&cigar, start);

let flags = record.flags();
Expand All @@ -141,7 +139,7 @@ pub fn count_single_end_record(
}

pub fn count_paired_end_records<R>(
reader: bam::Reader<R>,
reader: bam::io::Reader<R>,
features: &Features,
reference_sequences: &ReferenceSequences,
filter: &Filter,
Expand Down Expand Up @@ -238,20 +236,24 @@ pub fn count_paired_end_record_pair(
reference_sequences: &ReferenceSequences,
filter: &Filter,
strand_specification: StrandSpecification,
r1: &bam::lazy::Record,
r2: &bam::lazy::Record,
r1: &bam::Record,
r2: &bam::Record,
) -> io::Result<Event> {
if let Some(event) = filter.filter_pair(r1, r2)? {
return Ok(event);
}

let tree = match get_tree(features, reference_sequences, r1.reference_sequence_id()?)? {
let tree = match get_tree(
features,
reference_sequences,
r1.reference_sequence_id().transpose()?,
)? {
Some(t) => t,
None => return Ok(Event::NoFeature),
};

let cigar = r1.cigar();
let start = r1.alignment_start()?.expect("missing alignment start");
let start = r1.alignment_start().expect("missing alignment start")?;
let intervals = MatchIntervals::new(&cigar, start);

let f1 = r1.flags();
Expand All @@ -262,13 +264,17 @@ pub fn count_paired_end_record_pair(

let mut set = find(tree, intervals, strand_specification, is_reverse)?;

let tree = match get_tree(features, reference_sequences, r2.reference_sequence_id()?)? {
let tree = match get_tree(
features,
reference_sequences,
r2.reference_sequence_id().transpose()?,
)? {
Some(t) => t,
None => return Ok(Event::NoFeature),
};

let cigar = r2.cigar();
let start = r2.alignment_start()?.expect("missing alignment start");
let start = r2.alignment_start().expect("missing alignment start")?;
let intervals = MatchIntervals::new(&cigar, start);

let f2 = r2.flags();
Expand All @@ -289,7 +295,7 @@ pub fn count_paired_end_singleton_record(
reference_sequences: &ReferenceSequences,
filter: &Filter,
strand_specification: StrandSpecification,
record: &bam::lazy::Record,
record: &bam::Record,
) -> io::Result<Event> {
if let Some(event) = filter.filter(record)? {
return Ok(event);
Expand All @@ -298,14 +304,14 @@ pub fn count_paired_end_singleton_record(
let tree = match get_tree(
features,
reference_sequences,
record.reference_sequence_id()?,
record.reference_sequence_id().transpose()?,
)? {
Some(t) => t,
None => return Ok(Event::NoFeature),
};

let cigar = record.cigar();
let start = record.alignment_start()?.expect("missing alignment start");
let start = record.alignment_start().expect("missing alignment start")?;
let intervals = MatchIntervals::new(&cigar, start);

let flags = record.flags();
Expand Down Expand Up @@ -361,9 +367,10 @@ fn find(
fn get_reference_sequence(
reference_sequences: &ReferenceSequences,
reference_sequence_id: Option<usize>,
) -> io::Result<(&ReferenceSequenceName, &Map<ReferenceSequence>)> {
) -> io::Result<(&BStr, &Map<ReferenceSequence>)> {
reference_sequence_id
.and_then(|id| reference_sequences.get_index(id))
.map(|(name, map)| (name.as_bstr(), map))
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::InvalidData,
Expand All @@ -388,7 +395,7 @@ pub fn get_tree<'t>(
reference_sequence_id: Option<usize>,
) -> io::Result<Option<&'t IntervalTree<Position, Entry>>> {
let (name, _) = get_reference_sequence(reference_sequences, reference_sequence_id)?;
Ok(features.get(name.as_str()))
Ok(features.get(name))
}

#[cfg(test)]
Expand Down Expand Up @@ -423,7 +430,7 @@ mod tests {
let reference_sequence_id = Some(1);
let (name, reference_sequence) =
get_reference_sequence(&reference_sequences, reference_sequence_id)?;
assert_eq!(name.as_str(), "sq1");
assert_eq!(name, &b"sq1"[..]);
assert_eq!(usize::from(reference_sequence.length()), 13);

let reference_sequence_id = None;
Expand Down
Loading

0 comments on commit b6650f9

Please sign in to comment.