Skip to content

Commit

Permalink
bcf/header/string_map: Replace IndexSet with a custom ordered set
Browse files Browse the repository at this point in the history
This is a necessary step to allow nonsequential entries.
  • Loading branch information
zaeleus committed Jan 7, 2022
1 parent 0f7790a commit 4291d88
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 48 deletions.
7 changes: 7 additions & 0 deletions noodles-bcf/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
`vcf::header::ParseError::StringMapPositionMismatch` if the string map
position of an entry and record-defined IDX field value do not match.

### Removed

* bcf/header/string_map: Remove `Deref<Target = IndexSet<String>>` for
`StringMap`.

`StringMap` is no longer backed by an `IndexSet`.

## 0.10.0 - 2021-12-16

### Added
Expand Down
155 changes: 107 additions & 48 deletions noodles-bcf/src/header/string_map.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::{
ops::Deref,
collections::HashMap,
mem,
str::{FromStr, Lines},
};

use indexmap::IndexSet;
use noodles_vcf::{
self as vcf,
header::{Filter, Format, Info, ParseError, Record},
Expand All @@ -15,31 +15,77 @@ use noodles_vcf::{
///
/// See § 6.2.1 Dictionary of strings (2021-05-13).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StringMap(IndexSet<String>);
pub struct StringMap {
// Maps each entry's value to its position in `entries`.
indices: HashMap<String, usize>,
// Entry values, stored in the order they were added.
entries: Vec<String>,
}

impl StringMap {
fn insert(&mut self, value: String) {
self.0.insert(value);
/// Looks up the entry stored at position `i`.
///
/// Returns `None` when `i` is past the last entry.
///
/// # Examples
///
/// ```
/// use noodles_bcf::header::StringMap;
/// let string_map = StringMap::default();
/// assert_eq!(string_map.get_index(0), Some("PASS"));
/// assert!(string_map.get_index(1).is_none());
/// ```
pub fn get_index(&self, i: usize) -> Option<&str> {
    self.entries.get(i).map(String::as_str)
}

/// Looks up the position at which `value` is stored.
///
/// Returns `None` when `value` is not in the string map.
///
/// # Examples
///
/// ```
/// use noodles_bcf::header::StringMap;
/// let string_map = StringMap::default();
/// assert_eq!(string_map.get_index_of("PASS"), Some(0));
/// assert!(string_map.get_index_of("DP").is_none());
/// ```
pub fn get_index_of(&self, value: &str) -> Option<usize> {
    let position = self.indices.get(value)?;
    Some(*position)
}

/// Inserts `value`, returning the entry it replaced, if any.
fn insert(&mut self, value: String) -> Option<String> {
    let (_, replaced) = self.insert_full(value);
    replaced
}

/// Inserts `value` and reports the index it occupies.
///
/// If an equal value is already present, its entry is overwritten in place and
/// the previous string is returned alongside the existing index. Otherwise the
/// value is appended and its new index is returned with `None`.
fn insert_full(&mut self, value: String) -> (usize, Option<String>) {
    if let Some(i) = self.get_index_of(&value) {
        let previous = mem::replace(&mut self.entries[i], value);
        return (i, Some(previous));
    }

    (self.push(value), None)
}

fn insert_full(&mut self, value: String) -> (usize, bool) {
self.0.insert_full(value)
/// Appends `value` as a new entry and returns the index assigned to it.
///
/// This does not check whether `value` is already present.
fn push(&mut self, value: String) -> usize {
    let next = self.entries.len();

    self.entries.push(value.clone());
    self.indices.insert(value, next);

    next
}
}

impl Default for StringMap {
fn default() -> Self {
// § 6.2.1 Dictionary of strings (2021-01-13): "Note that 'PASS' is always implicitly
// encoded as the first entry in the header dictionary."
Self([Filter::pass().id().into()].into_iter().collect())
}
}

impl Deref for StringMap {
type Target = IndexSet<String>;
let pass = Filter::pass().id().to_string();

fn deref(&self) -> &Self::Target {
&self.0
Self {
indices: [(pass.clone(), 0)].into_iter().collect(),
entries: vec![pass],
}
}
}

Expand Down Expand Up @@ -137,14 +183,17 @@ mod tests {

#[test]
fn test_default() {
assert_eq!(
StringMap::default(),
StringMap([String::from("PASS")].into_iter().collect())
);
let string_map = StringMap::default();

assert_eq!(string_map.indices.len(), 1);
assert_eq!(string_map.indices.get("PASS"), Some(&0));

assert_eq!(string_map.entries.len(), 1);
assert_eq!(string_map.entries[0], String::from("PASS"));
}

#[test]
fn test_from_str() -> Result<(), ParseError> {
fn test_from_str() {
let s = r#"##fileformat=VCFv4.3
##fileDate=20210412
##contig=<ID=sq0,length=8>
Expand All @@ -160,22 +209,25 @@ mod tests {
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample0
"#;

let actual: StringMap = s.parse()?;
let expected = StringMap(
[
String::from("PASS"),
String::from("NS"),
String::from("DP"),
String::from("q10"),
String::from("GT"),
]
.into_iter()
.collect(),
);

assert!(actual.iter().eq(expected.iter()));

Ok(())
let indices = [
(String::from("PASS"), 0),
(String::from("NS"), 1),
(String::from("DP"), 2),
(String::from("q10"), 3),
(String::from("GT"), 4),
]
.into_iter()
.collect();
let entries = vec![
String::from("PASS"),
String::from("NS"),
String::from("DP"),
String::from("q10"),
String::from("GT"),
];
let expected = StringMap { indices, entries };

assert_eq!(s.parse(), Ok(expected));
}

#[test]
Expand Down Expand Up @@ -233,19 +285,26 @@ mod tests {
.build();

let actual = StringMap::from(&header);
let expected = StringMap(
[
String::from("PASS"),
String::from("NS"),
String::from("DP"),
String::from("q10"),
String::from("GT"),
]
.into_iter()
.collect(),
);

assert!(actual.iter().eq(expected.iter()));
let indices = [
(String::from("PASS"), 0),
(String::from("NS"), 1),
(String::from("DP"), 2),
(String::from("q10"), 3),
(String::from("GT"), 4),
]
.into_iter()
.collect();
let entries = vec![
String::from("PASS"),
String::from("NS"),
String::from("DP"),
String::from("q10"),
String::from("GT"),
];
let expected = StringMap { indices, entries };

assert_eq!(actual, expected);
}

#[test]
Expand Down

0 comments on commit 4291d88

Please sign in to comment.