Refactored string module

EbTech · EbTech · commit 0d7f93dcf505 · 2017-06-08T02:00:54.000-07:00
diff --git a/src/arqtree.rs b/src/arqtree.rs
@@ -1,6 +1,4 @@
-
-
-// Based on http://codeforces.com/blog/entry/18051
+// Associative Range Query Tree based on http://codeforces.com/blog/entry/18051
 // Entries [0...size-1] are stored in t[size..2*size-1].
 // The range operation must be associative: in this example, we use addition.
 // In this example, the range operation assigns the value op to all entries.
diff --git a/src/graph.rs b/src/graph.rs
@@ -1,35 +1,44 @@
 
 
+// Represents a union of disjoint sets. Each set's elements are arranged in a
+// tree, whose root is the set's representative.
 pub struct DisjointSets {
     parent: Vec<usize>
 }
 
 impl DisjointSets {
+    // Initialize disjoint sets containing one element each.
     pub fn new(size: usize) -> DisjointSets {
         DisjointSets { parent: (0..size).collect() }
     }
     
+    // Find the set's representative. Do path compression along the way to make
+    // future queries faster.
     pub fn find(&mut self, u: usize) -> usize {
         let pu = self.parent[u];
         if pu != u { self.parent[u] = self.find(pu); }
         self.parent[u]
     }
     
-    // Returns true if u and v were previously in different sets.
+    // Merge the sets containing u and v into a single set containing their
+    // union. Returns true if u and v were previously in different sets.
     pub fn merge(&mut self, u: usize, v: usize) -> bool {
         let (pu, pv) = (self.find(u), self.find(v));
         self.parent[pu] = pv;
         pu != pv
     }
 }
 
+// A compact graph representation.
 pub struct Graph {
     pub first: Vec<Option<usize>>,
     pub next: Vec<Option<usize>>,
     pub endp: Vec<usize>,
 }
 
 impl Graph {
+    // Initialize a graph with vmax vertices and no edges. For best efficiency,
+    // emax should be a tight upper bound on the number of edges to insert.
     pub fn new(vmax: usize, emax: usize) -> Graph {
         Graph {
             first: vec![None; vmax],
@@ -38,44 +47,52 @@ impl Graph {
         }
     }
     
+    // Utilities to compute the number of vertices and edges.
     pub fn num_v(&self) -> usize { self.first.len() }
     pub fn num_e(&self) -> usize { self.next.len() }
     
+    // Add a directed edge from u to v.
     pub fn add_edge(&mut self, u: usize, v: usize) {
         self.next.push(self.first[u]);
         self.first[u] = Some(self.endp.len());
         self.endp.push(v);
     }
     
+    // An undirected edge is two directed edges. If edges are added only via
+    // this function, the reverse of any edge e can be found at e^1.
     pub fn add_undirected_edge(&mut self, u: usize, v: usize) {
         self.add_edge(u, v);
         self.add_edge(v, u);
     }
     
-    // Assumes odd-numbered vertices correspond to predecessors' negations.
-    // Logically equivalent forms: u || v, !u -> v, !v -> u
+    // If we think of each even-numbered vertex as a variable, and its successor
+    // as its negation, then we can build the implication graph corresponding
+    // to any 2-CNF formula. Note that u||v == !u -> v == !v -> u.
     pub fn add_two_sat_clause(&mut self, u: usize, v: usize) {
         self.add_edge(u^1, v);
         self.add_edge(v^1, u);
     }
     
+    // Gets vertex u's adjacency list.
     pub fn adj_list<'a>(&'a self, u: usize) -> AdjListIterator<'a> {
         AdjListIterator {
             graph: self,
             next_e: self.first[u]
         }
     }
     
+    // Helper function used by euler_path. Note that we can't consume the
+    // adjacency list in a for loop because recursive calls may need it.
     fn euler_recurse(&self, u: usize, adj: &mut [AdjListIterator], edges: &mut Vec<usize>) {
         while let Some((e, v)) = adj[u].next() {
             self.euler_recurse(v, adj, edges);
             edges.push(e);
         }
     }
     
-    // Finds an Euler path starting from u, assuming it exists, and that the
-    // graph is directed. To deal with undirected graphs, one simply needs to
-    // keep track of visited edges to avoid repeating them.
+    // Finds the sequence of edges in an Euler path starting from u, assuming it
+    // exists and that the graph is directed. To extend this to undirected
+    // graphs, keep track of a visited array to skip the reverse edge.
     pub fn euler_path(&self, u: usize) -> Vec<usize> {
         let mut adj_iters = (0..self.num_v()).map(|u| self.adj_list(u))
                             .collect::<Vec<_>>();
@@ -86,15 +103,16 @@ impl Graph {
     }
 }
 
+// An iterator for convenient adjacency list traversal.
 pub struct AdjListIterator<'a> {
     graph: &'a Graph,
     next_e: Option<usize>
 }
 
 impl<'a> Iterator for AdjListIterator<'a> {
-    // Produces an outgoing edge and vertex.
     type Item = (usize, usize);
-
+    
+    // Produces an outgoing edge and vertex.
     fn next(&mut self) -> Option<Self::Item> {
         self.next_e.map( |e| {
             let v = self.graph.endp[e];
@@ -104,7 +122,7 @@ impl<'a> Iterator for AdjListIterator<'a> {
     }
 }
 
-// Assumes graph is undirected.
+// Kruskal's minimum spanning tree algorithm on an undirected graph.
 pub fn min_spanning_tree(graph: &Graph, weights: &[i64]) -> Vec<usize> {
     assert_eq!(graph.num_e(), 2 * weights.len());
     let mut edges = (0..weights.len()).collect::<Vec<_>>();
@@ -190,8 +208,8 @@ impl FlowGraph {
         self.vdata[t].lev != None
     }
     
-    // Dinic's fast maximum flow: V^2E in general,
-    // min(V^(2/3),sqrt(E))E on unit caps, sqrt(V)E on bipartite
+    // Dinic's maximum flow / Hopcroft-Karp maximum bipartite matching: V^2E in
+    // general, min(V^(2/3),sqrt(E))E on unit capacity, sqrt(V)E on bipartite.
     pub fn dinic(&mut self, s: usize, t: usize) -> i64 {
         let mut flow = 0;
         while self.bfs(s, t) {
@@ -209,7 +227,7 @@ impl FlowGraph {
     }
 }
 
-// 2-vertex and 2-edge connected components
+// Strongly connected, 2-vertex-connected, and 2-edge-connected components
 // should handle multiple-edges and self-loops
 // USAGE: 1) new(); 2) add_edge(...); 3) compute_bcc();
 // 4) use is_cut_vertex(vertex_index) or is_cut_edge(2 * edge_index)
diff --git a/src/math.rs b/src/math.rs
@@ -1,7 +1,4 @@
-
-
-// Find x,y such that d = gcd(a,b) = ax + by
-// * a^-1 (mod m): let (d, x, y) = egcd(a,m), 1); assert!(d, 1); return (x+m)%m;
+// Finds (d, x, y) such that d = gcd(a, b) = ax + by.
 pub fn extended_gcd(a: i64, b: i64) -> (i64, i64, i64) {
     if b == 0 {
         (a.abs(), a.signum(), 0)
@@ -12,18 +9,18 @@ pub fn extended_gcd(a: i64, b: i64) -> (i64, i64, i64) {
     }
 }
 
-// Assuming a != 0, find smallest y >= 0 such that ax + by = c (if possible)
+// Assuming a != 0, finds smallest y >= 0 such that ax + by = c.
 pub fn canon_egcd(a: i64, b: i64, c: i64) -> Option<(i64, i64, i64)> {
-    let (d, _, mut y) = extended_gcd(a, b);
-    let z = (a / d).abs();
-    if c % d != 0 {
-        None
-    }
-    else {
-        y = (y*(c/d)%z + z)%z;
+    let (d, _, yy) = extended_gcd(a, b);
+    if c % d == 0 {
+        let z = (a / d).abs();
+        let y = (yy*(c/d)%z + z)%z;
         let x = (c - b*y)/a;
         Some((d, x, y))
     }
+    else {
+        None
+    }
 }
 
 #[cfg(test)]
diff --git a/src/scanner.rs b/src/scanner.rs
@@ -1,5 +1,4 @@
-
-
+// Generic utility for reading data from standard input.
 pub struct Scanner {
     buffer: ::std::collections::VecDeque<String>
 }
@@ -10,7 +9,8 @@ impl Scanner {
             buffer: ::std::collections::VecDeque::new()
         }
     }
-
+    
+    // Use "turbofish" syntax next::<T>() to select data type of next token.
     pub fn next<T: ::std::str::FromStr>(&mut self) -> T {
         while self.buffer.is_empty() {
             let mut input = String::new();
diff --git a/src/string_proc.rs b/src/string_proc.rs
@@ -1,54 +1,62 @@
-// Palindrome substrings in O(n), Manacher's algorithm
-// length of odd palin centred at s[i] is len[2*i]
-// even palin btwn s[i],s[i+1]: len[2*i+1]
-// TODO: check for underflows
-// Alternative version:
-// for c in (..)
-//   while bla
-//     len[c] += 2;
-//     if len[c]-r+i == len[c-r+i] { len.push(len[c]-r+i); r += 1; }
-fn find_pals(text: &[u8]) -> Vec<usize> {
-    let mut len = Vec::with_capacity(2*text.len() - 1); 
-    len.push(1); len.push(0);
-    let mut i = 1;
-    while i < 2*text.len() - 2 {
-        let max_len = ::std::cmp::min(i+1, 2*text.len()-1-i);
-        while len[i] < max_len && text[(i-len[i]-1)/2] == text[(i+len[i]+1)/2] {
-            len[i] += 2;
+// Data structure for Knuth-Morris-Pratt string matching against a pattern.
+pub struct Matcher<'a> {
+    pub pattern: &'a [u8],
+    pub fail: Vec<usize>
+}
+
+impl<'a> Matcher<'a> {
+    // Sets fail[i] = length of longest proper prefix-suffix of pattern[0...i].
+    pub fn new(pattern: &'a [u8]) -> Matcher {
+        let mut fail = Vec::with_capacity(pattern.len());
+        fail.push(0);
+        let mut len = 0;
+        for &ch in &pattern[1..] {
+            while len > 0 && pattern[len] != ch { len = fail[len-1]; }
+            if pattern[len] == ch { len += 1; }
+            fail.push(len);
         }
-        let mut d = 1;
-        while len[i-d] < len[i]-d { len[i+d] = len[i-d]; d += 1; }
-        len[i+d] = len[i]-d;
-        i += d;
+        Matcher { pattern: pattern, fail: fail }
     }
-    len
-}
 
-// fail[i] = len of longest proper prefix-suffix of pat[0...i]
-fn kmp_init(pat: &[u8]) -> Vec<usize> {
-    let mut fail = Vec::with_capacity(pat.len());
-    fail.push(0);
-    let mut len = 0;
-    for &ch in &pat[1..] {
-        while len > 0 && pat[len] != ch { len = fail[len-1]; }
-        if pat[len] == ch { len += 1; }
-        fail.push(len);
+    // KMP algorithm, sets matches[i] = length of longest prefix of pattern
+    // matching a suffix of text[0...i].
+    pub fn kmp_match(&self, text: &[u8]) -> Vec<usize> {
+        let mut matches = Vec::with_capacity(text.len());
+        let mut len = 0;
+        for &ch in text {
+            if len == self.pattern.len() { len = self.fail[len-1]; }
+            while len > 0 && self.pattern[len] != ch { len = self.fail[len-1]; }
+            if self.pattern[len] == ch { len += 1; }
+            matches.push(len);
+        }
+        matches
     }
-    fail
 }
 
-// matches[i] = len of longest prefix of pat matching with suffix of text[0...i]
-fn kmp_match(text: &[u8], pat: &[u8]) -> Vec<usize> {
-    let fail = kmp_init(pat);
-    let mut matches = Vec::with_capacity(text.len());
-    let mut len = 0;
-    for &ch in text {
-        if len == pat.len() { len = fail[len-1]; }
-        while len > 0 && pat[len] != ch { len = fail[len-1]; }
-        if pat[len] == ch { len += 1; }
-        matches.push(len);
+// Manacher's algorithm for computing palindrome substrings in linear time.
+// len[2*i] = odd length of palindrome centred at text[i].
+// len[2*i+1] = even length of palindrome centred at text[i+0.5].
+pub fn palindromes(text: &[u8]) -> Vec<usize> {
+    let mut len = Vec::with_capacity(2*text.len() - 1); 
+    len.push(1);
+    while len.len() < len.capacity() {
+        let i = len.len() - 1;
+        let max_len = ::std::cmp::min(i + 1, len.capacity() - i);
+        while len[i] < max_len && text[(i-len[i]-1)/2] == text[(i+len[i]+1)/2] {
+            len[i] += 2;
+        }
+        if len[i] < 2 {
+            let a = 1 - len[i];
+            len.push(a);
+        }
+        else {
+            for d in 1.. {
+                let (a, b) = (len[i-d], len[i] - d);
+                if a < b { len.push(a); } else { len.push(b); break; }
+            }
+        }
     }
-    matches
+    len
 }
 
 #[cfg(test)]
@@ -58,10 +66,10 @@ mod test {
     #[test]
     fn test_string() {
         let text = "abcbc".as_bytes();
-        let pat = "bc".as_bytes();
-        let matches = kmp_match(text, pat);
-        //let pal_len = find_pals(text);
+        let pattern = "bc".as_bytes();
+        let matches = Matcher::new(pattern).kmp_match(text);
+        let pal_len = palindromes(text);
         assert_eq!(matches, vec![0, 1, 2, 1, 2]);
-        //assert_eq!(pal_len, vec![1, 0, 1, 0, 3, 0, 3, 0, 1]);
+        assert_eq!(pal_len, vec![1, 0, 1, 0, 3, 0, 3, 0, 1]);
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,4 @@`
`1`		`-`
`2`		`-`
	`1`	`+// Generic utility for reading data from standard input.`
`3`	`2`	`pub struct Scanner {`
`4`	`3`	`buffer: ::std::collections::VecDeque<String>`
`5`	`4`	`}`
`@@ -10,7 +9,8 @@ impl Scanner {`
`10`	`9`	`buffer: ::std::collections::VecDeque::new()`
`11`	`10`	`}`
`12`	`11`	`}`
`13`		`-`
	`12`	`+`
	`13`	`+ // Use "turbofish" syntax next::<T>() to select data type of next token.`
`14`	`14`	`pub fn next<T: ::std::str::FromStr>(&mut self) -> T {`
`15`	`15`	`while self.buffer.is_empty() {`
`16`	`16`	`let mut input = String::new();`