Skip to content

Commit 0d7f93d

Browse files
committed
Refactored string module
1 parent e214feb commit 0d7f93d

File tree

5 files changed

+99
-78
lines changed

5 files changed

+99
-78
lines changed

src/arqtree.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
2-
3-
// Based on http://codeforces.com/blog/entry/18051
1+
// Associative Range Query Tree based on http://codeforces.com/blog/entry/18051
42
// Entries [0...size-1] are stored in t[size..2*size-1].
53
// The range operation must be associative: in this example, we use addition.
64
// In this example, the range operation assigns the value op to all entries.

src/graph.rs

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,44 @@
11

22

3+
// Represents a union of disjoint sets. Each set's elements are arranged in a
4+
// tree, whose root is the set's representative.
35
pub struct DisjointSets {
46
parent: Vec<usize>
57
}
68

79
impl DisjointSets {
10+
// Initialize disjoint sets containing one element each.
811
pub fn new(size: usize) -> DisjointSets {
912
DisjointSets { parent: (0..size).collect() }
1013
}
1114

15+
// Find the set's representative. Do path compression along the way to make
16+
// future queries faster.
1217
pub fn find(&mut self, u: usize) -> usize {
1318
let pu = self.parent[u];
1419
if pu != u { self.parent[u] = self.find(pu); }
1520
self.parent[u]
1621
}
1722

18-
// Returns true if u and v were previously in different sets.
23+
// Merge the sets containing u and v into a single set containing their
24+
// union. Returns true if u and v were previously in different sets.
1925
pub fn merge(&mut self, u: usize, v: usize) -> bool {
2026
let (pu, pv) = (self.find(u), self.find(v));
2127
self.parent[pu] = pv;
2228
pu != pv
2329
}
2430
}
2531

32+
// A compact graph representation.
2633
pub struct Graph {
2734
pub first: Vec<Option<usize>>,
2835
pub next: Vec<Option<usize>>,
2936
pub endp: Vec<usize>,
3037
}
3138

3239
impl Graph {
40+
// Initialize a graph with vmax vertices and no edges. For best efficiency,
41+
// emax should be a tight upper bound on the number of edges to insert.
3342
pub fn new(vmax: usize, emax: usize) -> Graph {
3443
Graph {
3544
first: vec![None; vmax],
@@ -38,44 +47,52 @@ impl Graph {
3847
}
3948
}
4049

50+
// Utilities to compute the number of vertices and edges.
4151
pub fn num_v(&self) -> usize { self.first.len() }
4252
pub fn num_e(&self) -> usize { self.next.len() }
4353

54+
// Add a directed edge from u to v.
4455
pub fn add_edge(&mut self, u: usize, v: usize) {
4556
self.next.push(self.first[u]);
4657
self.first[u] = Some(self.endp.len());
4758
self.endp.push(v);
4859
}
4960

61+
// An undirected edge is two directed edges. If edges are added only via
62+
// this function, the reverse of any edge e can be found at e^1.
5063
pub fn add_undirected_edge(&mut self, u: usize, v: usize) {
5164
self.add_edge(u, v);
5265
self.add_edge(v, u);
5366
}
5467

55-
// Assumes odd-numbered vertices correspond to predecessors' negations.
56-
// Logically equivalent forms: u || v, !u -> v, !v -> u
68+
// If we think of each even-numbered vertex as a variable, and its successor
69+
// as its negation, then we can build the implication graph corresponding
70+
// to any 2-CNF formula. Note that u||v == !u -> v == !v -> u.
5771
pub fn add_two_sat_clause(&mut self, u: usize, v: usize) {
5872
self.add_edge(u^1, v);
5973
self.add_edge(v^1, u);
6074
}
6175

76+
// Gets vertex u's adjacency list.
6277
pub fn adj_list<'a>(&'a self, u: usize) -> AdjListIterator<'a> {
6378
AdjListIterator {
6479
graph: self,
6580
next_e: self.first[u]
6681
}
6782
}
6883

84+
// Helper function used by euler_path. Note that we can't consume the
85+
// adjacency list in a for loop because recursive calls may need it.
6986
fn euler_recurse(&self, u: usize, adj: &mut [AdjListIterator], edges: &mut Vec<usize>) {
7087
while let Some((e, v)) = adj[u].next() {
7188
self.euler_recurse(v, adj, edges);
7289
edges.push(e);
7390
}
7491
}
7592

76-
// Finds an Euler path starting from u, assuming it exists, and that the
77-
// graph is directed. To deal with undirected graphs, one simply needs to
78-
// keep track of visited edges to avoid repeating them.
93+
// Finds the sequence of edges in an Euler path starting from u, assuming it
94+
// exists and that the graph is directed. To extend this to undirected
95+
// graphs, keep track of a visited array to skip the reverse edge.
7996
pub fn euler_path(&self, u: usize) -> Vec<usize> {
8097
let mut adj_iters = (0..self.num_v()).map(|u| self.adj_list(u))
8198
.collect::<Vec<_>>();
@@ -86,15 +103,16 @@ impl Graph {
86103
}
87104
}
88105

106+
// An iterator for convenient adjacency list traversal.
89107
pub struct AdjListIterator<'a> {
90108
graph: &'a Graph,
91109
next_e: Option<usize>
92110
}
93111

94112
impl<'a> Iterator for AdjListIterator<'a> {
95-
// Produces an outgoing edge and vertex.
96113
type Item = (usize, usize);
97-
114+
115+
// Produces an outgoing edge and vertex.
98116
fn next(&mut self) -> Option<Self::Item> {
99117
self.next_e.map( |e| {
100118
let v = self.graph.endp[e];
@@ -104,7 +122,7 @@ impl<'a> Iterator for AdjListIterator<'a> {
104122
}
105123
}
106124

107-
// Assumes graph is undirected.
125+
// Kruskal's minimum spanning tree algorithm on an undirected graph.
108126
pub fn min_spanning_tree(graph: &Graph, weights: &[i64]) -> Vec<usize> {
109127
assert_eq!(graph.num_e(), 2 * weights.len());
110128
let mut edges = (0..weights.len()).collect::<Vec<_>>();
@@ -190,8 +208,8 @@ impl FlowGraph {
190208
self.vdata[t].lev != None
191209
}
192210

193-
// Dinic's fast maximum flow: V^2E in general,
194-
// min(V^(2/3),sqrt(E))E on unit caps, sqrt(V)E on bipartite
211+
// Dinic's maximum flow / Hopcroft-Karp maximum bipartite matching: V^2E in
212+
// general, min(V^(2/3),sqrt(E))E on unit capacity, sqrt(V)E on bipartite.
195213
pub fn dinic(&mut self, s: usize, t: usize) -> i64 {
196214
let mut flow = 0;
197215
while self.bfs(s, t) {
@@ -209,7 +227,7 @@ impl FlowGraph {
209227
}
210228
}
211229

212-
// 2-vertex and 2-edge connected components
230+
// Strongly connected, 2-vertex-connected, and 2-edge-connected components
213231
// should handle multiple edges and self-loops
214232
// USAGE: 1) new(); 2) add_edge(...); 3) compute_bcc();
215233
// 4) use is_cut_vertex(vertex_index) or is_cut_edge(2 * edge_index)

src/math.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
2-
3-
// Find x,y such that d = gcd(a,b) = ax + by
4-
// * a^-1 (mod m): let (d, x, y) = egcd(a,m), 1); assert!(d, 1); return (x+m)%m;
1+
// Finds (d, x, y) such that d = gcd(a, b) = ax + by.
52
pub fn extended_gcd(a: i64, b: i64) -> (i64, i64, i64) {
63
if b == 0 {
74
(a.abs(), a.signum(), 0)
@@ -12,18 +9,18 @@ pub fn extended_gcd(a: i64, b: i64) -> (i64, i64, i64) {
129
}
1310
}
1411

15-
// Assuming a != 0, find smallest y >= 0 such that ax + by = c (if possible)
12+
// Assuming a != 0, finds smallest y >= 0 such that ax + by = c.
1613
pub fn canon_egcd(a: i64, b: i64, c: i64) -> Option<(i64, i64, i64)> {
17-
let (d, _, mut y) = extended_gcd(a, b);
18-
let z = (a / d).abs();
19-
if c % d != 0 {
20-
None
21-
}
22-
else {
23-
y = (y*(c/d)%z + z)%z;
14+
let (d, _, yy) = extended_gcd(a, b);
15+
if c % d == 0 {
16+
let z = (a / d).abs();
17+
let y = (yy*(c/d)%z + z)%z;
2418
let x = (c - b*y)/a;
2519
Some((d, x, y))
2620
}
21+
else {
22+
None
23+
}
2724
}
2825

2926
#[cfg(test)]

src/scanner.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
2-
1+
// Generic utility for reading data from standard input.
32
pub struct Scanner {
43
buffer: ::std::collections::VecDeque<String>
54
}
@@ -10,7 +9,8 @@ impl Scanner {
109
buffer: ::std::collections::VecDeque::new()
1110
}
1211
}
13-
12+
13+
// Use "turbofish" syntax next::<T>() to select data type of next token.
1414
pub fn next<T: ::std::str::FromStr>(&mut self) -> T {
1515
while self.buffer.is_empty() {
1616
let mut input = String::new();

src/string_proc.rs

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,62 @@
1-
// Palindrome substrings in O(n), Manacher's algorithm
2-
// length of odd palin centred at s[i] is len[2*i]
3-
// even palin btwn s[i],s[i+1]: len[2*i+1]
4-
// TODO: check for underflows
5-
// Alternative version:
6-
// for c in (..)
7-
// while bla
8-
// len[c] += 2;
9-
// if len[c]-r+i == len[c-r+i] { len.push(len[c]-r+i); r += 1; }
10-
fn find_pals(text: &[u8]) -> Vec<usize> {
11-
let mut len = Vec::with_capacity(2*text.len() - 1);
12-
len.push(1); len.push(0);
13-
let mut i = 1;
14-
while i < 2*text.len() - 2 {
15-
let max_len = ::std::cmp::min(i+1, 2*text.len()-1-i);
16-
while len[i] < max_len && text[(i-len[i]-1)/2] == text[(i+len[i]+1)/2] {
17-
len[i] += 2;
1+
// Data structure for Knuth-Morris-Pratt string matching against a pattern.
2+
pub struct Matcher<'a> {
3+
pub pattern: &'a [u8],
4+
pub fail: Vec<usize>
5+
}
6+
7+
impl<'a> Matcher<'a> {
8+
// Sets fail[i] = length of longest proper prefix-suffix of pattern[0...i].
9+
pub fn new(pattern: &'a [u8]) -> Matcher {
10+
let mut fail = Vec::with_capacity(pattern.len());
11+
fail.push(0);
12+
let mut len = 0;
13+
for &ch in &pattern[1..] {
14+
while len > 0 && pattern[len] != ch { len = fail[len-1]; }
15+
if pattern[len] == ch { len += 1; }
16+
fail.push(len);
1817
}
19-
let mut d = 1;
20-
while len[i-d] < len[i]-d { len[i+d] = len[i-d]; d += 1; }
21-
len[i+d] = len[i]-d;
22-
i += d;
18+
Matcher { pattern: pattern, fail: fail }
2319
}
24-
len
25-
}
2620

27-
// fail[i] = len of longest proper prefix-suffix of pat[0...i]
28-
fn kmp_init(pat: &[u8]) -> Vec<usize> {
29-
let mut fail = Vec::with_capacity(pat.len());
30-
fail.push(0);
31-
let mut len = 0;
32-
for &ch in &pat[1..] {
33-
while len > 0 && pat[len] != ch { len = fail[len-1]; }
34-
if pat[len] == ch { len += 1; }
35-
fail.push(len);
21+
// KMP algorithm, sets matches[i] = length of longest prefix of pattern
22+
// matching a suffix of text[0...i].
23+
pub fn kmp_match(&self, text: &[u8]) -> Vec<usize> {
24+
let mut matches = Vec::with_capacity(text.len());
25+
let mut len = 0;
26+
for &ch in text {
27+
if len == self.pattern.len() { len = self.fail[len-1]; }
28+
while len > 0 && self.pattern[len] != ch { len = self.fail[len-1]; }
29+
if self.pattern[len] == ch { len += 1; }
30+
matches.push(len);
31+
}
32+
matches
3633
}
37-
fail
3834
}
3935

40-
// matches[i] = len of longest prefix of pat matching with suffix of text[0...i]
41-
fn kmp_match(text: &[u8], pat: &[u8]) -> Vec<usize> {
42-
let fail = kmp_init(pat);
43-
let mut matches = Vec::with_capacity(text.len());
44-
let mut len = 0;
45-
for &ch in text {
46-
if len == pat.len() { len = fail[len-1]; }
47-
while len > 0 && pat[len] != ch { len = fail[len-1]; }
48-
if pat[len] == ch { len += 1; }
49-
matches.push(len);
36+
// Manacher's algorithm for computing palindrome substrings in linear time.
37+
// len[2*i] = odd length of palindrome centred at text[i].
38+
// len[2*i+1] = even length of palindrome centred at text[i+0.5].
39+
pub fn palindromes(text: &[u8]) -> Vec<usize> {
40+
let mut len = Vec::with_capacity(2*text.len() - 1);
41+
len.push(1);
42+
while len.len() < len.capacity() {
43+
let i = len.len() - 1;
44+
let max_len = ::std::cmp::min(i + 1, len.capacity() - i);
45+
while len[i] < max_len && text[(i-len[i]-1)/2] == text[(i+len[i]+1)/2] {
46+
len[i] += 2;
47+
}
48+
if len[i] < 2 {
49+
let a = 1 - len[i];
50+
len.push(a);
51+
}
52+
else {
53+
for d in 1.. {
54+
let (a, b) = (len[i-d], len[i] - d);
55+
if a < b { len.push(a); } else { len.push(b); break; }
56+
}
57+
}
5058
}
51-
matches
59+
len
5260
}
5361

5462
#[cfg(test)]
@@ -58,10 +66,10 @@ mod test {
5866
#[test]
5967
fn test_string() {
6068
let text = "abcbc".as_bytes();
61-
let pat = "bc".as_bytes();
62-
let matches = kmp_match(text, pat);
63-
//let pal_len = find_pals(text);
69+
let pattern = "bc".as_bytes();
70+
let matches = Matcher::new(pattern).kmp_match(text);
71+
let pal_len = palindromes(text);
6472
assert_eq!(matches, vec![0, 1, 2, 1, 2]);
65-
//assert_eq!(pal_len, vec![1, 0, 1, 0, 3, 0, 3, 0, 1]);
73+
assert_eq!(pal_len, vec![1, 0, 1, 0, 3, 0, 3, 0, 1]);
6674
}
6775
}

0 commit comments

Comments
 (0)