Permalink
Browse files

Add new caching strategy. Modest performance increase

Cache regex matches per pattern, so that each regex only runs as many
times as it needs to.
  • Loading branch information...
1 parent 11ac7b4 commit 881f0e06307f07af131253d052088bdbb325df8d @trishume committed Jun 29, 2016
Showing with 52 additions and 24 deletions.
  1. +8 −0 DESIGN.md
  2. +2 −2 Readme.md
  3. +1 −0 src/highlighting/highlighter.rs
  4. +40 −21 src/parsing/parser.rs
  5. +1 −1 src/util.rs
View
@@ -91,6 +91,14 @@ $cargo run --example syncat testdata/jquery.js | grep leastmatch | wc -l
Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
Running `target/debug/examples/syncat testdata/jquery.js`
137842
+# With search caching
+$cargo run --example syncat testdata/jquery.js | grep searchcached | wc -l
+ Compiling syntect v0.6.0 (file:///Users/tristan/Box/Dev/Projects/syntect)
+ Running `target/debug/examples/syncat testdata/jquery.js`
+ 2440527
+$cargo run --example syncat testdata/jquery.js | grep regsearch | wc -l
+ Running `target/debug/examples/syncat testdata/jquery.js`
+ 950195
```
The average number of unique regexes per line is 87.58; the average number of non-unique regex searches per line (regsearch / lines) is 317.
View
@@ -74,8 +74,8 @@ Currently `syntect` is reasonably fast but not as fast as it could be. The follo
The current perf numbers are below. These numbers should get better once I implement more of the things above, but they're on par with many other text editors.
-- Highlighting 9200 lines/247kb of jQuery 2.1 takes 1.4s, or ~6500 lines/second. For comparison:
- - Textmate 2, Spacemacs and Visual Studio Code all take around the same time (2ish seconds)
+- Highlighting 9200 lines/247kb of jQuery 2.1 takes 1.0s. For comparison:
+ - Textmate 2, Spacemacs and Visual Studio Code all take roughly 2 seconds (measured by hand with a stopwatch, hence approximate).
- Atom takes 6s
- Sublime Text 3 dev build takes ~0.22s, despite having a super fancy javascript syntax definition
- Vim is instantaneous but that isn't a fair comparison since vim's highlighting is far more basic than the other editors (Compare [vim's grammar](https://github.com/vim/vim/blob/master/runtime/syntax/javascript.vim) to [Sublime's](https://github.com/sublimehq/Packages/blob/master/JavaScript/JavaScript.sublime-syntax)).
@@ -114,6 +114,7 @@ impl<'a, 'b> Iterator for HighlightIterator<'a, 'b> {
match command {
ScopeStackOp::Push(scope) => {
self.state.path.push(scope);
+ // println!("{}", self.state.path);
self.state
.styles
.push(style.apply(self.highlighter.get_style(self.state.path.as_slice())));
View
@@ -2,6 +2,7 @@ use super::syntax_definition::*;
use super::scope::*;
use onig::{self, Region};
use std::usize;
+use std::collections::HashMap;
use std::i32;
/// Keeps the current parser state (the internal syntax interpreter stack) between lines of parsing.
@@ -40,8 +41,8 @@ struct RegexMatch {
pat_index: usize,
}
-// TODO cache actual matching regions
-type MatchCache = Vec<bool>;
+/// Maps a pattern (keyed by its raw pointer) to its cached search result: `Some(region)` holding the match regions, or `None` if the search found no match.
+type SearchCache = HashMap<*const MatchPattern,Option<Region>>;
impl ParseState {
/// Create a state from a syntax, keeps its own reference counted
@@ -83,10 +84,10 @@ impl ParseState {
}
let mut regions = Region::with_capacity(8);
- let mut match_cache: MatchCache = Vec::with_capacity(64); // TODO find best capacity
+ let mut search_cache: SearchCache = HashMap::with_capacity(128); // TODO find the best capacity
while self.parse_next_token(line,
&mut match_start,
- &mut match_cache,
+ &mut search_cache,
&mut regions,
&mut res) {
}
@@ -96,7 +97,7 @@ impl ParseState {
fn parse_next_token(&mut self,
line: &str,
start: &mut usize,
- cache: &mut MatchCache,
+ search_cache: &mut SearchCache,
regions: &mut Region,
ops: &mut Vec<(usize, ScopeStackOp)>)
-> bool {
@@ -114,19 +115,37 @@ impl ParseState {
.chain(prototype.into_iter())
.chain(Some(cur_level.context.clone()).into_iter());
// println!("{:#?}", cur_level);
- let mut overall_index = 0;
+ // println!("token at {} on {}", start, line.trim_right());
for ctx in context_chain {
for (pat_context_ptr, pat_index) in context_iter(ctx) {
- if overall_index < cache.len() && cache[overall_index] == false {
- overall_index += 1;
- continue; // we've determined this pattern doesn't match this line anywhere
- }
let mut pat_context = pat_context_ptr.borrow_mut();
let mut match_pat = pat_context.match_at_mut(pat_index);
-
// println!("{} - {:?} - {:?}", match_pat.regex_str, match_pat.has_captures, cur_level.captures.is_some());
+
+ if let Some(maybe_region) = search_cache.get(&(match_pat as *const MatchPattern)) {
+ let mut valid_entry = true;
+ if let &Some(ref region) = maybe_region {
+ let match_start = region.pos(0).unwrap().0;
+ if match_start < *start {
+ valid_entry = false;
+ }
+ if match_start < min_start && valid_entry {
+ // print!("match {} at {} on {}", match_pat.regex_str, match_start, line);
+ min_start = match_start;
+ cur_match = Some(RegexMatch {
+ regions: region.clone(),
+ context: pat_context_ptr.clone(),
+ pat_index: pat_index,
+ });
+ }
+ }
+ if valid_entry {
+ continue;
+ }
+ }
+
match_pat.ensure_compiled_if_possible();
- let refs_regex = if cur_level.captures.is_some() && match_pat.has_captures {
+ let refs_regex = if match_pat.has_captures && cur_level.captures.is_some() {
let &(ref region, ref s) = cur_level.captures.as_ref().unwrap();
Some(match_pat.compile_with_refs(region, s))
} else {
@@ -142,27 +161,30 @@ impl ParseState {
line.len(),
onig::SEARCH_OPTION_NONE,
Some(regions));
- if overall_index >= cache.len() {
- cache.push(matched.is_some());
- } // TODO update the cache even if this is another time over
if let Some(match_start) = matched {
let match_end = regions.pos(0).unwrap().1;
// this is necessary to avoid infinite looping on dumb patterns
let does_something = match match_pat.operation {
MatchOperation::None => match_start != match_end,
_ => true,
};
+ if refs_regex.is_none() && does_something {
+ search_cache.insert(match_pat, Some(regions.clone()));
+ }
if match_start < min_start && does_something {
+ // print!("catch {} at {} on {}", match_pat.regex_str, match_start, line);
min_start = match_start;
cur_match = Some(RegexMatch {
regions: regions.clone(),
context: pat_context_ptr.clone(),
pat_index: pat_index,
});
}
+ } else {
+ if refs_regex.is_none() {
+ search_cache.insert(match_pat, None);
+ }
}
-
- overall_index += 1;
}
}
cur_match
@@ -172,10 +194,7 @@ impl ParseState {
let (_, match_end) = reg_match.regions.pos(0).unwrap();
*start = match_end;
let level_context = self.stack[self.stack.len() - 1].context.clone();
- let stack_changed = self.exec_pattern(line, reg_match, level_context, ops);
- if stack_changed {
- cache.clear();
- }
+ self.exec_pattern(line, reg_match, level_context, ops);
true
} else {
false
View
@@ -41,7 +41,7 @@ pub fn as_24_bit_terminal_escaped(v: &[(Style, &str)], bg: bool) -> String {
/// with visual alignment to the line. Obviously for debugging.
pub fn debug_print_ops(line: &str, ops: &Vec<(usize, ScopeStackOp)>) {
for &(i, ref op) in ops.iter() {
- println!("{}", line);
+ println!("{}", line.trim_right());
print!("{: <1$}", "", i);
match op {
&ScopeStackOp::Push(s) => {

0 comments on commit 881f0e0

Please sign in to comment.