From 178b6145f5932e7e66567fc3803121124d790d01 Mon Sep 17 00:00:00 2001 From: Iain H Date: Sun, 17 Jul 2022 21:07:43 -0400 Subject: [PATCH 1/3] Support determining mode based on shebang interpreter directive Attempt to determine the appropriate mode based on the presence of an interpreter directive on the first line of the file. Interpreter directives are examined first, followed by the filename, when determining the mode, mirroring what Emacs does by default. --- Cargo.lock | 1 + zee-grammar/Cargo.toml | 1 + zee-grammar/src/config.rs | 2 ++ zee-grammar/src/lib.rs | 15 +++++++++++++++ zee/config/config.ron | 3 +++ zee/src/editor/buffer.rs | 13 +++++++++---- zee/src/editor/mod.rs | 11 ++++++++--- 7 files changed, 39 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4525feb..31cca3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1810,6 +1810,7 @@ dependencies = [ "log", "once_cell", "rayon", + "regex", "serde", "serde_derive", "tree-sitter", diff --git a/zee-grammar/Cargo.toml b/zee-grammar/Cargo.toml index cb7dd3a..84de3eb 100644 --- a/zee-grammar/Cargo.toml +++ b/zee-grammar/Cargo.toml @@ -18,6 +18,7 @@ libloading = "0.7.3" log = "0.4.16" once_cell = { version = "1.10.0", features = ["parking_lot"] } rayon = "1.5.2" +regex = "1.5.5" serde = "1.0.136" serde_derive = "1.0.136" tree-sitter = "0.20.6" diff --git a/zee-grammar/src/config.rs b/zee-grammar/src/config.rs index 150e00c..f569c1d 100644 --- a/zee-grammar/src/config.rs +++ b/zee-grammar/src/config.rs @@ -17,6 +17,8 @@ pub struct ModeConfig { pub comment: Option, pub indentation: IndentationConfig, pub grammar: Option, + #[serde(default)] + pub shebangs: Vec, } #[derive(Clone, Debug, Deserialize, Serialize)] diff --git a/zee-grammar/src/lib.rs b/zee-grammar/src/lib.rs index 0abbfdf..dabb1a4 100644 --- a/zee-grammar/src/lib.rs +++ b/zee-grammar/src/lib.rs @@ -5,11 +5,15 @@ mod git; use anyhow::Result; use once_cell::sync::Lazy; +use regex::Regex; use std::path::Path; use tree_sitter::{Language, 
Query}; use self::config::{CommentConfig, FilenamePattern, IndentationConfig, ModeConfig}; +static SHEBANG_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)").unwrap()); + #[derive(Debug)] pub struct Mode { pub name: String, @@ -19,6 +23,7 @@ pub struct Mode { pub comment: Option, pub indentation: IndentationConfig, grammar: LazyGrammar, + pub shebangs: Vec, } impl Mode { @@ -31,6 +36,7 @@ impl Mode { comment, indentation, grammar: grammar_config, + shebangs, } = config; Self { name, @@ -44,6 +50,7 @@ impl Mode { .map(|grammar_config| grammar_config.grammar_id) .map(builder::load_grammar) })), + shebangs, } } @@ -53,6 +60,13 @@ impl Mode { .any(|pattern| pattern.matches(filename.as_ref())) } + pub fn matches_by_shebang(&self, shebang: &str) -> bool { + SHEBANG_REGEX + .captures(shebang) + .and_then(|captures| self.shebangs.contains(&captures[1].into()).then(|| 0)) + .is_some() + } + pub fn language(&self) -> Option> { Some(self.grammar()?.map(|parser| parser.language)) } @@ -74,6 +88,7 @@ impl Default for Mode { comment: None, indentation: Default::default(), grammar: Lazy::new(Box::new(|| None)), + shebangs: vec![], } } } diff --git a/zee/config/config.ron b/zee/config/config.ron index b2c73e0..9602eef 100644 --- a/zee/config/config.ron +++ b/zee/config/config.ron @@ -146,6 +146,7 @@ width: 4, unit: Space, ), + shebangs: ["node"], grammar: Some( Grammar( id: "javascript", @@ -248,6 +249,7 @@ width: 4, unit: Space, ), + shebangs: ["python"], grammar: Some( Grammar( id: "python", @@ -464,6 +466,7 @@ width: 2, unit: Space, ), + shebangs: ["sh", "bash", "dash", "zsh"], grammar: Some( Grammar( id: "bash", diff --git a/zee/src/editor/buffer.rs b/zee/src/editor/buffer.rs index 1a9635b..668db14 100644 --- a/zee/src/editor/buffer.rs +++ b/zee/src/editor/buffer.rs @@ -173,11 +173,16 @@ impl Buffer { file_path: Option, repo: Option, ) -> Self { - let mode = file_path - .as_ref() - .map(|path| context.0.mode_by_filename(path)) + 
let mode = text + .line(0) + .as_str() + .and_then(|shebang| context.0.mode_by_shebang(shebang)) + .or_else(|| { + file_path + .as_ref() + .and_then(|path| context.0.mode_by_filename(path)) + }) .unwrap_or(&PLAIN_TEXT_MODE); - let mut parser = mode .language() .and_then(|result| result.ok()) diff --git a/zee/src/editor/mod.rs b/zee/src/editor/mod.rs index 4075477..7346ef2 100644 --- a/zee/src/editor/mod.rs +++ b/zee/src/editor/mod.rs @@ -32,7 +32,7 @@ use crate::{ splash::{Properties as SplashProperties, Splash}, theme::{Theme, THEMES}, }, - config::{EditorConfig, PLAIN_TEXT_MODE}, + config::EditorConfig, error::Result, task::TaskPool, }; @@ -94,11 +94,16 @@ pub struct Context { } impl Context { - pub fn mode_by_filename(&self, filename: impl AsRef) -> &Mode { + pub fn mode_by_filename(&self, filename: impl AsRef) -> Option<&Mode> { self.modes .iter() .find(|&mode| mode.matches_by_filename(filename.as_ref())) - .unwrap_or(&PLAIN_TEXT_MODE) + } + + pub fn mode_by_shebang(&self, shebang: &str) -> Option<&Mode> { + self.modes + .iter() + .find(|&mode| mode.matches_by_shebang(shebang)) } } From ecc4c48118b549e89058d91cee3fe6b7a352870e Mon Sep 17 00:00:00 2001 From: Iain H Date: Wed, 20 Jul 2022 22:26:57 -0400 Subject: [PATCH 2/3] Further shebang scan work 1) Constrain the number of characters examined when determining the interpreter directive to 256 to avoid the performance penalty of scanning long lines unnecessarily. 2) Move to a single mode_by_file() function in Context. 3) Always attempt to match the mode based on the filename first, falling back to the shebang line only if no match is found. 
--- zee/src/editor/buffer.rs | 19 +++++++------------ zee/src/editor/mod.rs | 30 +++++++++++++++++------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/zee/src/editor/buffer.rs b/zee/src/editor/buffer.rs index 668db14..6b3a4c0 100644 --- a/zee/src/editor/buffer.rs +++ b/zee/src/editor/buffer.rs @@ -17,12 +17,13 @@ use zee_grammar::Mode; use super::{ContextHandle, Editor}; use crate::{ - config::PLAIN_TEXT_MODE, error::Result, syntax::parse::{ParseTree, ParserPool, ParserStatus}, versioned::{Versioned, WeakHandle}, }; +const MAX_SHEBANG_LENGTH: usize = 256; + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct BufferId(usize); @@ -173,17 +174,11 @@ impl Buffer { file_path: Option, repo: Option, ) -> Self { - let mode = text - .line(0) - .as_str() - .and_then(|shebang| context.0.mode_by_shebang(shebang)) - .or_else(|| { - file_path - .as_ref() - .and_then(|path| context.0.mode_by_filename(path)) - }) - .unwrap_or(&PLAIN_TEXT_MODE); - let mut parser = mode + let shebang_range = 0..std::cmp::min(text.len_chars(), MAX_SHEBANG_LENGTH); + + let mut parser = context + .0 + .mode_by_file(file_path.as_ref(), &text.slice(shebang_range)) .language() .and_then(|result| result.ok()) .map(ParserPool::new); diff --git a/zee/src/editor/mod.rs b/zee/src/editor/mod.rs index 7346ef2..2de5369 100644 --- a/zee/src/editor/mod.rs +++ b/zee/src/editor/mod.rs @@ -5,13 +5,13 @@ mod windows; pub use self::buffer::{BufferId, ModifiedStatus}; use git2::Repository; -use ropey::Rope; +use ropey::{Rope, RopeSlice}; use std::{ borrow::Cow, fmt::Display, fs::File, io::{self, BufReader}, - path::{Path, PathBuf}, + path::PathBuf, sync::Arc, }; use zi::{ @@ -32,7 +32,7 @@ use crate::{ splash::{Properties as SplashProperties, Splash}, theme::{Theme, THEMES}, }, - config::EditorConfig, + config::{EditorConfig, PLAIN_TEXT_MODE}, error::Result, task::TaskPool, }; @@ -94,16 +94,20 @@ pub struct Context { } impl Context { - pub fn mode_by_filename(&self, filename: impl 
AsRef) -> Option<&Mode> { - self.modes - .iter() - .find(|&mode| mode.matches_by_filename(filename.as_ref())) - } - - pub fn mode_by_shebang(&self, shebang: &str) -> Option<&Mode> { - self.modes - .iter() - .find(|&mode| mode.matches_by_shebang(shebang)) + pub fn mode_by_file(&self, path: Option<&PathBuf>, content: &RopeSlice) -> &Mode { + path.and_then(|path| { + self.modes + .iter() + .find(|&mode| mode.matches_by_filename(path)) + }) + .or_else(|| { + content.as_str().and_then(|shebang| { + self.modes + .iter() + .find(|&mode| mode.matches_by_shebang(shebang)) + }) + }) + .unwrap_or(&PLAIN_TEXT_MODE) } } From 7936db637f439020c7807efba0668fbf9838e51c Mon Sep 17 00:00:00 2001 From: Iain H Date: Wed, 20 Jul 2022 23:25:10 -0400 Subject: [PATCH 3/3] Build fix --- zee/src/editor/buffer.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zee/src/editor/buffer.rs b/zee/src/editor/buffer.rs index 6b3a4c0..4c8cd8c 100644 --- a/zee/src/editor/buffer.rs +++ b/zee/src/editor/buffer.rs @@ -176,9 +176,11 @@ impl Buffer { ) -> Self { let shebang_range = 0..std::cmp::min(text.len_chars(), MAX_SHEBANG_LENGTH); - let mut parser = context + let mode = context .0 - .mode_by_file(file_path.as_ref(), &text.slice(shebang_range)) + .mode_by_file(file_path.as_ref(), &text.slice(shebang_range)); + + let mut parser = mode .language() .and_then(|result| result.ok()) .map(ParserPool::new);