From d46f10921505925e856557fdc3cf2558f1dc1cd1 Mon Sep 17 00:00:00 2001 From: Christopher Harrison Date: Fri, 18 Aug 2023 13:51:51 +0100 Subject: [PATCH 1/7] Beginnings of thread-safe language definition caching --- topiary-cli/src/error.rs | 9 +++++- topiary-cli/src/language.rs | 58 +++++++++++++++++++++++++++++++++++++ topiary-cli/src/main.rs | 1 + 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 topiary-cli/src/language.rs diff --git a/topiary-cli/src/error.rs b/topiary-cli/src/error.rs index 1140fac2..4008ddaa 100644 --- a/topiary-cli/src/error.rs +++ b/topiary-cli/src/error.rs @@ -1,4 +1,4 @@ -use std::{error, fmt, io, process::ExitCode, result}; +use std::{error, fmt, io, process::ExitCode, result, sync}; use topiary::FormatterError; /// A convenience wrapper around `std::result::Result`. @@ -130,6 +130,13 @@ where } } +impl From> for TopiaryError { + fn from(_: sync::PoisonError) -> Self { + // TODO Pass the error into `CLIError::Generic`, without invalidating lifetime constraints + Self::Bin("Could not acquire cache lock".into(), None) + } +} + impl From for TopiaryError { fn from(e: toml::de::Error) -> Self { TopiaryError::Bin( diff --git a/topiary-cli/src/language.rs b/topiary-cli/src/language.rs new file mode 100644 index 00000000..760a6d1c --- /dev/null +++ b/topiary-cli/src/language.rs @@ -0,0 +1,58 @@ +use std::{ + collections::HashMap, + hash::{Hash, Hasher}, + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use topiary::{Language, TopiaryQuery}; + +use crate::{error::CLIResult, io::InputFile}; + +/// `LanguageDefinition` contains the necessary language-related values that the Topiary API +/// expects to do its job +pub struct LanguageDefinition { + pub query: TopiaryQuery, + pub language: Language, + pub grammar: tree_sitter_facade::Language, +} + +/// Key type for the cache of language definitions +/// +/// NOTE This is not public, as it is constructed from an `io::InputFile` reference +struct LanguageKey<'cfg> { + language: &'cfg Language, + query: PathBuf, +} + +impl<'cfg> Hash for LanguageKey<'cfg> { + fn hash(&self, state: &mut H) { + self.language.name.hash(state); + self.query.hash(state); + } +} + +impl<'i, 'cfg> From<&'i InputFile<'cfg>> for LanguageKey<'cfg> { + fn from(input: &'i InputFile<'cfg>) -> Self { + todo!() + } +} + +/// Thread-safe language definition cache +pub struct LanguageDefinitionCache<'cfg>(Mutex, LanguageDefinition>>); + +impl<'cfg> LanguageDefinitionCache<'cfg> { + pub fn new() -> Arc { + Arc::new(LanguageDefinitionCache(Mutex::new(HashMap::new()))) + } + + pub fn fetch<'k, T>(&mut self, key: &'k T) -> CLIResult + where + &'k T: Into>, + { + self.0.lock()?; + + let key = key.into(); + todo!() + } +} diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 8b4914d7..c554ed73 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -2,6 +2,7 @@ mod cli; mod configuration; mod error; mod io; +mod language; mod visualisation; use std::{error::Error, process::ExitCode}; From a3772a1eff3fe6a0bd5a35f1281cfecf177e7483 Mon Sep 17 00:00:00 2001 From: Christopher Harrison Date: Sat, 19 Aug 2023 17:15:50 +0100 Subject: [PATCH 2/7] Expose the raw values and implement Display --- topiary-cli/src/io.rs | 44 +++++++++++++++++++++--------------- topiary-cli/src/main.rs | 2 +- topiary/src/configuration.rs | 2 +- topiary/src/language.rs | 16 +++++++++---- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/topiary-cli/src/io.rs b/topiary-cli/src/io.rs index cceeec4d..750b7278 100644 --- a/topiary-cli/src/io.rs +++ b/topiary-cli/src/io.rs @@ -1,6 +1,7 @@ use std::{ borrow::Cow, ffi::OsString, + fmt, fs::File, io::{stdin, stdout, BufReader, ErrorKind, Read, Result, Write}, path::{Path, PathBuf}, @@ -61,11 +62,20 @@ impl From<&AtLeastOneInput> for InputFrom { /// Each `InputFile` needs to locate its source (standard input or disk), such that its `io::Read` /// implementation can do the right thing. #[derive(Debug)] -enum InputSource { +pub enum InputSource { Stdin, Disk(PathBuf, Option), } +impl fmt::Display for InputSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Stdin => write!(f, "standard input"), + Self::Disk(path, _) => write!(f, "{}", path.to_string_lossy()), + } + } +} + /// An `InputFile` is the unit of input for Topiary, encapsulating everything needed for downstream /// processing. It implements `io::Read`, so it can be passed directly to the Topiary API. #[derive(Debug)] @@ -93,22 +103,19 @@ impl<'cfg> InputFile<'cfg> { Ok((query, self.language.clone(), grammar)) } - /// Expose input source, for logging - pub fn source(&self) -> Cow { - match &self.source { - InputSource::Stdin => "standard input".into(), - InputSource::Disk(path, _) => path.to_string_lossy(), - } + /// Expose input source + pub fn source(&self) -> &InputSource { + &self.source } - /// Expose language for input, for logging - pub fn language(&self) -> &str { - &self.language.name + /// Expose language for input + pub fn language(&self) -> &Language { + &self.language } - /// Expose query path for input, for logging - pub fn query(&self) -> Cow { - self.query.to_string_lossy() + /// Expose query path for input + pub fn query(&self) -> &PathBuf { + &self.query } } @@ -225,12 +232,13 @@ impl OutputFile { Ok(()) } +} - /// Expose output sink, for logging - pub fn sink(&self) -> Cow { - match &self { - Self::Stdout => "standard output".into(), - Self::Disk { output, .. } => output.to_string_lossy(), +impl fmt::Display for OutputFile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Stdout => write!(f, "standard ouput"), + Self::Disk { output, .. } => write!(f, "{}", output.to_string_lossy()), } } } diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index c554ed73..4bec057e 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -53,7 +53,7 @@ async fn run() -> CLIResult<()> { "Visualising {}, as {}, to {}", input.source(), input.language(), - output.sink() + output ); // TODO `InputFile::to_language_definition` will re-process the `(Language, PathBuf)` diff --git a/topiary/src/configuration.rs b/topiary/src/configuration.rs index 04a9b5a8..273fcede 100644 --- a/topiary/src/configuration.rs +++ b/topiary/src/configuration.rs @@ -92,7 +92,7 @@ impl PartialEq for Configuration { impl fmt::Display for Configuration { /// Pretty-print configuration as TOML - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let toml = toml::to_string_pretty(self).map_err(|_| fmt::Error)?; write!(f, "{toml}") } diff --git a/topiary/src/language.rs b/topiary/src/language.rs index 662ede30..3830adbd 100644 --- a/topiary/src/language.rs +++ b/topiary/src/language.rs @@ -1,6 +1,8 @@ -use std::collections::HashSet; -use std::io; -use std::path::{Path, PathBuf}; +use std::{ + collections::HashSet, + fmt, io, + path::{Path, PathBuf}, +}; use clap::ValueEnum; use serde::{Deserialize, Serialize}; @@ -15,7 +17,7 @@ pub struct Language { /// the Configuration, and to convert from a language to the respective tree-sitter /// grammar. pub name: String, - /// A Set of the filetype extentions associated with this particular language. + /// A Set of the filetype extensions associated with this particular language. /// Enables Topiary to pick the right language given an input file pub extensions: HashSet, /// The indentation string used for that particular language. Defaults to " " @@ -103,6 +105,12 @@ impl Language { } } +impl fmt::Display for Language { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name) + } +} + /// Convert a Language into the canonical basename of its query file, under the most appropriate /// search path. We test 3 different locations for query files, in the following priority order, /// returning the first that exists: From 2b20f3f515f7a64f606f1b0bf4c9f192ca29aa4c Mon Sep 17 00:00:00 2001 From: Christopher Harrison Date: Mon, 21 Aug 2023 15:57:44 +0100 Subject: [PATCH 3/7] Thread-safe cache of language definitions --- topiary-cli/Cargo.toml | 2 +- topiary-cli/src/io.rs | 22 +++++++++---------- topiary-cli/src/language.rs | 43 ++++++++++++++++++++++--------------- topiary-cli/src/main.rs | 19 ++++++++-------- 4 files changed, 46 insertions(+), 40 deletions(-) diff --git a/topiary-cli/Cargo.toml b/topiary-cli/Cargo.toml index 38802161..ad518dc7 100644 --- a/topiary-cli/Cargo.toml +++ b/topiary-cli/Cargo.toml @@ -35,7 +35,7 @@ itertools = { workspace = true } log = { workspace = true } serde-toml-merge = { workspace = true } tempfile = { workspace = true } -tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } +tokio = { workspace = true, features = ["fs", "rt-multi-thread", "macros"] } toml = { workspace = true } topiary = { path = "../topiary" } tree-sitter-facade = { workspace = true } diff --git a/topiary-cli/src/io.rs b/topiary-cli/src/io.rs index 750b7278..b9ec798b 100644 --- a/topiary-cli/src/io.rs +++ b/topiary-cli/src/io.rs @@ -1,9 +1,8 @@ use std::{ - borrow::Cow, ffi::OsString, fmt, fs::File, - io::{stdin, stdout, BufReader, ErrorKind, Read, Result, Write}, + io::{stdin, stdout, ErrorKind, Read, Result, Write}, path::{Path, PathBuf}, }; @@ -13,6 +12,7 @@ use topiary::{Configuration, Language, SupportedLanguage, TopiaryQuery}; use crate::{ cli::{AtLeastOneInput, ExactlyOneInput, FromStdin}, error::{CLIResult, TopiaryError}, + language::LanguageDefinition, }; type QueryPath = PathBuf; @@ -85,22 +85,20 @@ pub struct InputFile<'cfg> { query: QueryPath, } -// TODO This feels like a leaky abstraction, but it's enough to satisfy the Topiary API... impl<'cfg> InputFile<'cfg> { /// Convert our `InputFile` into language definition values that Topiary can consume - pub async fn to_language_definition( - &self, - ) -> CLIResult<(TopiaryQuery, Language, tree_sitter_facade::Language)> { + pub async fn to_language_definition(&self) -> CLIResult { let grammar = self.language.grammar().await?; let query = { - let mut reader = BufReader::new(File::open(&self.query)?); - let mut contents = String::new(); - reader.read_to_string(&mut contents)?; - + let contents = tokio::fs::read_to_string(&self.query).await?; TopiaryQuery::new(&grammar, &contents)? }; - Ok((query, self.language.clone(), grammar)) + Ok(LanguageDefinition { + query, + language: self.language.clone(), + grammar, + }) } /// Expose input source @@ -110,7 +108,7 @@ impl<'cfg> InputFile<'cfg> { /// Expose language for input pub fn language(&self) -> &Language { - &self.language + self.language } /// Expose query path for input diff --git a/topiary-cli/src/language.rs b/topiary-cli/src/language.rs index 760a6d1c..87d4c55a 100644 --- a/topiary-cli/src/language.rs +++ b/topiary-cli/src/language.rs @@ -2,7 +2,7 @@ use std::{ collections::HashMap, hash::{Hash, Hasher}, path::PathBuf, - sync::{Arc, Mutex}, + sync::{Arc, RwLock}, }; use topiary::{Language, TopiaryQuery}; @@ -18,9 +18,8 @@ pub struct LanguageDefinition { } /// Key type for the cache of language definitions -/// -/// NOTE This is not public, as it is constructed from an `io::InputFile` reference -struct LanguageKey<'cfg> { +#[derive(Eq, PartialEq)] +pub struct LanguageKey<'cfg> { language: &'cfg Language, query: PathBuf, } @@ -32,27 +31,37 @@ impl<'cfg> Hash for LanguageKey<'cfg> { } } -impl<'i, 'cfg> From<&'i InputFile<'cfg>> for LanguageKey<'cfg> { - fn from(input: &'i InputFile<'cfg>) -> Self { - todo!() +impl<'cfg> From<&'cfg InputFile<'cfg>> for LanguageKey<'cfg> { + fn from(input: &'cfg InputFile<'cfg>) -> Self { + Self { + language: input.language(), + query: input.query().into(), + } } } /// Thread-safe language definition cache -pub struct LanguageDefinitionCache<'cfg>(Mutex, LanguageDefinition>>); +pub struct LanguageDefinitionCache<'cfg>( + RwLock, Arc>>, +); impl<'cfg> LanguageDefinitionCache<'cfg> { - pub fn new() -> Arc { - Arc::new(LanguageDefinitionCache(Mutex::new(HashMap::new()))) + pub fn new() -> Self { + LanguageDefinitionCache(RwLock::new(HashMap::new())) } - pub fn fetch<'k, T>(&mut self, key: &'k T) -> CLIResult - where - &'k T: Into>, - { - self.0.lock()?; + /// Fetch the language definition from the cache, populating if necessary, with thread-safety + pub async fn fetch(&self, input: &'cfg InputFile<'cfg>) -> CLIResult> { + let key = input.into(); - let key = key.into(); - todo!() + // Return the language definition from the cache, behind a read lock, if it exists... + if let Some(lang_def) = self.0.read()?.get(&key) { + return Ok(Arc::clone(lang_def)); + } + + // ...otherwise, fetch the language definition, to populate the cache behind a write lock + let lang_def = Arc::new(input.to_language_definition().await?); + self.0.write()?.insert(key, Arc::clone(&lang_def)); + Ok(lang_def) } } diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 4bec057e..ac91cd19 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -11,6 +11,7 @@ use crate::{ cli::Commands, error::CLIResult, io::{Inputs, OutputFile}, + language::LanguageDefinitionCache, }; use topiary::{formatter, Operation}; @@ -34,6 +35,8 @@ async fn run() -> CLIResult<()> { args.global.configuration_collation.as_ref().unwrap(), )?; + let cache = LanguageDefinitionCache::new(); + // Delegate by subcommand match args.command { Commands::Fmt { @@ -49,6 +52,9 @@ async fn run() -> CLIResult<()> { let mut input = Inputs::new(&config, &input).next().unwrap()?; let mut output = OutputFile::Stdout; + // We don't need the cache, here, but for the sake of consistency + let lang_def = cache.fetch(&input).await?; + log::info!( "Visualising {}, as {}, to {}", input.source(), @@ -56,19 +62,12 @@ async fn run() -> CLIResult<()> { output ); - // TODO `InputFile::to_language_definition` will re-process the `(Language, PathBuf)` - // tuple for each valid input file. Here we only have one file, but when it comes to - // formatting, many input files will share the same `(Language, PathBuf)` tuples, so - // we'll end up doing a lot of unnecessary work, including IO (although that'll - // probably be cached by the OS). Caching these values in memory would make sense. - let (query, language, grammar) = input.to_language_definition().await?; - formatter( &mut input, &mut output, - &query, - &language, - &grammar, + &lang_def.query, + &lang_def.language, + &lang_def.grammar, Operation::Visualise { output_format: format.into(), }, From 119c0092eda8c1afc666654a186d24e2d8ffcd69 Mon Sep 17 00:00:00 2001 From: Christopher Harrison Date: Tue, 22 Aug 2023 17:01:03 +0100 Subject: [PATCH 4/7] WIP: Formatting Formatting is now functional, but there remain implementation details to sort out. Bufferin, caching and error handling, in particular... --- Cargo.lock | 33 +++++++ Cargo.toml | 1 + topiary-cli/Cargo.toml | 1 + topiary-cli/src/main.rs | 213 +++++++++++++--------------------------- 4 files changed, 103 insertions(+), 145 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eaa8bff8..43b4013d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,18 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "async-scoped" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7a6a57c8aeb40da1ec037f5d455836852f7a57e69e1b1ad3d8f38ac1d6cadf" +dependencies = [ + "futures", + "pin-project", + "slab", + "tokio", +] + [[package]] name = "async-stream" version = "0.3.5" @@ -769,6 +781,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "pin-project" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.26", +] + [[package]] name = "pin-project-lite" version = "0.2.10" @@ -1294,6 +1326,7 @@ name = "topiary-cli" version = "0.2.3" dependencies = [ "assert_cmd", + "async-scoped", "clap", "directories", "env_logger", diff --git a/Cargo.toml b/Cargo.toml index c591a60c..94f14d0b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows [workspace.dependencies] assert_cmd = "2.0" +async-scoped = { version = "0.7.1", features = ["use-tokio"] } cfg-if = "1.0.0" clap = { version = "4.3", features = [ "env" ] } criterion = "0.5" diff --git a/topiary-cli/Cargo.toml b/topiary-cli/Cargo.toml index ad518dc7..b99ca658 100644 --- a/topiary-cli/Cargo.toml +++ b/topiary-cli/Cargo.toml @@ -26,6 +26,7 @@ path = "src/main.rs" [dependencies] # For now we just load the tree-sitter language parsers statically. # Eventually we will want to dynamically load them, like Helix does. +async-scoped = { workspace = true } clap = { workspace = true, features = ["derive", "env", "wrap_help"] } directories = { workspace = true } env_logger = { workspace = true } diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index ac91cd19..2cc105e2 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -11,7 +11,7 @@ use crate::{ cli::Commands, error::CLIResult, io::{Inputs, OutputFile}, - language::LanguageDefinitionCache, + //language::LanguageDefinitionCache, }; use topiary::{formatter, Operation}; @@ -35,8 +35,6 @@ async fn run() -> CLIResult<()> { args.global.configuration_collation.as_ref().unwrap(), )?; - let cache = LanguageDefinitionCache::new(); - // Delegate by subcommand match args.command { Commands::Fmt { @@ -44,16 +42,80 @@ async fn run() -> CLIResult<()> { skip_idempotence, inputs, } => { - todo!(); + let inputs = Inputs::new(&config, &inputs); + //let cache = LanguageDefinitionCache::new(); + + let (_, tasks) = async_scoped::TokioScope::scope_and_block(|scope| { + for input in inputs { + scope.spawn({ + //let cache = &cache; + + async { + match input { + Ok(mut input) => { + // TODO BufReader and BufWriter + let mut output = OutputFile::try_from(&input)?; + + //let lang_def = cache.fetch(&input).await?; + let lang_def = input.to_language_definition().await?; + + log::info!( + "Formatting {}, as {} using {}, to {}", + input.source(), + input.language(), + input.query().to_string_lossy(), + output + ); + + formatter( + &mut input, + &mut output, + &lang_def.query, + &lang_def.language, + &lang_def.grammar, + Operation::Format { + skip_idempotence, + tolerate_parsing_errors, + }, + )?; + + output.persist()?; + } + + Err(error) => { + // By this point, we've lost any reference to the original + // input; we trust that it is embedded into `error`. + log::warn!("Skipping: {error}"); + } + } + + CLIResult::Ok(()) + } + }); + } + }); + + // TODO This outputs all the errors from the concurrent tasks _after_ they've + // completed. For join failures, that makes sense, but for Topiary errors, we lose + // context in the logs. Topiary error handling should be done within the task. + for task in tasks { + match task { + Err(join_error) => print_error(&join_error), + Ok(Err(topiary_error)) => print_error(&topiary_error), + _ => {} + } + } } Commands::Vis { format, input } => { // We are guaranteed (by clap) to have exactly one input, so it's safe to unwrap + // TODO BufReader and BufWriter let mut input = Inputs::new(&config, &input).next().unwrap()?; let mut output = OutputFile::Stdout; - // We don't need the cache, here, but for the sake of consistency - let lang_def = cache.fetch(&input).await?; + // We don't need a `LanguageDefinitionCache` when there's only one input, + // which saves us the thread-safety overhead + let lang_def = input.to_language_definition().await?; log::info!( "Visualising {}, as {}, to {}", @@ -83,145 +145,6 @@ async fn run() -> CLIResult<()> { Ok(()) } -/* - let io_files: Vec<(String, String)> = if args.in_place || args.input_files.len() > 1 { - args.input_files - .iter() - .map(|f| (f.clone(), f.clone())) - .collect() - } else { - // Clap guarantees our input_files is non-empty - vec![( - args.input_files.first().unwrap().clone(), - match args.output_file.as_deref() { - Some("-") | None => String::from("-"), - Some(f) => String::from(f), - }, - )] - }; - - type IoFile = ( - String, - String, - Language, - Option, - CLIResult, - ); - - // Add the language and query Path to the io_files - let io_files: Vec = io_files - .into_iter() - // Add the appropriate language to all of the tuples - .map(|(i, o)| { - let language = if let Some(language) = args.language { - language.to_language(&configuration).clone() - } else { - Language::detect(&i, &configuration)?.clone() - }; - - let query_path = if let Some(query) = &args.query { - Ok(query.clone()) - } else { - language.query_file() - } - .map_err(TopiaryError::Lib); - - Ok((i, o, language, args.query.clone(), query_path)) - }) - .collect::>>()?; - - // Converts the simple types into arguments we can pass to the `formatter` function - // _ holds the tree_sitter_facade::Language - let fmt_args: Vec<(String, String, Language, _, TopiaryQuery)> = - futures::future::try_join_all(io_files.into_iter().map( - |(i, o, language, query_arg, query_path)| async move { - let grammar = language.grammar().await?; - - let query = query_path - .and_then(|query_path| { - { - let mut reader = BufReader::new(File::open(query_path)?); - let mut contents = String::new(); - reader.read_to_string(&mut contents)?; - Ok(contents) - } - .map_err(|e| { - TopiaryError::Bin( - "Could not open query file".into(), - Some(CLIError::IOError(e)), - ) - }) - }) - .and_then(|query_content: String| { - Ok(TopiaryQuery::new(&grammar, &query_content)?) - }) - .or_else(|e| { - // If we weren't able to read the query file, and the user didn't - // request a specific query file, we should fall back to the built-in - // queries. - if query_arg.is_none() { - log::info!( - "No language file found for {language:?}. Will use built-in query." - ); - Ok((&language).try_into()?) - } else { - Err(e) - } - })?; - - Ok::<_, TopiaryError>((i, o, language, grammar, query)) - }, - )) - .await?; - - // The operation needs not be part of the Vector of Structs because it is the same for every formatting instance - let operation = if let Some(visualisation) = args.visualise { - Operation::Visualise { - output_format: visualisation.into(), - } - } else { - Operation::Format { - skip_idempotence: args.skip_idempotence, - tolerate_parsing_errors: args.tolerate_parsing_errors, - } - }; - - let tasks: Vec<_> = fmt_args - .into_iter() - .map(|(input, output, language, grammar, query)| -> tokio::task::JoinHandle> { - tokio::spawn(async move { - let mut input: Box = match input.as_str() { - "-" => Box::new(stdin()), - file => Box::new(BufReader::new(File::open(file)?)), - }; - let mut output: BufWriter = BufWriter::new(OutputFile::new(&output)?); - - formatter( - &mut input, - &mut output, - &query, - &language, - &grammar, - operation, - )?; - - output.into_inner()?.persist()?; - - Ok(()) - }) - }) - .collect(); - - for task in tasks { - // The await results in a `Result, JoinError>`. - // The first ? concerns the `JoinError`, the second one the `TopiaryError`. - task.await??; - } - - Ok(()) -} -*/ - fn print_error(e: &dyn Error) { log::error!("{e}"); if let Some(source) = e.source() { From a1941df5e1d8ae3073d636145f9f5fd6f26fa8d9 Mon Sep 17 00:00:00 2001 From: Christopher Harrison Date: Tue, 22 Aug 2023 17:28:13 +0100 Subject: [PATCH 5/7] Add buffered reading and writing --- topiary-cli/src/main.rs | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 2cc105e2..c10f3cce 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -5,7 +5,11 @@ mod io; mod language; mod visualisation; -use std::{error::Error, process::ExitCode}; +use std::{ + error::Error, + io::{BufReader, BufWriter}, + process::ExitCode, +}; use crate::{ cli::Commands, @@ -52,9 +56,8 @@ async fn run() -> CLIResult<()> { async { match input { - Ok(mut input) => { - // TODO BufReader and BufWriter - let mut output = OutputFile::try_from(&input)?; + Ok(input) => { + let output = OutputFile::try_from(&input)?; //let lang_def = cache.fetch(&input).await?; let lang_def = input.to_language_definition().await?; @@ -67,9 +70,12 @@ async fn run() -> CLIResult<()> { output ); + let mut buf_input = BufReader::new(input); + let mut buf_output = BufWriter::new(output); + formatter( - &mut input, - &mut output, + &mut buf_input, + &mut buf_output, &lang_def.query, &lang_def.language, &lang_def.grammar, @@ -79,7 +85,7 @@ async fn run() -> CLIResult<()> { }, )?; - output.persist()?; + buf_output.into_inner()?.persist()?; } Err(error) => { @@ -109,9 +115,8 @@ async fn run() -> CLIResult<()> { Commands::Vis { format, input } => { // We are guaranteed (by clap) to have exactly one input, so it's safe to unwrap - // TODO BufReader and BufWriter - let mut input = Inputs::new(&config, &input).next().unwrap()?; - let mut output = OutputFile::Stdout; + let input = Inputs::new(&config, &input).next().unwrap()?; + let output = OutputFile::Stdout; // We don't need a `LanguageDefinitionCache` when there's only one input, // which saves us the thread-safety overhead @@ -124,9 +129,12 @@ async fn run() -> CLIResult<()> { output ); + let mut buf_input = BufReader::new(input); + let mut buf_output = BufWriter::new(output); + formatter( - &mut input, - &mut output, + &mut buf_input, + &mut buf_output, &lang_def.query, &lang_def.language, &lang_def.grammar, From 523e5415d61719bdd961d0429fa407a92d46b270 Mon Sep 17 00:00:00 2001 From: Christopher Harrison Date: Wed, 23 Aug 2023 11:29:18 +0100 Subject: [PATCH 6/7] Make the cache operational, if not suboptimal :( --- topiary-cli/src/language.rs | 65 ++++++++++++++------------- topiary-cli/src/main.rs | 87 ++++++++++++++++++------------------- 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/topiary-cli/src/language.rs b/topiary-cli/src/language.rs index 87d4c55a..5afc8896 100644 --- a/topiary-cli/src/language.rs +++ b/topiary-cli/src/language.rs @@ -1,7 +1,6 @@ use std::{ - collections::HashMap, + collections::{hash_map::DefaultHasher, HashMap}, hash::{Hash, Hasher}, - path::PathBuf, sync::{Arc, RwLock}, }; @@ -17,51 +16,55 @@ pub struct LanguageDefinition { pub grammar: tree_sitter_facade::Language, } -/// Key type for the cache of language definitions -#[derive(Eq, PartialEq)] -pub struct LanguageKey<'cfg> { - language: &'cfg Language, - query: PathBuf, -} - -impl<'cfg> Hash for LanguageKey<'cfg> { - fn hash(&self, state: &mut H) { - self.language.name.hash(state); - self.query.hash(state); - } -} - -impl<'cfg> From<&'cfg InputFile<'cfg>> for LanguageKey<'cfg> { - fn from(input: &'cfg InputFile<'cfg>) -> Self { - Self { - language: input.language(), - query: input.query().into(), - } - } -} - /// Thread-safe language definition cache -pub struct LanguageDefinitionCache<'cfg>( - RwLock, Arc>>, -); +pub struct LanguageDefinitionCache(RwLock>>); -impl<'cfg> LanguageDefinitionCache<'cfg> { +impl LanguageDefinitionCache { pub fn new() -> Self { LanguageDefinitionCache(RwLock::new(HashMap::new())) } /// Fetch the language definition from the cache, populating if necessary, with thread-safety - pub async fn fetch(&self, input: &'cfg InputFile<'cfg>) -> CLIResult> { - let key = input.into(); + // FIXME Locking is not working as expected. The read/write locks ensure atomicity -- so it's + // not UN-thread-safe -- but many threads/futures can pass the read lock and go into the write + // branch. Using a `Mutex` over the whole `HashMap`, rather than a `RwLock`, breaks the `Send` + // constraint of the async scope... + pub async fn fetch<'i>(&self, input: &'i InputFile<'i>) -> CLIResult> { + // There's no need to store the input's identifying information (language name and query) + // in the key, so we use its hash directly. This side-steps any awkward lifetime issues. + let key = { + let mut hash = DefaultHasher::new(); + input.language().name.hash(&mut hash); + input.query().hash(&mut hash); + + hash.finish() + }; // Return the language definition from the cache, behind a read lock, if it exists... if let Some(lang_def) = self.0.read()?.get(&key) { + log::debug!( + "Cache {:p}: Hit at {:#016x} ({}, {})", + self, + key, + input.language(), + input.query().file_name().unwrap().to_string_lossy() + ); + return Ok(Arc::clone(lang_def)); } // ...otherwise, fetch the language definition, to populate the cache behind a write lock let lang_def = Arc::new(input.to_language_definition().await?); self.0.write()?.insert(key, Arc::clone(&lang_def)); + + log::debug!( + "Cache {:p}: Insert at {:#016x} ({}, {})", + self, + key, + input.language(), + input.query().file_name().unwrap().to_string_lossy() + ); + Ok(lang_def) } } diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index c10f3cce..8b1ea4af 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -15,7 +15,7 @@ use crate::{ cli::Commands, error::CLIResult, io::{Inputs, OutputFile}, - //language::LanguageDefinitionCache, + language::LanguageDefinitionCache, }; use topiary::{formatter, Operation}; @@ -47,56 +47,51 @@ async fn run() -> CLIResult<()> { inputs, } => { let inputs = Inputs::new(&config, &inputs); - //let cache = LanguageDefinitionCache::new(); + let cache = LanguageDefinitionCache::new(); let (_, tasks) = async_scoped::TokioScope::scope_and_block(|scope| { for input in inputs { - scope.spawn({ - //let cache = &cache; - - async { - match input { - Ok(input) => { - let output = OutputFile::try_from(&input)?; - - //let lang_def = cache.fetch(&input).await?; - let lang_def = input.to_language_definition().await?; - - log::info!( - "Formatting {}, as {} using {}, to {}", - input.source(), - input.language(), - input.query().to_string_lossy(), - output - ); - - let mut buf_input = BufReader::new(input); - let mut buf_output = BufWriter::new(output); - - formatter( - &mut buf_input, - &mut buf_output, - &lang_def.query, - &lang_def.language, - &lang_def.grammar, - Operation::Format { - skip_idempotence, - tolerate_parsing_errors, - }, - )?; - - buf_output.into_inner()?.persist()?; - } - - Err(error) => { - // By this point, we've lost any reference to the original - // input; we trust that it is embedded into `error`. - log::warn!("Skipping: {error}"); - } + scope.spawn(async { + match input { + Ok(input) => { + // FIXME The cache is performing suboptimally; see `language.rs` + let output = OutputFile::try_from(&input)?; + let lang_def = cache.fetch(&input).await?; + + log::info!( + "Formatting {}, as {} using {}, to {}", + input.source(), + input.language(), + input.query().to_string_lossy(), + output + ); + + let mut buf_input = BufReader::new(input); + let mut buf_output = BufWriter::new(output); + + formatter( + &mut buf_input, + &mut buf_output, + &lang_def.query, + &lang_def.language, + &lang_def.grammar, + Operation::Format { + skip_idempotence, + tolerate_parsing_errors, + }, + )?; + + buf_output.into_inner()?.persist()?; } - CLIResult::Ok(()) + Err(error) => { + // By this point, we've lost any reference to the original + // input; we trust that it is embedded into `error`. + log::warn!("Skipping: {error}"); + } } + + CLIResult::Ok(()) }); } }); @@ -111,6 +106,8 @@ async fn run() -> CLIResult<()> { _ => {} } } + + // TODO Exit code: if 1 input => normal; all inputs => multiple failures } Commands::Vis { format, input } => { From 2ef86df6547f56bfb1f9a06fa60ffd52d720fd7e Mon Sep 17 00:00:00 2001 From: Christopher Harrison Date: Wed, 23 Aug 2023 13:07:07 +0100 Subject: [PATCH 7/7] Better error handling --- Cargo.lock | 31 +++++++++++ Cargo.toml | 1 + README.md | 6 +++ topiary-cli/Cargo.toml | 1 + topiary-cli/src/error.rs | 5 ++ topiary-cli/src/main.rs | 109 ++++++++++++++++++++++----------------- 6 files changed, 106 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43b4013d..03507208 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1340,6 +1340,7 @@ dependencies = [ "toml", "topiary", "tree-sitter-facade", + "tryvial", ] [[package]] @@ -1453,6 +1454,26 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tryvial" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edebbe6f32d5a62f44fc7d7a7e79963bc4cac720ba63cc1e00ec17832d9464ee" +dependencies = [ + "tryvial-proc", +] + +[[package]] +name = "tryvial-proc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78d7245fd71d5be726bdc4dbe7e1b94f8d7627c5ccfeb6d624deb96542d0d149" +dependencies = [ + "proc-macro2", + "quote", + "venial", +] + [[package]] name = "unescape" version = "0.1.0" @@ -1477,6 +1498,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "venial" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61584a325b16f97b5b25fcc852eb9550843a251057a5e3e5992d2376f3df4bb2" +dependencies = [ + "proc-macro2", + "quote", +] + [[package]] name = "wait-timeout" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 94f14d0b..320134fa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ tree-sitter-ocamllex = "0.20.2" tree-sitter-query = { git = "https://github.com/nvim-treesitter/tree-sitter-query" } tree-sitter-rust = "0.20.3" tree-sitter-toml = "0.20.0" +tryvial = "0.2.0" unescape = "0.1" wasm-bindgen = "0.2.84" wasm-bindgen-futures = "0.4.34" diff --git a/README.md b/README.md index ffa50f63..d0b692e8 100644 --- a/README.md +++ b/README.md @@ -379,6 +379,12 @@ formatting. Otherwise, the following exit codes are defined: | Language detection error | 6 | | Idempotency error | 7 | | Unspecified formatting error | 8 | +| Multiple errors | 9 | + +When given multiple inputs, Topiary will do its best to process them +all, even in the presence of errors. Should _any_ errors occur, Topiary +will return a non-zero exit code. For more details on the nature of +these errors, run Topiary at the `warn` logging level. #### Example diff --git a/topiary-cli/Cargo.toml b/topiary-cli/Cargo.toml index b99ca658..7978b9be 100644 --- a/topiary-cli/Cargo.toml +++ b/topiary-cli/Cargo.toml @@ -40,6 +40,7 @@ tokio = { workspace = true, features = ["fs", "rt-multi-thread", "macros"] } toml = { workspace = true } topiary = { path = "../topiary" } tree-sitter-facade = { workspace = true } +tryvial = { workspace = true } [dev-dependencies] assert_cmd = { workspace = true } diff --git a/topiary-cli/src/error.rs b/topiary-cli/src/error.rs index 4008ddaa..cb7e339f 100644 --- a/topiary-cli/src/error.rs +++ b/topiary-cli/src/error.rs @@ -18,6 +18,7 @@ pub enum TopiaryError { pub enum CLIError { IOError(io::Error), Generic(Box), + Multiple, } /// # Safety @@ -48,6 +49,7 @@ impl error::Error for TopiaryError { Self::Lib(error) => error.source(), Self::Bin(_, Some(CLIError::IOError(error))) => Some(error), Self::Bin(_, Some(CLIError::Generic(error))) => error.source(), + Self::Bin(_, Some(CLIError::Multiple)) => None, Self::Bin(_, None) => None, } } @@ -56,6 +58,9 @@ impl error::Error for TopiaryError { impl From for ExitCode { fn from(e: TopiaryError) -> Self { let exit_code = match e { + // Multiple errors: Exit 9 + TopiaryError::Bin(_, Some(CLIError::Multiple)) => 9, + // Idempotency parsing errors: Exit 8 TopiaryError::Lib(FormatterError::IdempotenceParsing(_)) => 8, diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 8b1ea4af..9dd97998 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -13,7 +13,7 @@ use std::{ use crate::{ cli::Commands, - error::CLIResult, + error::{CLIError, CLIResult, TopiaryError}, io::{Inputs, OutputFile}, language::LanguageDefinitionCache, }; @@ -49,65 +49,80 @@ async fn run() -> CLIResult<()> { let inputs = Inputs::new(&config, &inputs); let cache = LanguageDefinitionCache::new(); - let (_, tasks) = async_scoped::TokioScope::scope_and_block(|scope| { + let (_, mut results) = async_scoped::TokioScope::scope_and_block(|scope| { for input in inputs { scope.spawn(async { - match input { + // NOTE "try blocks" and "async closures" are both unstable features. As + // such, to report errors when they happen -- rather than collated at the + // end -- we have to resort to this awkward dance, so we can benefit from + // `?` syntax sugar. Rewrite with a "try block" once the feature is stable. + let result: CLIResult<()> = match input { Ok(input) => { // FIXME The cache is performing suboptimally; see `language.rs` - let output = OutputFile::try_from(&input)?; - let lang_def = cache.fetch(&input).await?; - - log::info!( - "Formatting {}, as {} using {}, to {}", - input.source(), - input.language(), - input.query().to_string_lossy(), - output - ); - - let mut buf_input = BufReader::new(input); - let mut buf_output = BufWriter::new(output); - - formatter( - &mut buf_input, - &mut buf_output, - &lang_def.query, - &lang_def.language, - &lang_def.grammar, - Operation::Format { - skip_idempotence, - tolerate_parsing_errors, - }, - )?; - - buf_output.into_inner()?.persist()?; + let lang_def = match cache.fetch(&input).await { + Ok(lang_def) => lang_def, + Err(error) => return Err(error), + }; + + tryvial::try_block! { + let output = OutputFile::try_from(&input)?; + + log::info!( + "Formatting {}, as {} using {}, to {}", + input.source(), + input.language(), + input.query().to_string_lossy(), + output + ); + + let mut buf_input = BufReader::new(input); + let mut buf_output = BufWriter::new(output); + + formatter( + &mut buf_input, + &mut buf_output, + &lang_def.query, + &lang_def.language, + &lang_def.grammar, + Operation::Format { + skip_idempotence, + tolerate_parsing_errors, + }, + )?; + + buf_output.into_inner()?.persist()?; + } } - Err(error) => { - // By this point, we've lost any reference to the original - // input; we trust that it is embedded into `error`. - log::warn!("Skipping: {error}"); - } + // This happens when the input resolver cannot establish an input + // source, language or query file. + Err(error) => Err(error), + }; + + if let Err(error) = &result { + // By this point, we've lost any reference to the original + // input; we trust that it is embedded into `error`. + log::warn!("Skipping: {error}"); } - CLIResult::Ok(()) + result }); } }); - // TODO This outputs all the errors from the concurrent tasks _after_ they've - // completed. For join failures, that makes sense, but for Topiary errors, we lose - // context in the logs. Topiary error handling should be done within the task. - for task in tasks { - match task { - Err(join_error) => print_error(&join_error), - Ok(Err(topiary_error)) => print_error(&topiary_error), - _ => {} - } + if results.len() == 1 { + // If we just had one input, then handle errors as normal + results.remove(0)?? + } else if results + .iter() + .any(|result| matches!(result, Err(_) | Ok(Err(_)))) + { + // For multiple inputs, bail out if any failed with a "multiple errors" failure + return Err(TopiaryError::Bin( + "Processing of some inputs failed; see warning logs for details".into(), + Some(CLIError::Multiple), + )); } - - // TODO Exit code: if 1 input => normal; all inputs => multiple failures } Commands::Vis { format, input } => {