Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLI Changes Sub-PR: Formatting subcommand #599

Merged
merged 7 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
64 changes: 64 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows

[workspace.dependencies]
assert_cmd = "2.0"
async-scoped = { version = "0.7.1", features = ["use-tokio"] }
cfg-if = "1.0.0"
clap = { version = "4.3", features = [ "env" ] }
criterion = "0.5"
Expand Down Expand Up @@ -65,6 +66,7 @@ tree-sitter-ocamllex = "0.20.2"
tree-sitter-query = { git = "https://github.com/nvim-treesitter/tree-sitter-query" }
tree-sitter-rust = "0.20.3"
tree-sitter-toml = "0.20.0"
tryvial = "0.2.0"
unescape = "0.1"
wasm-bindgen = "0.2.84"
wasm-bindgen-futures = "0.4.34"
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,12 @@ formatting. Otherwise, the following exit codes are defined:
| Language detection error | 6 |
| Idempotency error | 7 |
| Unspecified formatting error | 8 |
| Multiple errors | 9 |

When given multiple inputs, Topiary will do its best to process them
all, even in the presence of errors. Should _any_ errors occur, Topiary
will return a non-zero exit code. For more details on the nature of
these errors, run Topiary at the `warn` logging level.

#### Example

Expand Down
4 changes: 3 additions & 1 deletion topiary-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ path = "src/main.rs"
[dependencies]
# For now we just load the tree-sitter language parsers statically.
# Eventually we will want to dynamically load them, like Helix does.
async-scoped = { workspace = true }
clap = { workspace = true, features = ["derive", "env", "wrap_help"] }
directories = { workspace = true }
env_logger = { workspace = true }
Expand All @@ -35,10 +36,11 @@ itertools = { workspace = true }
log = { workspace = true }
serde-toml-merge = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
tokio = { workspace = true, features = ["fs", "rt-multi-thread", "macros"] }
toml = { workspace = true }
topiary = { path = "../topiary" }
tree-sitter-facade = { workspace = true }
tryvial = { workspace = true }

[dev-dependencies]
assert_cmd = { workspace = true }
14 changes: 13 additions & 1 deletion topiary-cli/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{error, fmt, io, process::ExitCode, result};
use std::{error, fmt, io, process::ExitCode, result, sync};
use topiary::FormatterError;

/// A convenience wrapper around `std::result::Result<T, TopiaryError>`.
Expand All @@ -18,6 +18,7 @@ pub enum TopiaryError {
pub enum CLIError {
IOError(io::Error),
Generic(Box<dyn error::Error>),
Multiple,
}
Xophmeister marked this conversation as resolved.
Show resolved Hide resolved

/// # Safety
Expand Down Expand Up @@ -48,6 +49,7 @@ impl error::Error for TopiaryError {
Self::Lib(error) => error.source(),
Self::Bin(_, Some(CLIError::IOError(error))) => Some(error),
Self::Bin(_, Some(CLIError::Generic(error))) => error.source(),
Self::Bin(_, Some(CLIError::Multiple)) => None,
Self::Bin(_, None) => None,
}
}
Expand All @@ -56,6 +58,9 @@ impl error::Error for TopiaryError {
impl From<TopiaryError> for ExitCode {
fn from(e: TopiaryError) -> Self {
let exit_code = match e {
// Multiple errors: Exit 9
TopiaryError::Bin(_, Some(CLIError::Multiple)) => 9,

// Idempotency parsing errors: Exit 8
TopiaryError::Lib(FormatterError::IdempotenceParsing(_)) => 8,

Expand Down Expand Up @@ -130,6 +135,13 @@ where
}
}

impl<T> From<sync::PoisonError<T>> for TopiaryError {
fn from(_: sync::PoisonError<T>) -> Self {
// TODO Pass the error into `CLIError::Generic`, without invalidating lifetime constraints
Self::Bin("Could not acquire cache lock".into(), None)
}
}

impl From<toml::de::Error> for TopiaryError {
fn from(e: toml::de::Error) -> Self {
TopiaryError::Bin(
Expand Down
64 changes: 35 additions & 29 deletions topiary-cli/src/io.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::{
borrow::Cow,
ffi::OsString,
fmt,
fs::File,
io::{stdin, stdout, BufReader, ErrorKind, Read, Result, Write},
io::{stdin, stdout, ErrorKind, Read, Result, Write},
path::{Path, PathBuf},
};

Expand All @@ -12,6 +12,7 @@ use topiary::{Configuration, Language, SupportedLanguage, TopiaryQuery};
use crate::{
cli::{AtLeastOneInput, ExactlyOneInput, FromStdin},
error::{CLIResult, TopiaryError},
language::LanguageDefinition,
};

type QueryPath = PathBuf;
Expand Down Expand Up @@ -61,11 +62,20 @@ impl From<&AtLeastOneInput> for InputFrom {
/// Each `InputFile` needs to locate its source (standard input or disk), such that its `io::Read`
/// implementation can do the right thing.
#[derive(Debug)]
enum InputSource {
pub enum InputSource {
Stdin,
Disk(PathBuf, Option<File>),
}

impl fmt::Display for InputSource {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Stdin => write!(f, "standard input"),
Self::Disk(path, _) => write!(f, "{}", path.to_string_lossy()),
}
}
}

/// An `InputFile` is the unit of input for Topiary, encapsulating everything needed for downstream
/// processing. It implements `io::Read`, so it can be passed directly to the Topiary API.
#[derive(Debug)]
Expand All @@ -75,40 +85,35 @@ pub struct InputFile<'cfg> {
query: QueryPath,
}

// TODO This feels like a leaky abstraction, but it's enough to satisfy the Topiary API...
impl<'cfg> InputFile<'cfg> {
/// Convert our `InputFile` into language definition values that Topiary can consume
pub async fn to_language_definition(
&self,
) -> CLIResult<(TopiaryQuery, Language, tree_sitter_facade::Language)> {
pub async fn to_language_definition(&self) -> CLIResult<LanguageDefinition> {
let grammar = self.language.grammar().await?;
let query = {
let mut reader = BufReader::new(File::open(&self.query)?);
let mut contents = String::new();
reader.read_to_string(&mut contents)?;

let contents = tokio::fs::read_to_string(&self.query).await?;
TopiaryQuery::new(&grammar, &contents)?
};

Ok((query, self.language.clone(), grammar))
Ok(LanguageDefinition {
query,
language: self.language.clone(),
grammar,
})
}

/// Expose input source, for logging
pub fn source(&self) -> Cow<str> {
match &self.source {
InputSource::Stdin => "standard input".into(),
InputSource::Disk(path, _) => path.to_string_lossy(),
}
/// Expose input source
pub fn source(&self) -> &InputSource {
&self.source
}

/// Expose language for input, for logging
pub fn language(&self) -> &str {
&self.language.name
/// Expose language for input
pub fn language(&self) -> &Language {
self.language
}

/// Expose query path for input, for logging
pub fn query(&self) -> Cow<str> {
self.query.to_string_lossy()
/// Expose query path for input
pub fn query(&self) -> &PathBuf {
&self.query
}
}

Expand Down Expand Up @@ -225,12 +230,13 @@ impl OutputFile {

Ok(())
}
}

/// Expose output sink, for logging
pub fn sink(&self) -> Cow<str> {
match &self {
Self::Stdout => "standard output".into(),
Self::Disk { output, .. } => output.to_string_lossy(),
impl fmt::Display for OutputFile {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Stdout => write!(f, "standard ouput"),
Self::Disk { output, .. } => write!(f, "{}", output.to_string_lossy()),
}
}
}
Expand Down
70 changes: 70 additions & 0 deletions topiary-cli/src/language.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
use std::{
collections::{hash_map::DefaultHasher, HashMap},
hash::{Hash, Hasher},
sync::{Arc, RwLock},
};

use topiary::{Language, TopiaryQuery};

use crate::{error::CLIResult, io::InputFile};

/// `LanguageDefinition` contains the necessary language-related values that the Topiary API
/// expects to do its job
pub struct LanguageDefinition {
pub query: TopiaryQuery,
pub language: Language,
pub grammar: tree_sitter_facade::Language,
}

/// Thread-safe language definition cache
pub struct LanguageDefinitionCache(RwLock<HashMap<u64, Arc<LanguageDefinition>>>);

impl LanguageDefinitionCache {
pub fn new() -> Self {
LanguageDefinitionCache(RwLock::new(HashMap::new()))
}

/// Fetch the language definition from the cache, populating if necessary, with thread-safety
// FIXME Locking is not working as expected. The read/write locks ensure atomicity -- so it's
// not UN-thread-safe -- but many threads/futures can pass the read lock and go into the write
// branch. Using a `Mutex` over the whole `HashMap`, rather than a `RwLock`, breaks the `Send`
// constraint of the async scope...
pub async fn fetch<'i>(&self, input: &'i InputFile<'i>) -> CLIResult<Arc<LanguageDefinition>> {
// There's no need to store the input's identifying information (language name and query)
// in the key, so we use its hash directly. This side-steps any awkward lifetime issues.
let key = {
let mut hash = DefaultHasher::new();
input.language().name.hash(&mut hash);
input.query().hash(&mut hash);

hash.finish()
};

// Return the language definition from the cache, behind a read lock, if it exists...
if let Some(lang_def) = self.0.read()?.get(&key) {
log::debug!(
"Cache {:p}: Hit at {:#016x} ({}, {})",
self,
key,
input.language(),
input.query().file_name().unwrap().to_string_lossy()
);

return Ok(Arc::clone(lang_def));
}

// ...otherwise, fetch the language definition, to populate the cache behind a write lock
let lang_def = Arc::new(input.to_language_definition().await?);
self.0.write()?.insert(key, Arc::clone(&lang_def));

log::debug!(
"Cache {:p}: Insert at {:#016x} ({}, {})",
self,
key,
input.language(),
input.query().file_name().unwrap().to_string_lossy()
);

Ok(lang_def)
}
}