From 2c19f9e4964d3182502ca1a796912f8c3307852b Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 27 Nov 2023 11:37:39 +0100 Subject: [PATCH] expr: refactor AST and parsing --- src/uu/expr/src/expr.rs | 87 ++-- src/uu/expr/src/syntax_tree.rs | 921 ++++++++++++++++----------------- src/uu/expr/src/tokens.rs | 147 ------ 3 files changed, 496 insertions(+), 659 deletions(-) delete mode 100644 src/uu/expr/src/tokens.rs diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index 909c4c37653..c271f0935fd 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -3,14 +3,19 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use std::fmt::Display; + use clap::{crate_version, Arg, ArgAction, Command}; +use syntax_tree::AstNode; use uucore::{ - error::{UResult, USimpleError, UUsageError}, + display::Quotable, + error::{UError, UResult}, format_usage, help_about, help_section, help_usage, }; +use crate::syntax_tree::is_truthy; + mod syntax_tree; -mod tokens; mod options { pub const VERSION: &str = "version"; @@ -18,6 +23,51 @@ mod options { pub const EXPRESSION: &str = "expression"; } +pub type ExprResult = Result; + +#[derive(Debug, PartialEq, Eq)] +pub enum ExprError { + UnexpectedArgument(String), + MissingArgument(String), + NonIntegerArgument, + MissingOperand, + DivisionByZero, + InvalidRegexExpression, + ExpectedClosingBraceAfter(String), +} + +impl Display for ExprError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::UnexpectedArgument(s) => { + write!(f, "syntax error: unexpected argument {}", s.quote()) + } + Self::MissingArgument(s) => { + write!(f, "syntax error: missing argument after {}", s.quote()) + } + Self::NonIntegerArgument => write!(f, "non-integer argument"), + Self::MissingOperand => write!(f, "missing operand"), + Self::DivisionByZero => write!(f, "division by zero"), + Self::InvalidRegexExpression => 
write!(f, "Invalid regex expression"), + Self::ExpectedClosingBraceAfter(s) => { + write!(f, "expected ')' after {}", s.quote()) + } + } + } +} + +impl std::error::Error for ExprError {} + +impl UError for ExprError { + fn code(&self) -> i32 { + 2 + } + + fn usage(&self) -> bool { + *self == Self::MissingOperand + } +} + pub fn uu_app() -> Command { Command::new(uucore::util_name()) .version(crate_version!()) @@ -53,36 +103,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // For expr utility we do not want getopts. // The following usage should work without escaping hyphens: `expr -15 = 1 + 2 \* \( 3 - -4 \)` let matches = uu_app().try_get_matches_from(args)?; - let token_strings = matches + let token_strings: Vec<&str> = matches .get_many::(options::EXPRESSION) .map(|v| v.into_iter().map(|s| s.as_ref()).collect::>()) .unwrap_or_default(); - if token_strings.is_empty() { - return Err(UUsageError::new(2, "missing operand")); - } - - match process_expr(&token_strings[..]) { - Ok(expr_result) => print_expr_ok(&expr_result), - Err(expr_error) => Err(USimpleError::new(2, &expr_error)), + let res = AstNode::parse(&token_strings)?.eval()?; + println!("{res}"); + if !is_truthy(&res) { + return Err(1.into()); } -} - -fn process_expr(token_strings: &[&str]) -> Result { - let maybe_tokens = tokens::strings_to_tokens(token_strings); - let maybe_ast = syntax_tree::tokens_to_ast(maybe_tokens); - evaluate_ast(maybe_ast) -} - -fn print_expr_ok(expr_result: &str) -> UResult<()> { - println!("{expr_result}"); - if expr_result.parse::() == Ok(0) || expr_result.is_empty() { - Err(1.into()) - } else { - Ok(()) - } -} - -fn evaluate_ast(maybe_ast: Result, String>) -> Result { - maybe_ast.and_then(|ast| ast.evaluate()) + Ok(()) } diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 2260b2e2186..9c4dcd832e7 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -11,566 +11,521 @@ // spell-checker:ignore (ToDO) ints 
paren prec multibytes use num_bigint::BigInt; -use num_traits::Zero; use onig::{Regex, RegexOptions, Syntax}; -use uucore::display::Quotable; -use crate::tokens::Token; +use crate::{ExprError, ExprResult}; -type TokenStack = Vec<(usize, Token)>; -pub type OperandsList = Vec>; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinOp { + Relation(RelationOp), + Numeric(NumericOp), + String(StringOp), +} -#[derive(Debug)] -pub enum AstNode { - Leaf { - token_idx: usize, - value: String, - }, - Node { - token_idx: usize, - op_type: String, - operands: OperandsList, - }, +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RelationOp { + Lt, + Leq, + Eq, + Neq, + Gt, + Geq, } -impl AstNode { - fn debug_dump(&self) { - self.debug_dump_impl(1); - } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum NumericOp { + Add, + Sub, + Mul, + Div, + Mod, +} - fn debug_dump_impl(&self, depth: usize) { - for _ in 0..depth { - print!("\t",); - } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StringOp { + Match, + Index, + And, + Or, +} + +impl BinOp { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { match self { - Self::Leaf { token_idx, value } => println!( - "Leaf( {} ) at #{} ( evaluate -> {:?} )", - value, - token_idx, - self.evaluate() - ), - Self::Node { - token_idx, - op_type, - operands, - } => { - println!( - "Node( {} ) at #{} ( evaluate -> {:?} )", - op_type, - token_idx, - self.evaluate() - ); - for operand in operands { - operand.debug_dump_impl(depth + 1); - } - } + Self::Relation(op) => op.eval(left, right), + Self::Numeric(op) => op.eval(left, right), + Self::String(op) => op.eval(left, right), } } +} - fn new_node(token_idx: usize, op_type: &str, operands: OperandsList) -> Box { - Box::new(Self::Node { - token_idx, - op_type: op_type.into(), - operands, - }) - } - - fn new_leaf(token_idx: usize, value: &str) -> Box { - Box::new(Self::Leaf { - token_idx, - value: value.into(), - }) +impl RelationOp { + fn eval(&self, a: &AstNode, 
b: &AstNode) -> ExprResult { + let a = a.eval()?; + let b = b.eval()?; + let b = if let (Ok(a), Ok(b)) = (a.parse::(), b.parse::()) { + match self { + Self::Lt => a < b, + Self::Leq => a <= b, + Self::Eq => a == b, + Self::Neq => a != b, + Self::Gt => a > b, + Self::Geq => a >= b, + } + } else { + // These comparisons should be using locale settings + match self { + Self::Lt => a < b, + Self::Leq => a <= b, + Self::Eq => a == b, + Self::Neq => a != b, + Self::Gt => a > b, + Self::Geq => a >= b, + } + }; + if b { + Ok("1".into()) + } else { + Ok("0".into()) + } } +} - pub fn evaluate(&self) -> Result { - match self { - Self::Leaf { value, .. } => Ok(value.clone()), - Self::Node { op_type, .. } => match self.operand_values() { - Err(reason) => Err(reason), - Ok(operand_values) => match op_type.as_ref() { - "+" => { - infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a + b), &operand_values) - } - "-" => { - infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a - b), &operand_values) - } - "*" => { - infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a * b), &operand_values) - } - "/" => infix_operator_two_ints( - |a: BigInt, b: BigInt| { - if b.is_zero() { - Err("division by zero".to_owned()) - } else { - Ok(a / b) - } - }, - &operand_values, - ), - "%" => infix_operator_two_ints( - |a: BigInt, b: BigInt| { - if b.is_zero() { - Err("division by zero".to_owned()) - } else { - Ok(a % b) - } - }, - &operand_values, - ), - "=" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a == b)), - |a: &String, b: &String| Ok(bool_as_string(a == b)), - &operand_values, - ), - "!=" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a != b)), - |a: &String, b: &String| Ok(bool_as_string(a != b)), - &operand_values, - ), - "<" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a < b)), - |a: &String, b: &String| Ok(bool_as_string(a < b)), - &operand_values, - ), - ">" => 
infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a > b)), - |a: &String, b: &String| Ok(bool_as_string(a > b)), - &operand_values, - ), - "<=" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a <= b)), - |a: &String, b: &String| Ok(bool_as_string(a <= b)), - &operand_values, - ), - ">=" => infix_operator_two_ints_or_two_strings( - |a: BigInt, b: BigInt| Ok(bool_as_int(a >= b)), - |a: &String, b: &String| Ok(bool_as_string(a >= b)), - &operand_values, - ), - "|" => Ok(infix_operator_or(&operand_values)), - "&" => Ok(infix_operator_and(&operand_values)), - ":" | "match" => operator_match(&operand_values), - "length" => Ok(prefix_operator_length(&operand_values)), - "index" => Ok(prefix_operator_index(&operand_values)), - "substr" => Ok(prefix_operator_substr(&operand_values)), - - _ => Err(format!("operation not implemented: {op_type}")), - }, +impl NumericOp { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { + let a: BigInt = left + .eval()? + .parse() + .map_err(|_| ExprError::NonIntegerArgument)?; + let b: BigInt = right + .eval()? + .parse() + .map_err(|_| ExprError::NonIntegerArgument)?; + Ok(match self { + Self::Add => a + b, + Self::Sub => a - b, + Self::Mul => a * b, + Self::Div => match a.checked_div(&b) { + Some(x) => x, + None => return Err(ExprError::DivisionByZero), }, + Self::Mod => { + if a.checked_div(&b).is_none() { + return Err(ExprError::DivisionByZero); + }; + a % b + } } + .to_string()) } +} - pub fn operand_values(&self) -> Result, String> { - if let Self::Node { - operands, op_type, .. 
- } = self - { - let mut out = Vec::with_capacity(operands.len()); - let mut operands = operands.iter(); - - if let Some(value) = operands.next() { - let value = value.evaluate()?; - out.push(value.clone()); - // short-circuit evaluation for `|` and `&` - // push dummy to pass `assert!(values.len() == 2);` - match op_type.as_ref() { - "|" => { - if value_as_bool(&value) { - out.push(String::from("dummy")); - return Ok(out); - } - } - "&" => { - if !value_as_bool(&value) { - out.push(String::from("dummy")); - return Ok(out); +impl StringOp { + fn eval(&self, left: &AstNode, right: &AstNode) -> ExprResult { + match self { + Self::Or => { + let left = left.eval()?; + if is_truthy(&left) { + return Ok(left); + } + let right = right.eval()?; + if is_truthy(&right) { + return Ok(right); + } + Ok("0".into()) + } + Self::And => { + let left = left.eval()?; + if !is_truthy(&left) { + return Ok("0".into()); + } + let right = right.eval()?; + if !is_truthy(&right) { + return Ok("0".into()); + } + Ok(left) + } + Self::Match => { + let left = left.eval()?; + let right = right.eval()?; + let re_string = format!("^{}", &right); + let re = Regex::with_options( + &re_string, + RegexOptions::REGEX_OPTION_NONE, + Syntax::grep(), + ) + .map_err(|_| ExprError::InvalidRegexExpression)?; + Ok(if re.captures_len() > 0 { + re.captures(&left) + .map(|captures| captures.at(1).unwrap()) + .unwrap_or("") + .to_string() + } else { + re.find(&left) + .map_or("0".to_string(), |(start, end)| (end - start).to_string()) + }) + } + Self::Index => { + let left = left.eval()?; + let right = right.eval()?; + for (current_idx, ch_h) in left.chars().enumerate() { + for ch_n in right.chars() { + if ch_n == ch_h { + return Ok((current_idx + 1).to_string()); } } - _ => {} } + Ok("0".to_string()) } - - for operand in operands { - let value = operand.evaluate()?; - out.push(value); - } - Ok(out) - } else { - panic!("Invoked .operand_values(&self) not with ASTNode::Node") } } } -pub fn tokens_to_ast( - 
maybe_tokens: Result, String>, -) -> Result, String> { - maybe_tokens.and_then(|tokens| { - let mut out_stack: TokenStack = Vec::new(); - let mut op_stack: TokenStack = Vec::new(); - - for (token_idx, token) in tokens { - push_token_to_either_stack(token_idx, &token, &mut out_stack, &mut op_stack)?; - } - move_rest_of_ops_to_out(&mut out_stack, &mut op_stack)?; - assert!(op_stack.is_empty()); - - maybe_dump_rpn(&out_stack); - let result = ast_from_rpn(&mut out_stack); - if out_stack.is_empty() { - maybe_dump_ast(&result); - result - } else { - Err( - "syntax error (first RPN token does not represent the root of the expression AST)" - .to_owned(), - ) - } - }) +/// Precedence for infix binary operators +const PRECEDENCE: &[&[(&str, BinOp)]] = &[ + &[("|", BinOp::String(StringOp::Or))], + &[("&", BinOp::String(StringOp::And))], + &[ + ("<", BinOp::Relation(RelationOp::Lt)), + ("<=", BinOp::Relation(RelationOp::Leq)), + ("=", BinOp::Relation(RelationOp::Eq)), + ("!=", BinOp::Relation(RelationOp::Neq)), + (">=", BinOp::Relation(RelationOp::Geq)), + (">", BinOp::Relation(RelationOp::Gt)), + ], + &[ + ("+", BinOp::Numeric(NumericOp::Add)), + ("-", BinOp::Numeric(NumericOp::Sub)), + ], + &[ + ("*", BinOp::Numeric(NumericOp::Mul)), + ("/", BinOp::Numeric(NumericOp::Div)), + ("%", BinOp::Numeric(NumericOp::Mod)), + ], + &[(":", BinOp::String(StringOp::Match))], +]; + +#[derive(Debug, PartialEq, Eq)] +pub enum AstNode { + Leaf { + value: String, + }, + BinOp { + op_type: BinOp, + left: Box, + right: Box, + }, + Substr { + string: Box, + pos: Box, + length: Box, + }, + Length { + string: Box, + }, } -fn maybe_dump_ast(result: &Result, String>) { - use std::env; - if let Ok(debug_var) = env::var("EXPR_DEBUG_AST") { - if debug_var == "1" { - println!("EXPR_DEBUG_AST"); - match result { - Ok(ast) => ast.debug_dump(), - Err(reason) => println!("\terr: {reason:?}"), - } - } +impl AstNode { + pub fn parse(input: &[&str]) -> ExprResult { + Parser::new(input).parse() } -} 
-#[allow(clippy::ptr_arg)] -fn maybe_dump_rpn(rpn: &TokenStack) { - use std::env; - if let Ok(debug_var) = env::var("EXPR_DEBUG_RPN") { - if debug_var == "1" { - println!("EXPR_DEBUG_RPN"); - for token in rpn { - println!("\t{token:?}"); + pub fn eval(&self) -> ExprResult { + match self { + Self::Leaf { value, .. } => Ok(value.into()), + Self::BinOp { + op_type, + left, + right, + .. + } => op_type.eval(left, right), + Self::Substr { + string, + pos, + length, + .. + } => { + let string = string.eval()?; + + // The GNU docs say: + // + // > If either position or length is negative, zero, or + // > non-numeric, returns the null string. + // + // So we coerce errors into 0 to make that the only case we + // have to care about. + let pos: usize = pos.eval()?.parse().unwrap_or(0); + let length: usize = length.eval()?.parse().unwrap_or(0); + + let (Some(pos), Some(_)) = (pos.checked_sub(1), length.checked_sub(1)) else { + return Ok(String::new()); + }; + + Ok(string.chars().skip(pos).take(length).collect()) } + Self::Length { string, .. } => Ok(string.eval()?.chars().count().to_string()), } } } -fn ast_from_rpn(rpn: &mut TokenStack) -> Result, String> { - match rpn.pop() { - None => Err("syntax error (premature end of expression)".to_owned()), - Some((token_idx, Token::Value { value })) => Ok(AstNode::new_leaf(token_idx, &value)), +struct Parser<'a> { + input: &'a [&'a str], + index: usize, +} + +impl<'a> Parser<'a> { + fn new(input: &'a [&'a str]) -> Self { + Self { input, index: 0 } + } - Some((token_idx, Token::InfixOp { value, .. })) => { - maybe_ast_node(token_idx, &value, 2, rpn) + fn next(&mut self) -> ExprResult<&'a str> { + let next = self.input.get(self.index); + if let Some(next) = next { + self.index += 1; + Ok(next) + } else { + // The indexing won't panic, because we know that the input size + // is greater than zero. 
+ Err(ExprError::MissingArgument( + self.input[self.index - 1].into(), + )) } + } - Some((token_idx, Token::PrefixOp { value, arity })) => { - maybe_ast_node(token_idx, &value, arity, rpn) + fn accept(&mut self, f: impl Fn(&str) -> Option) -> Option { + let next = self.input.get(self.index)?; + let tok = f(next); + if let Some(tok) = tok { + self.index += 1; + Some(tok) + } else { + None } + } - Some((token_idx, unexpected_token)) => { - panic!("unexpected token at #{token_idx} {unexpected_token:?}") + fn parse(&mut self) -> ExprResult { + if self.input.is_empty() { + return Err(ExprError::MissingOperand); + } + let res = self.parse_expression()?; + if let Some(arg) = self.input.get(self.index) { + return Err(ExprError::UnexpectedArgument(arg.to_string())); } + Ok(res) } -} -fn maybe_ast_node( - token_idx: usize, - op_type: &str, - arity: usize, - rpn: &mut TokenStack, -) -> Result, String> { - let mut operands = Vec::with_capacity(arity); - for _ in 0..arity { - let operand = ast_from_rpn(rpn)?; - operands.push(operand); + fn parse_expression(&mut self) -> ExprResult { + self.parse_precedence(0) } - operands.reverse(); - Ok(AstNode::new_node(token_idx, op_type, operands)) -} -fn move_rest_of_ops_to_out( - out_stack: &mut TokenStack, - op_stack: &mut TokenStack, -) -> Result<(), String> { - loop { - match op_stack.pop() { - None => return Ok(()), - Some((token_idx, Token::ParOpen)) => { - return Err(format!( - "syntax error (Mismatched open-parenthesis at #{token_idx})" - )) - } - Some((token_idx, Token::ParClose)) => { - return Err(format!( - "syntax error (Mismatched close-parenthesis at #{token_idx})" - )) + fn parse_op(&mut self, precedence: usize) -> Option { + self.accept(|s| { + for (op_string, op) in PRECEDENCE[precedence] { + if s == *op_string { + return Some(*op); + } } - Some(other) => out_stack.push(other), - } + None + }) } -} -fn push_token_to_either_stack( - token_idx: usize, - token: &Token, - out_stack: &mut TokenStack, - op_stack: &mut 
TokenStack, -) -> Result<(), String> { - let result = match token { - Token::Value { .. } => { - out_stack.push((token_idx, token.clone())); - Ok(()) + fn parse_precedence(&mut self, precedence: usize) -> ExprResult { + if precedence >= PRECEDENCE.len() { + return self.parse_simple_expression(); } - Token::InfixOp { .. } => { - if op_stack.is_empty() { - op_stack.push((token_idx, token.clone())); - Ok(()) - } else { - push_op_to_stack(token_idx, token, out_stack, op_stack) - } + let mut left = self.parse_precedence(precedence + 1)?; + while let Some(op) = self.parse_op(precedence) { + let right = self.parse_precedence(precedence + 1)?; + left = AstNode::BinOp { + op_type: op, + left: Box::new(left), + right: Box::new(right), + }; } + Ok(left) + } - Token::ParOpen => { - if out_stack.is_empty() { - op_stack.push((token_idx, token.clone())); - Ok(()) - } else { - Err("syntax error: unexpected argument '('".to_string()) + fn parse_simple_expression(&mut self) -> ExprResult { + let first = self.next()?; + Ok(match first { + "match" => { + let left = self.parse_expression()?; + let right = self.parse_expression()?; + AstNode::BinOp { + op_type: BinOp::String(StringOp::Match), + left: Box::new(left), + right: Box::new(right), + } } - } - - Token::PrefixOp { value, .. 
} => { - if out_stack.is_empty() { - op_stack.push((token_idx, token.clone())); - Ok(()) - } else { - Err(format!( - "syntax error: unexpected argument {}", - value.quote() - )) + "substr" => { + let string = self.parse_expression()?; + let pos = self.parse_expression()?; + let length = self.parse_expression()?; + AstNode::Substr { + string: Box::new(string), + pos: Box::new(pos), + length: Box::new(length), + } } - } - - Token::ParClose => move_till_match_paren(out_stack, op_stack), - }; - maybe_dump_shunting_yard_step(token_idx, token, out_stack, op_stack, &result); - result -} - -#[allow(clippy::ptr_arg)] -fn maybe_dump_shunting_yard_step( - token_idx: usize, - token: &Token, - out_stack: &TokenStack, - op_stack: &TokenStack, - result: &Result<(), String>, -) { - use std::env; - if let Ok(debug_var) = env::var("EXPR_DEBUG_SYA_STEP") { - if debug_var == "1" { - println!("EXPR_DEBUG_SYA_STEP"); - println!("\t{token_idx} => {token:?}"); - println!("\t\tout: {out_stack:?}"); - println!("\t\top : {op_stack:?}"); - println!("\t\tresult: {result:?}"); - } - } -} - -fn push_op_to_stack( - token_idx: usize, - token: &Token, - out_stack: &mut TokenStack, - op_stack: &mut TokenStack, -) -> Result<(), String> { - if let Token::InfixOp { - precedence: prec, - left_assoc: la, - .. - } = *token - { - loop { - match op_stack.last() { - None | Some(&(_, Token::ParOpen)) => { - op_stack.push((token_idx, token.clone())); - return Ok(()); + "index" => { + let left = self.parse_expression()?; + let right = self.parse_expression()?; + AstNode::BinOp { + op_type: BinOp::String(StringOp::Index), + left: Box::new(left), + right: Box::new(right), } - - Some(&( - _, - Token::InfixOp { - precedence: prev_prec, - .. 
- }, - )) => { - if la && prev_prec >= prec || !la && prev_prec > prec { - out_stack.push(op_stack.pop().unwrap()); - } else { - op_stack.push((token_idx, token.clone())); - return Ok(()); - } + } + "length" => { + let string = self.parse_expression()?; + AstNode::Length { + string: Box::new(string), } - - Some(&(_, Token::PrefixOp { .. })) => { - op_stack.push((token_idx, token.clone())); - return Ok(()); + } + "+" => AstNode::Leaf { + value: self.next()?.into(), + }, + "(" => { + let s = self.parse_expression()?; + let close_paren = self.next()?; + if close_paren != ")" { + // Since we have parsed at least a '(', there will be a token + // at `self.index - 1`. So this indexing won't panic. + return Err(ExprError::ExpectedClosingBraceAfter( + self.input[self.index - 1].into(), + )); } - - Some(_) => panic!("Non-operator on op_stack"), + s } - } - } else { - panic!("Expected infix-op") + s => AstNode::Leaf { value: s.into() }, + }) } } -fn move_till_match_paren( - out_stack: &mut TokenStack, - op_stack: &mut TokenStack, -) -> Result<(), String> { - loop { - let op = op_stack - .pop() - .ok_or_else(|| "syntax error (Mismatched close-parenthesis)".to_string())?; - match op { - (_, Token::ParOpen) => return Ok(()), - other => out_stack.push(other), - } +/// Determine whether `expr` should evaluate the string as "truthy" +/// +/// Falsy strings are either empty or match the regex "-?0+"; every other string is truthy. 
+pub fn is_truthy(s: &str) -> bool { + // Edge case: `-` followed by nothing is truthy + if s == "-" { + return true; } + + let mut bytes = s.bytes(); + + // Empty string is falsy + let Some(first) = bytes.next() else { + return false; + }; + + let is_zero = (first == b'-' || first == b'0') && bytes.all(|b| b == b'0'); + !is_zero } -fn infix_operator_two_ints(f: F, values: &[String]) -> Result -where - F: Fn(BigInt, BigInt) -> Result, -{ - assert!(values.len() == 2); - if let Ok(left) = values[0].parse::() { - if let Ok(right) = values[1].parse::() { - return f(left, right).map(|big_int| big_int.to_string()); +#[cfg(test)] +mod test { + use super::{AstNode, BinOp, NumericOp, RelationOp, StringOp}; + + impl From<&str> for AstNode { + fn from(value: &str) -> Self { + Self::Leaf { + value: value.into(), + } } } - Err("Expected an integer operand".to_string()) -} -fn infix_operator_two_ints_or_two_strings( - fi: FI, - fs: FS, - values: &[String], -) -> Result -where - FI: Fn(BigInt, BigInt) -> Result, - FS: Fn(&String, &String) -> Result, -{ - assert!(values.len() == 2); - if let (Some(a_int), Some(b_int)) = ( - values[0].parse::().ok(), - values[1].parse::().ok(), - ) { - match fi(a_int, b_int) { - Ok(result) => Ok(result.to_string()), - Err(reason) => Err(reason), + fn op(op_type: BinOp, left: impl Into, right: impl Into) -> AstNode { + AstNode::BinOp { + op_type, + left: Box::new(left.into()), + right: Box::new(right.into()), } - } else { - fs(&values[0], &values[1]) } -} -fn infix_operator_or(values: &[String]) -> String { - assert!(values.len() == 2); - if value_as_bool(&values[0]) { - values[0].clone() - } else if value_as_bool(&values[1]) { - values[1].clone() - } else { - 0.to_string() + fn length(string: impl Into) -> AstNode { + AstNode::Length { + string: Box::new(string.into()), + } } -} -fn infix_operator_and(values: &[String]) -> String { - assert!(values.len() == 2); - if value_as_bool(&values[0]) && value_as_bool(&values[1]) { - values[0].clone() - } 
else { - 0.to_string() + fn substr( + string: impl Into, + pos: impl Into, + length: impl Into, + ) -> AstNode { + AstNode::Substr { + string: Box::new(string.into()), + pos: Box::new(pos.into()), + length: Box::new(length.into()), + } } -} -fn operator_match(values: &[String]) -> Result { - assert!(values.len() == 2); - let re_string = format!("^{}", &values[1]); - let re = Regex::with_options(&re_string, RegexOptions::REGEX_OPTION_NONE, Syntax::grep()) - .map_err(|err| err.description().to_string())?; - Ok(if re.captures_len() > 0 { - re.captures(&values[0]) - .map(|captures| captures.at(1).unwrap()) - .unwrap_or("") - .to_string() - } else { - re.find(&values[0]) - .map_or("0".to_string(), |(start, end)| (end - start).to_string()) - }) -} - -fn prefix_operator_length(values: &[String]) -> String { - assert!(values.len() == 1); - // Use chars().count() as we can have some multibytes chars - // See https://github.com/uutils/coreutils/issues/3132 - values[0].chars().count().to_string() -} - -fn prefix_operator_index(values: &[String]) -> String { - assert!(values.len() == 2); - let haystack = &values[0]; - let needles = &values[1]; - - for (current_idx, ch_h) in haystack.chars().enumerate() { - for ch_n in needles.chars() { - if ch_n == ch_h { - return (current_idx + 1).to_string(); - } + #[test] + fn infix_operators() { + let cases = [ + ("|", BinOp::String(StringOp::Or)), + ("&", BinOp::String(StringOp::And)), + ("<", BinOp::Relation(RelationOp::Lt)), + ("<=", BinOp::Relation(RelationOp::Leq)), + ("=", BinOp::Relation(RelationOp::Eq)), + ("!=", BinOp::Relation(RelationOp::Neq)), + (">=", BinOp::Relation(RelationOp::Geq)), + (">", BinOp::Relation(RelationOp::Gt)), + ("+", BinOp::Numeric(NumericOp::Add)), + ("-", BinOp::Numeric(NumericOp::Sub)), + ("*", BinOp::Numeric(NumericOp::Mul)), + ("/", BinOp::Numeric(NumericOp::Div)), + ("%", BinOp::Numeric(NumericOp::Mod)), + (":", BinOp::String(StringOp::Match)), + ]; + for (string, value) in cases { + 
assert_eq!(AstNode::parse(&["1", string, "2"]), Ok(op(value, "1", "2"))); } } - "0".to_string() -} - -fn prefix_operator_substr(values: &[String]) -> String { - assert!(values.len() == 3); - let subj = &values[0]; - let idx = match values[1] - .parse::() - .ok() - .and_then(|v| v.checked_sub(1)) - { - Some(i) => i, - None => return String::new(), - }; - let len = match values[2].parse::() { - Ok(i) => i, - Err(_) => return String::new(), - }; - - subj.chars().skip(idx).take(len).collect() -} - -fn bool_as_int(b: bool) -> u8 { - u8::from(b) -} -fn bool_as_string(b: bool) -> String { - if b { - "1".to_string() - } else { - "0".to_string() + #[test] + fn other_operators() { + assert_eq!( + AstNode::parse(&["match", "1", "2"]), + Ok(op(BinOp::String(StringOp::Match), "1", "2")), + ); + assert_eq!( + AstNode::parse(&["index", "1", "2"]), + Ok(op(BinOp::String(StringOp::Index), "1", "2")), + ); + assert_eq!(AstNode::parse(&["length", "1"]), Ok(length("1")),); + assert_eq!( + AstNode::parse(&["substr", "1", "2", "3"]), + Ok(substr("1", "2", "3")), + ); } -} -fn value_as_bool(s: &str) -> bool { - if s.is_empty() { - return false; - } - match s.parse::() { - Ok(n) => n != Zero::zero(), - Err(_) => true, + #[test] + fn precedence() { + assert_eq!( + AstNode::parse(&["1", "+", "2", "*", "3"]), + Ok(op( + BinOp::Numeric(NumericOp::Add), + "1", + op(BinOp::Numeric(NumericOp::Mul), "2", "3") + )) + ); + assert_eq!( + AstNode::parse(&["(", "1", "+", "2", ")", "*", "3"]), + Ok(op( + BinOp::Numeric(NumericOp::Mul), + op(BinOp::Numeric(NumericOp::Add), "1", "2"), + "3" + )) + ); + assert_eq!( + AstNode::parse(&["1", "*", "2", "+", "3"]), + Ok(op( + BinOp::Numeric(NumericOp::Add), + op(BinOp::Numeric(NumericOp::Mul), "1", "2"), + "3" + )), + ); } } diff --git a/src/uu/expr/src/tokens.rs b/src/uu/expr/src/tokens.rs deleted file mode 100644 index f499881c138..00000000000 --- a/src/uu/expr/src/tokens.rs +++ /dev/null @@ -1,147 +0,0 @@ -// This file is part of the uutils coreutils 
package. -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. - -//! -//! The following tokens are present in the expr grammar: -//! * integer literal; -//! * string literal; -//! * infix binary operators; -//! * prefix operators. -//! -//! According to the man-page of expr we have expression split into tokens (each token -- separate CLI-argument). -//! Hence all we need is to map the strings into the Token structures, except for some ugly fiddling with +-escaping. -//! - -// spell-checker:ignore (ToDO) paren - -#[derive(Debug, Clone)] -pub enum Token { - Value { - value: String, - }, - - ParOpen, - ParClose, - - InfixOp { - precedence: u8, - left_assoc: bool, - value: String, - }, - - PrefixOp { - arity: usize, - value: String, - }, -} - -impl Token { - fn new_infix_op(v: &str, left_assoc: bool, precedence: u8) -> Self { - Self::InfixOp { - left_assoc, - precedence, - value: v.into(), - } - } - - fn new_value(v: &str) -> Self { - Self::Value { value: v.into() } - } - - fn is_infix_plus(&self) -> bool { - match self { - Self::InfixOp { value, .. } => value == "+", - _ => false, - } - } - - fn is_a_value(&self) -> bool { - matches!(*self, Self::Value { .. 
}) - } - - fn is_a_close_paren(&self) -> bool { - matches!(*self, Self::ParClose) - } -} - -pub fn strings_to_tokens(strings: &[&str]) -> Result, String> { - let mut tokens_acc = Vec::with_capacity(strings.len()); - let mut tok_idx = 1; - - for s in strings { - let token_if_not_escaped = match *s { - "(" => Token::ParOpen, - ")" => Token::ParClose, - - "^" => Token::new_infix_op(s, false, 7), - - ":" => Token::new_infix_op(s, true, 6), - - "*" | "/" | "%" => Token::new_infix_op(s, true, 5), - - "+" | "-" => Token::new_infix_op(s, true, 4), - - "=" | "!=" | "<" | ">" | "<=" | ">=" => Token::new_infix_op(s, true, 3), - - "&" => Token::new_infix_op(s, true, 2), - - "|" => Token::new_infix_op(s, true, 1), - - "match" | "index" => Token::PrefixOp { - arity: 2, - value: s.to_string(), - }, - "substr" => Token::PrefixOp { - arity: 3, - value: s.to_string(), - }, - "length" => Token::PrefixOp { - arity: 1, - value: s.to_string(), - }, - - _ => Token::new_value(s), - }; - push_token_if_not_escaped(&mut tokens_acc, tok_idx, token_if_not_escaped, s); - tok_idx += 1; - } - maybe_dump_tokens_acc(&tokens_acc); - - Ok(tokens_acc) -} - -fn maybe_dump_tokens_acc(tokens_acc: &[(usize, Token)]) { - use std::env; - - if let Ok(debug_var) = env::var("EXPR_DEBUG_TOKENS") { - if debug_var == "1" { - println!("EXPR_DEBUG_TOKENS"); - for token in tokens_acc { - println!("\t{token:?}"); - } - } - } -} - -fn push_token_if_not_escaped(acc: &mut Vec<(usize, Token)>, tok_idx: usize, token: Token, s: &str) { - // `+` may be escaped such as `expr + 1` and `expr 1 + + 1` - let prev_is_plus = match acc.last() { - None => false, - Some(t) => t.1.is_infix_plus(), - }; - let should_use_as_escaped = if prev_is_plus && acc.len() >= 2 { - let pre_prev = &acc[acc.len() - 2]; - !(pre_prev.1.is_a_value() || pre_prev.1.is_a_close_paren()) - } else { - prev_is_plus - }; - - if should_use_as_escaped { - acc.pop(); - acc.push((tok_idx, Token::new_value(s))); - } else { - acc.push((tok_idx, token)); - } -}