diff --git a/.vscode/cspell.dictionaries/workspace.wordlist.txt b/.vscode/cspell.dictionaries/workspace.wordlist.txt index 7242199a5c..85c8bf3bd3 100644 --- a/.vscode/cspell.dictionaries/workspace.wordlist.txt +++ b/.vscode/cspell.dictionaries/workspace.wordlist.txt @@ -68,6 +68,7 @@ splitn trunc # * uutils +basenc chcon chgrp chmod diff --git a/Cargo.lock b/Cargo.lock index a9ec6d7264..d5316d72df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -308,6 +308,7 @@ dependencies = [ "uu_base32", "uu_base64", "uu_basename", + "uu_basenc", "uu_cat", "uu_chgrp", "uu_chmod", @@ -595,9 +596,29 @@ checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" [[package]] name = "data-encoding" -version = "2.1.2" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" + +[[package]] +name = "data-encoding-macro" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86927b7cd2fe88fa698b87404b287ab98d1a0063a34071d92e575b72d3029aca" +dependencies = [ + "data-encoding", + "data-encoding-macro-internal", +] + +[[package]] +name = "data-encoding-macro-internal" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f47ca1860a761136924ddd2422ba77b2ea54fe8cc75b9040804a0d9d32ad97" +checksum = "a5bbed42daaa95e780b60a50546aa345b8413a1e46f9a40a12907d3598f038db" +dependencies = [ + "data-encoding", + "syn", +] [[package]] name = "diff" @@ -1959,6 +1980,16 @@ dependencies = [ "uucore_procs", ] +[[package]] +name = "uu_basenc" +version = "0.0.7" +dependencies = [ + "clap", + "uu_base32", + "uucore", + "uucore_procs", +] + [[package]] name = "uu_cat" version = "0.0.7" @@ -2990,6 +3021,7 @@ version = "0.0.9" dependencies = [ "clap", "data-encoding", + "data-encoding-macro", "dns-lookup", "dunce", "getopts", @@ -3002,6 +3034,7 @@ dependencies = [ "time", "wild", "winapi 0.3.9", + "z85", ] [[package]] @@ -3129,3 +3162,9 @@ checksum = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c" dependencies = [ "libc", ] + +[[package]] +name = "z85" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ac8b56e4f9906a4ef5412875e9ce448364023335cec645fd457ecf51d4f2781" diff --git a/Cargo.toml b/Cargo.toml index eb3f99190f..7352b10f9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ feat_common_core = [ "base32", "base64", "basename", + "basenc", "cat", "cksum", "comm", @@ -245,6 +246,7 @@ arch = { optional=true, version="0.0.7", package="uu_arch", path="src/uu/arc base32 = { optional=true, version="0.0.7", package="uu_base32", path="src/uu/base32" } base64 = { optional=true, version="0.0.7", package="uu_base64", path="src/uu/base64" } basename = { optional=true, version="0.0.7", package="uu_basename", path="src/uu/basename" } +basenc = { optional=true, version="0.0.7", package="uu_basenc", path="src/uu/basenc" } cat = { optional=true, version="0.0.7", package="uu_cat", path="src/uu/cat" } chgrp = { optional=true, version="0.0.7", package="uu_chgrp", path="src/uu/chgrp" } chmod = { optional=true, version="0.0.7", package="uu_chmod", path="src/uu/chmod" } diff --git a/README.md b/README.md index 23e4c8cd34..f31dcf5b6c 100644 --- a/README.md +++ b/README.md @@ -369,20 +369,21 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md). | base32 | date | runcon | | base64 | dd | stty | | basename | df | | -| cat | expr | | -| chgrp | install | | -| chmod | join | | -| chown | ls | | -| chroot | more | | -| cksum | numfmt | | -| comm | od (`--strings` and 128-bit data types missing) | -| csplit | pr | | -| cut | printf | | -| dircolors | sort | | -| dirname | split | | -| du | tac | | -| echo | tail | | -| env | test | | +| basenc | expr | | +| cat | install | | +| chgrp | join | | +| chmod | ls | | +| chown | more | | +| chroot | numfmt | | +| cksum | od (`--strings` and 128-bit data types missing) | +| comm | pr | | +| csplit | printf | | +| cut | sort | | +| dircolors | split | | +| dirname | tac | | +| du | tail | | +| echo | test | | +| env | | | | expand | | | | factor | | | | false | | | diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 4f15db6bfc..6150933114 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -34,7 +34,7 @@ pub mod options { } impl Config { - fn from(app_name: &str, options: clap::ArgMatches) -> Result { + pub fn from(app_name: &str, options: &clap::ArgMatches) -> Result { let file: Option = match options.values_of(options::FILE) { Some(mut values) => { let name = values.next().unwrap(); @@ -85,7 +85,7 @@ pub fn parse_base_cmd_args( let arg_list = args .collect_str(InvalidEncodingHandling::ConvertLossy) .accept_any(); - Config::from(name, app.get_matches_from(arg_list)) + Config::from(name, &app.get_matches_from(arg_list)) } pub fn base_app<'a>(name: &str, version: &'a str, about: &'a str) -> App<'static, 'a> { @@ -145,8 +145,18 @@ pub fn handle_input( } if !decode { - let encoded = data.encode(); - wrap_print(&data, encoded); + match data.encode() { + Ok(s) => { + wrap_print(&data, s); + } + Err(_) => { + eprintln!( + "{}: error: invalid input (length must be multiple of 4 characters)", + name + ); + exit!(1) + } + } } else { match data.decode() { Ok(s) => { diff --git a/src/uu/basenc/Cargo.toml b/src/uu/basenc/Cargo.toml new file mode 100644 index 0000000000..17cf0ec18a --- /dev/null +++ b/src/uu/basenc/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "uu_basenc" +version = "0.0.7" +authors = ["uutils developers"] +license = "MIT" +description = "basenc ~ (uutils) decode/encode input" + +homepage = "https://github.com/uutils/coreutils" +repository = "https://github.com/uutils/coreutils/tree/master/src/uu/basenc" +keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"] +categories = ["command-line-utilities"] +edition = "2018" + +[lib] +path = "src/basenc.rs" + +[dependencies] +clap = { version = "2.33", features = ["wrap_help"] } +uucore = { version=">=0.0.9", package="uucore", path="../../uucore", features = ["encoding"] } +uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" } +uu_base32 = { version=">=0.0.6", package="uu_base32", path="../base32"} + +[[bin]] +name = "basenc" +path = "src/main.rs" diff --git a/src/uu/basenc/src/basenc.rs b/src/uu/basenc/src/basenc.rs new file mode 100644 index 0000000000..e1daea4e67 --- /dev/null +++ b/src/uu/basenc/src/basenc.rs @@ -0,0 +1,95 @@ +// This file is part of the uutils coreutils package. +// +// (c) Jordy Dickinson +// (c) Jian Zeng +// +// For the full copyright and license information, please view the LICENSE file +// that was distributed with this source code. + +//spell-checker:ignore (args) lsbf msbf + +#[macro_use] +extern crate uucore; + +use clap::{crate_version, App, Arg}; +use uu_base32::base_common::{self, Config}; + +use uucore::{encoding::Format, InvalidEncodingHandling}; + +use std::io::{stdin, Read}; + +static ABOUT: &str = " + With no FILE, or when FILE is -, read standard input. + + When decoding, the input may contain newlines in addition to the bytes of + the formal alphabet. Use --ignore-garbage to attempt to recover + from any other non-alphabet bytes in the encoded stream. +"; + +static BASE_CMD_PARSE_ERROR: i32 = 1; + +const ENCODINGS: &[(&str, Format)] = &[ + ("base64", Format::Base64), + ("base64url", Format::Base64Url), + ("base32", Format::Base32), + ("base32hex", Format::Base32Hex), + ("base16", Format::Base16), + ("base2lsbf", Format::Base2Lsbf), + ("base2msbf", Format::Base2Msbf), + ("z85", Format::Z85), + // common abbreviations. TODO: once we have clap 3.0 we can use `AppSettings::InferLongArgs` to get all abbreviations automatically + ("base2l", Format::Base2Lsbf), + ("base2m", Format::Base2Msbf), +]; + +fn get_usage() -> String { + format!("{0} [OPTION]... [FILE]", executable!()) +} + +pub fn uu_app() -> App<'static, 'static> { + let mut app = base_common::base_app(executable!(), crate_version!(), ABOUT); + for encoding in ENCODINGS { + app = app.arg(Arg::with_name(encoding.0).long(encoding.0)); + } + app +} + +fn parse_cmd_args(args: impl uucore::Args) -> (Config, Format) { + let usage = get_usage(); + let matches = uu_app().usage(&usage[..]).get_matches_from( + args.collect_str(InvalidEncodingHandling::ConvertLossy) + .accept_any(), + ); + let format = ENCODINGS + .iter() + .find(|encoding| matches.is_present(encoding.0)) + .unwrap_or_else(|| { + show_usage_error!("missing encoding type"); + std::process::exit(1) + }) + .1; + ( + Config::from("basenc", &matches).unwrap_or_else(|s| crash!(BASE_CMD_PARSE_ERROR, "{}", s)), + format, + ) +} + +pub fn uumain(args: impl uucore::Args) -> i32 { + let name = executable!(); + let (config, format) = parse_cmd_args(args); + // Create a reference to stdin so we can return a locked stdin from + // parse_base_cmd_args + let stdin_raw = stdin(); + let mut input: Box = base_common::get_input(&config, &stdin_raw); + + base_common::handle_input( + &mut input, + format, + config.wrap_cols, + config.ignore_garbage, + config.decode, + name, + ); + + 0 +} diff --git a/src/uu/basenc/src/main.rs b/src/uu/basenc/src/main.rs new file mode 100644 index 0000000000..9a9a5f4c62 --- /dev/null +++ b/src/uu/basenc/src/main.rs @@ -0,0 +1 @@ +uucore_procs::main!(uu_basenc); diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index d62d893934..4067aae52d 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -27,7 +27,9 @@ nix = { version="<= 0.13", optional=true } platform-info = { version="<= 0.1", optional=true } time = { version="<= 0.1.43", optional=true } # * "problem" dependencies (pinned) -data-encoding = { version="~2.1", optional=true } ## data-encoding: require v2.1; but v2.2.0 breaks the build for MinSRV v1.31.0 +data-encoding = { version="2.1", optional=true } +data-encoding-macro = { version="0.1.12", optional=true } +z85 = { version="3.0.3", optional=true } libc = { version="0.2.15, <= 0.2.85", optional=true } ## libc: initial utmp support added in v0.2.15; but v0.2.68 breaks the build for MinSRV v1.31.0 [dev-dependencies] @@ -43,7 +45,7 @@ termion = "1.5" [features] default = [] # * non-default features -encoding = ["data-encoding", "thiserror"] +encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"] entries = ["libc"] fs = ["libc"] fsext = ["libc", "time"] diff --git a/src/uucore/src/lib/features/encoding.rs b/src/uucore/src/lib/features/encoding.rs index 03fa0ed8b4..1008666092 100644 --- a/src/uucore/src/lib/features/encoding.rs +++ b/src/uucore/src/lib/features/encoding.rs @@ -5,45 +5,95 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ +// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUV +// spell-checker:ignore (encodings) lsbf msbf hexupper -extern crate data_encoding; - -use self::data_encoding::{DecodeError, BASE32, BASE64}; +use data_encoding::{self, BASE32, BASE64}; use std::io::{self, Read, Write}; +use data_encoding::{Encoding, BASE32HEX, BASE64URL, HEXUPPER}; +use data_encoding_macro::new_encoding; #[cfg(feature = "thiserror")] use thiserror::Error; #[derive(Debug, Error)] -pub enum EncodingError { +pub enum DecodeError { + #[error("{}", _0)] + Decode(#[from] data_encoding::DecodeError), #[error("{}", _0)] - Decode(#[from] DecodeError), + DecodeZ85(#[from] z85::DecodeError), #[error("{}", _0)] Io(#[from] io::Error), } -pub type DecodeResult = Result, EncodingError>; +pub enum EncodeError { + Z85InputLenNotMultipleOf4, +} + +pub type DecodeResult = Result, DecodeError>; #[derive(Clone, Copy)] pub enum Format { - Base32, Base64, + Base64Url, + Base32, + Base32Hex, + Base16, + Base2Lsbf, + Base2Msbf, + Z85, } use self::Format::*; -pub fn encode(f: Format, input: &[u8]) -> String { - match f { +const BASE2LSBF: Encoding = new_encoding! { + symbols: "01", + bit_order: LeastSignificantFirst, +}; +const BASE2MSBF: Encoding = new_encoding! { + symbols: "01", + bit_order: MostSignificantFirst, +}; + +pub fn encode(f: Format, input: &[u8]) -> Result { + Ok(match f { Base32 => BASE32.encode(input), Base64 => BASE64.encode(input), - } + Base64Url => BASE64URL.encode(input), + Base32Hex => BASE32HEX.encode(input), + Base16 => HEXUPPER.encode(input), + Base2Lsbf => BASE2LSBF.encode(input), + Base2Msbf => BASE2MSBF.encode(input), + Z85 => { + // According to the spec we should not accept inputs whose len is not a multiple of 4. + // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. + if input.len() % 4 != 0 { + return Err(EncodeError::Z85InputLenNotMultipleOf4); + } else { + z85::encode(input) + } + } + }) } pub fn decode(f: Format, input: &[u8]) -> DecodeResult { Ok(match f { Base32 => BASE32.decode(input)?, Base64 => BASE64.decode(input)?, + Base64Url => BASE64URL.decode(input)?, + Base32Hex => BASE32HEX.decode(input)?, + Base16 => HEXUPPER.decode(input)?, + Base2Lsbf => BASE2LSBF.decode(input)?, + Base2Msbf => BASE2MSBF.decode(input)?, + Z85 => { + // The z85 crate implements a padded encoding by using a leading '#' which is otherwise not allowed. + // We manually check for a leading '#' and return an error ourselves. + if input.starts_with(&[b'#']) { + return Err(z85::DecodeError::InvalidByte(0, b'#').into()); + } else { + z85::decode(input)? + } + } }) } @@ -65,6 +115,12 @@ impl Data { alphabet: match format { Base32 => b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=", Base64 => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/", + Base64Url => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-", + Base32Hex => b"0123456789ABCDEFGHIJKLMNOPQRSTUV=", + Base16 => b"0123456789ABCDEF", + Base2Lsbf => b"01", + Base2Msbf => b"01", + Z85 => b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#", }, } } @@ -90,7 +146,7 @@ impl Data { decode(self.format, &buf) } - pub fn encode(&mut self) -> String { + pub fn encode(&mut self) -> Result { let mut buf: Vec = vec![]; self.input.read_to_end(&mut buf).unwrap(); encode(self.format, buf.as_slice()) diff --git a/tests/by-util/test_basenc.rs b/tests/by-util/test_basenc.rs new file mode 100644 index 0000000000..ba9eed1cd8 --- /dev/null +++ b/tests/by-util/test_basenc.rs @@ -0,0 +1,16 @@ +use crate::common::util::*; + +#[test] +fn test_z85_not_padded() { + // The z85 crate deviates from the standard in some cases; we have to catch those + new_ucmd!() + .args(&["--z85", "-d"]) + .pipe_in("##########") + .fails() + .stderr_only("basenc: error: invalid input"); + new_ucmd!() + .args(&["--z85"]) + .pipe_in("123") + .fails() + .stderr_only("basenc: error: invalid input (length must be multiple of 4 characters)"); +} diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 5468cd3ea8..4d70ceccc1 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -123,6 +123,14 @@ test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" # When decoding an invalid base32/64 string, gnu writes everything it was able to decode until # it hit the decode error, while we don't write anything if the input is invalid. sed -i "s/\(baddecode.*OUT=>\"\).*\"/\1\"/g" tests/misc/base64.pl +sed -i "s/\(\(b2[ml]_[69]\|b32h_[56]\|z85_8\|z85_35\).*OUT=>\)[^}]*\(.*\)/\1\"\"\3/g" tests/misc/basenc.pl + +# add "error: " to the expected error message +sed -i "s/\$prog: invalid input/\$prog: error: invalid input/g" tests/misc/basenc.pl + +# basenc: swap out error message for unexpected arg +sed -i "s/ {ERR=>\"\$prog: foobar\\\\n\" \. \$try_help }/ {ERR=>\"error: Found argument '--foobar' which wasn't expected, or isn't valid in this context\n\nUSAGE:\n basenc [OPTION]... [FILE]\n\nFor more information try --help\n\"}]/" tests/misc/basenc.pl +sed -i "s/ {ERR_SUBST=>\"s\/(unrecognized|unknown) option \[-' \]\*foobar\[' \]\*\/foobar\/\"}],//" tests/misc/basenc.pl # Remove the check whether a util was built. Otherwise tests against utils like "arch" are not run. sed -i "s|require_built_ |# require_built_ |g" init.cfg