From f55ef18bab837f1bf49814d902638c553d140e79 Mon Sep 17 00:00:00 2001 From: Juan Jose Comellas Date: Fri, 20 Jul 2012 11:00:25 -0300 Subject: [PATCH] Expose getopt:tokenize/1 and command-line literals --- README.md | 36 ++++++++++++++++++++++++++++++++ src/getopt.erl | 49 ++++++++++++++++++++++---------------------- test/getopt_test.erl | 11 +++++++++- 3 files changed, 70 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 06e8a95..3478a4e 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,8 @@ The *getopt* module provides four functions: parse([{Name, Short, Long, ArgSpec, Help}], Args :: string() | [string()]) -> {ok, {Options, NonOptionArgs}} | {error, {Reason, Data}} +tokenize(CmdLine :: string()) -> [string()] + usage([{Name, Short, Long, ArgSpec, Help}], ProgramName :: string()) -> ok usage([{Name, Short, Long, ArgSpec, Help}], ProgramName :: string(), @@ -137,6 +139,21 @@ Will return: ["dummy1","dummy2"]}} ``` +The ``tokenize/1`` function will separate a command line string into +tokens, taking into account whether an argument is single or double +quoted, a character is escaped or there are environment variables to +be expanded. e.g.: + +``` erlang +getopt:tokenize(" --name John\\ Smith --path \"John's Files\" -u ${USER}"). +``` + +Will return something like: + +``` erlang +["--name","John Smith","--path","John's Files","-u","jsmith"] +``` + The other functions exported by the ``getopt`` module (``usage/2``, ``usage/3`` and ``usage/4``) are used to show the command line syntax for the program. For example, given the above-mentioned option specifications, the call to @@ -446,3 +463,22 @@ would return (depending on the value of your PATH variable) something like: ``` Currently, *getopt* does not perform wildcard expansion of file paths. + + +Escaping arguments +================== + +Any character can be escaped by prepending the \ (backslash) character +to it. + +e.g. + +``` erlang +getopt:parse(OptSpecList, "--path /john\\'s\\ files dummy"). +``` + +Will return: + +``` erlang +{ok,{[{path,"/john's files"}],["dummy"]}} +``` diff --git a/src/getopt.erl b/src/getopt.erl index 09fd194..fcd7d36 100644 --- a/src/getopt.erl +++ b/src/getopt.erl @@ -11,8 +11,7 @@ -module(getopt). -author('juanjo@comellas.org'). --export([parse/2, usage/2, usage/3, usage/4]). --export([tokenize_cmdline/1]). +-export([parse/2, usage/2, usage/3, usage/4, tokenize/1]). -export_type([arg_type/0, arg_value/0, @@ -67,7 +66,7 @@ parse(OptSpecList, CmdLine) -> try Args = if is_integer(hd(CmdLine)) -> - tokenize_cmdline(CmdLine); + tokenize(CmdLine); true -> CmdLine end, @@ -612,12 +611,12 @@ add_option_help(_Opt, _Prefix, Acc) -> %% The function also supports the expansion of environment variables in %% both the Unix (${VAR}; $VAR) and Windows (%VAR%) formats. It does NOT %% support wildcard expansion of paths. --spec tokenize_cmdline(CmdLine :: string()) -> [string()]. -tokenize_cmdline(CmdLine) -> - tokenize_cmdline(CmdLine, [], []). +-spec tokenize(CmdLine :: string()) -> [string()]. +tokenize(CmdLine) -> + tokenize(CmdLine, [], []). --spec tokenize_cmdline(CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()]. -tokenize_cmdline([Sep | Tail], Acc, ArgAcc) when Sep =:= $\s; Sep =:= $\t; Sep =:= $\n -> +-spec tokenize(CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()]. +tokenize([Sep | Tail], Acc, ArgAcc) when Sep =:= $\s; Sep =:= $\t; Sep =:= $\n -> NewAcc = case ArgAcc of [_ | _] -> %% Found separator: add to the list of arguments. @@ -626,39 +625,39 @@ tokenize_cmdline([Sep | Tail], Acc, ArgAcc) when Sep =:= $\s; Sep =:= $\t; Sep = %% Found separator with no accumulated argument; discard it. Acc end, - tokenize_cmdline(Tail, NewAcc, []); -tokenize_cmdline([QuotationMark | Tail], Acc, ArgAcc) when QuotationMark =:= $"; QuotationMark =:= $' -> + tokenize(Tail, NewAcc, []); +tokenize([QuotationMark | Tail], Acc, ArgAcc) when QuotationMark =:= $"; QuotationMark =:= $' -> %% Quoted argument (might contain spaces, tabs, etc.) tokenize_quoted_arg(QuotationMark, Tail, Acc, ArgAcc); -tokenize_cmdline([Char | _Tail] = CmdLine, Acc, ArgAcc) when Char =:= $$; Char =:= $% -> +tokenize([Char | _Tail] = CmdLine, Acc, ArgAcc) when Char =:= $$; Char =:= $% -> %% Unix and Windows environment variable expansion: ${VAR}; $VAR; %VAR% {NewCmdLine, Var} = expand_env_var(CmdLine), - tokenize_cmdline(NewCmdLine, Acc, lists:reverse(Var, ArgAcc)); -tokenize_cmdline([$\\, Char | Tail], Acc, ArgAcc) -> + tokenize(NewCmdLine, Acc, lists:reverse(Var, ArgAcc)); +tokenize([$\\, Char | Tail], Acc, ArgAcc) -> %% Escaped char. - tokenize_cmdline(Tail, Acc, [Char | ArgAcc]); -tokenize_cmdline([Char | Tail], Acc, ArgAcc) -> - tokenize_cmdline(Tail, Acc, [Char | ArgAcc]); -tokenize_cmdline([], Acc, []) -> + tokenize(Tail, Acc, [Char | ArgAcc]); +tokenize([Char | Tail], Acc, ArgAcc) -> + tokenize(Tail, Acc, [Char | ArgAcc]); +tokenize([], Acc, []) -> lists:reverse(Acc); -tokenize_cmdline([], Acc, ArgAcc) -> +tokenize([], Acc, ArgAcc) -> lists:reverse([lists:reverse(ArgAcc) | Acc]). -spec tokenize_quoted_arg(QuotationMark :: char(), CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()]. tokenize_quoted_arg(QuotationMark, [QuotationMark | Tail], Acc, ArgAcc) -> %% End of quoted argument - tokenize_cmdline(Tail, Acc, ArgAcc); -tokenize_quoted_arg($" = QuotationMark, [Char | _Tail] = CmdLine, Acc, ArgAcc) when Char =:= $$; Char =:= $% -> - %% Unix and Windows environment variable expansion: ${VAR}; $VAR; %VAR% - {NewCmdLine, Var} = expand_env_var(CmdLine), - tokenize_quoted_arg(QuotationMark, NewCmdLine, Acc, lists:reverse(Var, ArgAcc)); + tokenize(Tail, Acc, ArgAcc); tokenize_quoted_arg(QuotationMark, [$\\, Char | Tail], Acc, ArgAcc) -> %% Escaped char. tokenize_quoted_arg(QuotationMark, Tail, Acc, [Char | ArgAcc]); +tokenize_quoted_arg($" = QuotationMark, [Char | _Tail] = CmdLine, Acc, ArgAcc) when Char =:= $$; Char =:= $% -> + %% Unix and Windows environment variable expansion (only for double-quoted arguments): ${VAR}; $VAR; %VAR% + {NewCmdLine, Var} = expand_env_var(CmdLine), + tokenize_quoted_arg(QuotationMark, NewCmdLine, Acc, lists:reverse(Var, ArgAcc)); tokenize_quoted_arg(QuotationMark, [Char | Tail], Acc, ArgAcc) -> tokenize_quoted_arg(QuotationMark, Tail, Acc, [Char | ArgAcc]); tokenize_quoted_arg(_QuotationMark, CmdLine, Acc, ArgAcc) -> - tokenize_cmdline(CmdLine, Acc, ArgAcc). + tokenize(CmdLine, Acc, ArgAcc). -spec expand_env_var(CmdLine :: string()) -> string(). @@ -695,7 +694,7 @@ expand_env_var(Prefix, CmdLine, Acc) -> -spec get_env_var(Prefix :: string(), Suffix :: string(), Acc :: string()) -> string(). get_env_var(Prefix, Suffix, [_ | _] = Acc) -> Name = lists:reverse(Acc), - %% Only expand valid variables. + %% Only expand valid/existing variables. case os:getenv(Name) of false -> Prefix ++ Name ++ Suffix; Value -> Value diff --git a/test/getopt_test.erl b/test/getopt_test.erl index 66d1021..cbfd851 100644 --- a/test/getopt_test.erl +++ b/test/getopt_test.erl @@ -13,7 +13,7 @@ -include_lib("eunit/include/eunit.hrl"). --import(getopt, [parse/2]). +-import(getopt, [parse/2, tokenize/1]). -define(NAME(Opt), element(1, Opt)). -define(SHORT(Opt), element(2, Opt)). @@ -274,3 +274,12 @@ parse_variable_expansion_test_() -> ?_assertEqual({ok, {[{path, "%PATH"}], ["%DUMMY_VAR_THAT_MUST_NOT_EXIST%"]}}, parse(OptSpecList, " --path %PATH %DUMMY_VAR_THAT_MUST_NOT_EXIST% "))} ]. + + +tokenize_test_() -> + %% Path = os:getenv("PATH"), + [ + {"Tokenize", + ?_assertEqual(["ABC","abc","1234","5678","DEFGHI","\"JKL \"", "$PATH"], + tokenize(" ABC abc '1234' \"5678\" 'DEF'\"GHI\" '\"JKL \"' \\$PATH"))} + ].