Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add parser for RFC 822-style address lists; encode all header values …
…with RFC2047.
  • Loading branch information
Arjan Scherpenisse committed Feb 27, 2013
1 parent b9f1196 commit 749b140
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 9 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Expand Up @@ -6,4 +6,5 @@ coverage/*
build
*.xcodeproj
.eunit/
ebin/gen_smtp.app
ebin/gen_smtp.app
src/smtp_rfc822_parse.erl
20 changes: 13 additions & 7 deletions src/mimemail.erl
Expand Up @@ -688,10 +688,15 @@ encode_folded_header(Header, HeaderLines) ->
encode_folded_header(TabbedRemainder, [])
end.

encode_header_value(<<"Subject">>, Value) ->
rfc2047_utf8_encode(Value);
encode_header_value(H, Value) when H =:= <<"To">>; H =:= <<"Cc">>; H =:= <<"Bcc">>;
H =:= <<"Reply-To">>; H =:= <<"From">> ->
{ok, Addresses} = smtp_util:parse_rfc822_addresses(Value),
{Names, Emails} = lists:unzip(Addresses),
NewNames = lists:map(fun rfc2047_utf8_encode/1, Names),
smtp_util:combine_rfc822_addresses(lists:zip(NewNames, Emails));

encode_header_value(_, Value) ->
Value.
rfc2047_utf8_encode(Value).

encode_component(_Type, _SubType, Headers, Params, Body) ->
if
Expand Down Expand Up @@ -833,6 +838,7 @@ fix_encoding(Encoding) ->

%% @doc Encode a binary or list according to RFC 2047. Input is
%% assumed to be in UTF-8 encoding.
rfc2047_utf8_encode(undefined) -> undefined;
rfc2047_utf8_encode(B) when is_binary(B) ->
rfc2047_utf8_encode(binary_to_list(B));
rfc2047_utf8_encode([]) ->
Expand All @@ -843,7 +849,7 @@ rfc2047_utf8_encode(Text) ->
%% Don't escape when all characters are ASCII printable
rfc2047_utf8_encode([], Text) ->
Text;
rfc2047_utf8_encode([H|T], Text) when H >= 32 andalso H =< 126 andalso H /= $= ->
rfc2047_utf8_encode([H|T], Text) when H >= 32 andalso H =< 126 ->
rfc2047_utf8_encode(T, Text);
rfc2047_utf8_encode(_, Text) ->
"=?UTF-8?Q?" ++ rfc2047_utf8_encode(Text, [], 0) ++ "?=".
Expand Down Expand Up @@ -1526,11 +1532,11 @@ encoding_test_() ->
?assertEqual(Result, encode(Email))
end
},
{"Email with UTF-8 characters in subject",
{"Email with UTF-8 characters",
fun() ->
Email = {<<"text">>, <<"plain">>, [
{<<"Subject">>, <<"Fræderik Hølljen">>},
{<<"From">>, <<"me@example.com">>},
{<<"From">>, <<"Fræderik Hølljen <me@example.com>">>},
{<<"To">>, <<"you@example.com">>},
{<<"Message-ID">>, <<"<abcd@example.com>">>},
{<<"MIME-Version">>, <<"1.0">>},
Expand All @@ -1539,7 +1545,7 @@ encoding_test_() ->
[{<<"charset">>,<<"US-ASCII">>}],
{<<"disposition">>,<<"inline">>}}],
<<"This is a plain message">>},
Result = <<"Subject: =?UTF-8?Q?Fr=C3=A6derik=20H=C3=B8lljen?=\r\nFrom: me@example.com\r\nTo: you@example.com\r\nMessage-ID: <abcd@example.com>\r\nMIME-Version: 1.0\r\nDate: Sun, 01 Nov 2009 14:44:47 +0200\r\n\r\nThis is a plain message">>,
Result = <<"Subject: =?UTF-8?Q?Fr=C3=A6derik=20H=C3=B8lljen?=\r\nFrom: =?UTF-8?Q?Fr=C3=A6derik=20H=C3=B8lljen?= <me@example.com>\r\nTo: you@example.com\r\nMessage-ID: <abcd@example.com>\r\nMIME-Version: 1.0\r\nDate: Sun, 01 Nov 2009 14:44:47 +0200\r\n\r\nThis is a plain message">>,
?assertEqual(Result, encode(Email))
end
},
Expand Down
29 changes: 29 additions & 0 deletions src/smtp_rfc822_parse.yrl
@@ -0,0 +1,29 @@
%% Yecc grammar for a comma-separated list of RFC 822-style addresses.
%% The semantic value of the root symbol is a list of {Name, Email}
%% tuples, where Name is `undefined' when no display name was given.
Nonterminals
addresses
address
name
names
email.

%% `string' tokens carry their text as the third tuple element (see the
%% element(3, ...) calls below); the punctuation tokens carry no value.
Terminals
string
',' '<' '>'.

Rootsymbol
addresses.

Endsymbol
'$end'.

%% An address list is empty, a single address, or an address followed by
%% a comma and another (possibly empty) address list.
addresses -> address : ['$1'].
addresses -> address ',' addresses : ['$1' | '$3'].
addresses -> '$empty' : [].

%% Bare address, bracketed address without a name, or name(s) followed
%% by a bracketed address.
address -> email : {undefined, '$1'}.
address -> '<' email '>' : {undefined, '$2'}.
address -> names '<' email '>' : {lists:flatten('$1'), '$3'}.

%% Consecutive name tokens are joined with a single space; the resulting
%% deep list is flattened in the `address' rule above.
email -> string : element(3, '$1').
names -> name : '$1'.
names -> name names : ['$1', " " | '$2'].
name -> string : element(3, '$1').
68 changes: 67 additions & 1 deletion src/smtp_util.erl
Expand Up @@ -26,7 +26,9 @@
-export([
mxlookup/1, guess_FQDN/0, compute_cram_digest/2, get_cram_string/1,
trim_crlf/1, rfc5322_timestamp/0, zone/0, generate_message_id/0,
generate_message_boundary/0]).
parse_rfc822_addresses/1,
combine_rfc822_addresses/1,
generate_message_boundary/0]).

%% @doc returns a sorted list of mx servers for `Domain', lowest distance first
mxlookup(Domain) ->
Expand Down Expand Up @@ -111,4 +113,68 @@ generate_message_boundary() ->
["_=", [io_lib:format("~2.36.0b", [X]) || <<X>> <= erlang:md5(term_to_binary([erlang:now(), FQDN]))], "=_"].


-define(is_whitespace(Ch), (Ch =< 32)).

%% @doc Render a list of `{Name, Email}' tuples as one address-list
%% binary, with entries separated by ", ". An entry whose `Name' is
%% `undefined' is rendered as the bare e-mail address; any other entry
%% is rendered as `Name <Email>'. An empty list yields `<<>>'.
combine_rfc822_addresses([]) ->
    %% There is no trailing separator to strip for an empty list; without
    %% this clause the match below fails with badmatch.
    <<>>;
combine_rfc822_addresses(Addresses) ->
    %% The accumulator is built in reverse and every entry pushes its
    %% ", " separator last, so the two leading elements are the
    %% separator that would otherwise trail the final address.
    [_, _ | Acc] = combine_rfc822_addresses(Addresses, []),
    iolist_to_binary(lists:reverse(Acc)).

%% Pushes the rendering of every address onto the accumulator in reverse
%% order. Each entry is followed by a comma and a space, which therefore
%% end up at the head of the returned list.
combine_rfc822_addresses(Addresses, Acc0) ->
    lists:foldl(
        fun({undefined, Email}, Acc) ->
                [32, $,, Email | Acc];
           ({Name, Email}, Acc) ->
                [32, $,, $>, Email, $<, 32, opt_quoted(Name) | Acc]
        end,
        Acc0,
        Addresses).

%% Returns the name unchanged unless it contains a double quote; in that
%% case every double quote is backslash-escaped and the whole name is
%% wrapped in double quotes. The quoted form is returned as an iolist.
opt_quoted(Name) ->
    case re:run(Name, "\"", [{capture, none}]) of
        match ->
            Escaped = re:replace(Name, "\"", "\\\\\"", [global]),
            [$", Escaped, $"];
        nomatch ->
            Name
    end.

%% @doc Parse an address list given as a binary or a string. The input
%% is tokenised with scan_rfc822/2 and the tokens, terminated by an
%% end-of-input token, are handed to smtp_rfc822_parse:parse/1, whose
%% result is returned as-is.
parse_rfc822_addresses(Input) when is_binary(Input) ->
    parse_rfc822_addresses(binary_to_list(Input));

parse_rfc822_addresses(Input) when is_list(Input) ->
    %% The scanner returns its tokens in reverse order.
    ReversedTokens = scan_rfc822(Input, []),
    Tokens = lists:reverse(ReversedTokens, [{'$end', 0}]),
    smtp_rfc822_parse:parse(Tokens).

%% Tokenise an address-list string for the smtp_rfc822_parse grammar.
%% Tokens are pushed onto Acc, so the result is in reverse input order.
%%
%% Produces `{string, 0, Text}' for quoted strings, bracketed addresses
%% and bare words, and `{',', 0}', `{'<', 0}', `{'>', 0}' for punctuation.
%% Whitespace (any character =< 32) separates tokens and is discarded.
scan_rfc822([], Acc) ->
    Acc;
scan_rfc822([Ch|R], Acc) when ?is_whitespace(Ch) ->
    scan_rfc822(R, Acc);
scan_rfc822([$"|R], Acc) ->
    {Token, Rest} = scan_rfc822_scan_endquote(R, [], false),
    scan_rfc822(Rest, [{string, 0, Token}|Acc]);
scan_rfc822([$,|Rest], Acc) ->
    scan_rfc822(Rest, [{',', 0}|Acc]);
scan_rfc822([$<|Rest], Acc) ->
    %% Everything up to the closing bracket is one token, so commas and
    %% spaces inside the brackets do not split the address.
    {Token, R} = scan_rfc822_scan_endpointybracket(Rest),
    scan_rfc822(R, [{'>', 0}, {string, 0, Token}, {'<', 0}|Acc]);
scan_rfc822([$>|Rest], Acc) ->
    %% A '>' without a matching '<'. Emit it as punctuation and move on so
    %% the parser can reject the input; treating it as the start of a bare
    %% word would consume nothing and never terminate.
    scan_rfc822(Rest, [{'>', 0}|Acc]);
scan_rfc822(String, Acc) ->
    %% Bare word: the first character is not a delimiter (the clauses above
    %% have handled those), so Token is never empty and progress is made.
    IsWordChar = fun(Ch) ->
                     not (?is_whitespace(Ch)
                          orelse Ch =:= $<
                          orelse Ch =:= $>
                          orelse Ch =:= $,)
                 end,
    {Token, Rest} = lists:splitwith(IsWordChar, String),
    scan_rfc822(Rest, [{string, 0, Token}|Acc]).

%% Split String at the first '>'. Returns `{Token, Rest}' where Token is
%% the text before the bracket and Rest is everything after it (the
%% bracket itself is dropped). When there is no '>', the whole string is
%% the token and Rest is empty.
%%
%% Plain list splitting is used instead of a regular expression so that
%% line breaks in the input (e.g. folded headers) cannot cause text
%% before or after the bracket to be discarded.
scan_rfc822_scan_endpointybracket(String) ->
    case lists:splitwith(fun(Ch) -> Ch =/= $> end, String) of
        {Token, [$>|Rest]} ->
            {Token, Rest};
        {Token, []} ->
            {Token, []}
    end.

%% Read the remainder of a quoted string (the opening quote has already
%% been consumed). Returns `{Text, Rest}' where Text is the unescaped
%% content and Rest is the input after the closing quote.
%%
%% Acc collects the content in reverse; InEscape is true when the
%% previous character was an unescaped backslash. A backslash makes the
%% next character literal, so `\"' yields a quote and `\\' a backslash.
%% If the input ends before a closing quote, what was read so far is
%% returned with an empty Rest.
scan_rfc822_scan_endquote([], Acc, _InEscape) ->
    %% Unterminated quoted string.
    {lists:reverse(Acc), []};
scan_rfc822_scan_endquote([Ch|Rest], Acc, true) ->
    %% Escaped character: taken literally, including quote and backslash.
    scan_rfc822_scan_endquote(Rest, [Ch|Acc], false);
scan_rfc822_scan_endquote([$\\|Rest], Acc, false) ->
    %% Start of an escape sequence; the backslash itself is not kept.
    scan_rfc822_scan_endquote(Rest, Acc, true);
scan_rfc822_scan_endquote([$"|Rest], Acc, false) ->
    %% Unescaped closing quote: done.
    {lists:reverse(Acc), Rest};
scan_rfc822_scan_endquote([Ch|Rest], Acc, false) ->
    scan_rfc822_scan_endquote(Rest, [Ch|Acc], false).
76 changes: 76 additions & 0 deletions test/gen_smtp_util_test.erl
@@ -0,0 +1,76 @@
-module(gen_smtp_util_test).

-compile(export_all).

-include_lib("eunit/include/eunit.hrl").

%% Two bare words without a bracketed address do not form a valid
%% address list; the parser must report an error rather than crash.
test_test() ->
    ?assertMatch({error, _}, smtp_util:parse_rfc822_addresses("foo bar")).

%% Table-driven tests for smtp_util:parse_rfc822_addresses/1. Every group
%% is one titled test that checks its {Expected, Input} pairs in order.
parse_rfc822_addresses_test_() ->
    Check =
        fun(Cases) ->
            fun() ->
                lists:foreach(
                    fun({Expected, Input}) ->
                        ?assertEqual(Expected, smtp_util:parse_rfc822_addresses(Input))
                    end,
                    Cases)
            end
        end,
    [
        {"Empty address list",
         Check([
             {{ok, []}, <<>>},
             {{ok, []}, <<" ">>},
             {{ok, []}, <<" \r\n\t ">>},
             {{ok, []}, <<"
">>}
         ])},
        {"Single addresses",
         Check([
             {{ok, [{undefined, "john@doe.com"}]},
              <<"john@doe.com">>},
             {{ok, [{"Fræderik Hølljen", "me@example.com"}]},
              <<"Fræderik Hølljen <me@example.com>">>},
             {{ok, [{undefined, "john@doe.com"}]},
              <<"<john@doe.com>">>},
             {{ok, [{"John", "john@doe.com"}]},
              <<"John <john@doe.com>">>},
             {{ok, [{"John Doe", "john@doe.com"}]},
              <<"John Doe <john@doe.com>">>},
             {{ok, [{"John Doe", "john@doe.com"}]},
              <<"\"John Doe\" <john@doe.com>">>},
             {{ok, [{"John \"Mighty\" Doe", "john@doe.com"}]},
              <<"\"John \\\"Mighty\\\" Doe\" <john@doe.com>">>}
         ])},
        {"Multiple addresses",
         Check([
             {{ok, [{undefined, "a@a.com"}, {undefined, "b@b.com"}]},
              <<"a@a.com,b@b.com">>},
             {{ok, [{undefined, "a,a@a.com"}, {undefined, "b@b.com"}]},
              <<"<a,a@a.com>,b@b.com">>},
             {{ok, [{"Jan", "a,a@a.com"}, {undefined, "b@b.com"}]},
              <<"Jan <a,a@a.com>,b@b.com">>},
             {{ok, [{"Jan", "a,a@a.com"}, {"Berend Botje", "b@b.com"}]},
              <<"Jan <a,a@a.com>,\"Berend Botje\" <b@b.com>">>}
         ])}
    ].

%% Table-driven tests for smtp_util:combine_rfc822_addresses/1. Every
%% group is one titled test that checks its {Expected, Addresses} pairs
%% in order.
combine_rfc822_addresses_test_() ->
    Check =
        fun(Cases) ->
            fun() ->
                lists:foreach(
                    fun({Expected, Addresses}) ->
                        ?assertEqual(Expected, smtp_util:combine_rfc822_addresses(Addresses))
                    end,
                    Cases)
            end
        end,
    [
        {"One address",
         Check([
             {<<"john@doe.com">>,
              [{undefined, "john@doe.com"}]},
             {<<"John <john@doe.com>">>,
              [{"John", "john@doe.com"}]},
             {<<"\"John \\\"Foo\" <john@doe.com>">>,
              [{"John \"Foo", "john@doe.com"}]}
         ])},
        {"Multiple addresses",
         Check([
             {<<"john@doe.com, foo@bar.com">>,
              [{undefined, "john@doe.com"}, {undefined, "foo@bar.com"}]},
             {<<"John <john@doe.com>, foo@bar.com">>,
              [{"John", "john@doe.com"}, {undefined, "foo@bar.com"}]}
         ])}
    ].

%% Parsing an address list and combining the result again must reproduce
%% the original binary.
rfc822_addresses_roundtrip_test() ->
    Original = <<"Jan <a,a@a.com>, Berend Botje <b@b.com>">>,
    {ok, Addresses} = smtp_util:parse_rfc822_addresses(Original),
    Recombined = smtp_util:combine_rfc822_addresses(Addresses),
    ?assertEqual(Original, Recombined),
    ok.


0 comments on commit 749b140

Please sign in to comment.