Skip to content

Commit

Permalink
core: sanitize Microsoft Office comments, classes and styles.
Browse files Browse the repository at this point in the history
Fixes #1464
  • Loading branch information
mworrell committed Oct 4, 2016
1 parent a85aa5d commit d981658
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
35 changes: 35 additions & 0 deletions src/support/z_sanitize.erl
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,44 @@ sanitize_element_opts({<<"a">>, Attrs, Inner} = Element, _Stack, _Opts) ->
false ->
Element
end;
sanitize_element_opts({comment, <<" [", _/binary>> = Comment} = Element, _Stack, _Opts) ->
% Conditionals by Microsoft Word: <!-- [if (..)] (..) [endif]-->
case binary:last(Comment) of
$] -> <<>>;
_ -> Element
end;
sanitize_element_opts({comment, <<"StartFragment">>}, _Stack, _Opts) ->
% Inserted by Microsoft Word: <!--StartFragment-->
<<>>;
sanitize_element_opts({comment, <<"EndFragment">>}, _Stack, _Opts) ->
% Inserted by Microsoft Word: <!--EndFragment-->
<<>>;
sanitize_element_opts({Tag, Attrs, Inner}, _Stack, _Opts) ->
Attrs1 = cleanup_microsoft_attrs(Attrs),
{Tag, Attrs1, Inner};
sanitize_element_opts(Element, _Stack, _Opts) ->
Element.

%% @doc Run every attribute through cleanup_microsoft_attr/1 and discard
%% the ones that were marked with the atom 'drop'.
cleanup_microsoft_attrs(Attrs) ->
    % Strict on purpose: anything that is neither an attribute tuple nor
    % 'drop' is a programming error and crashes with function_clause.
    IsKept = fun
        (drop) -> false;
        ({_, _}) -> true
    end,
    Cleaned = [ cleanup_microsoft_attr(Attr) || Attr <- Attrs ],
    [ Attr || Attr <- Cleaned, IsKept(Attr) ].

%% @doc Clean a single attribute of Microsoft Office artefacts.
%% Returns the (possibly rewritten) {Name, Value} attribute, or the atom
%% 'drop' when the attribute must be removed altogether.
cleanup_microsoft_attr({<<"class">>, Classes}) ->
    % Strip all "Mso*" class names, keep whatever else was on the element.
    Classes1 = binary:split(Classes, <<" ">>, [global, trim_all]),
    case lists:filter(fun is_not_mso_class/1, Classes1) of
        [] ->
            % Nothing left (or the attribute was empty): remove the attribute.
            drop;
        Cs ->
            % Return a complete attribute tuple. Returning only the joined
            % binary made cleanup_microsoft_attrs/1 crash (function_clause)
            % for values like "MsoNormal foo", as it accepts only
            % {Name, Value} tuples or 'drop'.
            {<<"class">>, iolist_to_binary(z_utils:combine(32, Cs))}
    end;
cleanup_microsoft_attr({<<"style">>, <<"mso-", _/binary>>}) ->
    % This might need some extra parsing of the css.
    % For now we just drop style attributes whose value starts with
    % a "mso-" property.
    drop;
cleanup_microsoft_attr(Attr) ->
    Attr.

%% @doc Predicate for filtering class names: false for class names that
%% start with "Mso" (e.g. "MsoNormal"), true for anything else.
is_not_mso_class(Class) ->
    case Class of
        <<"Mso", _/binary>> -> false;
        _ -> true
    end.


sanitize_script(Props, Context) ->
Src = proplists:get_value(<<"src">>, Props),
Expand Down
17 changes: 17 additions & 0 deletions src/tests/z_sanitize_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,23 @@ youtube_object_test() ->
Out = <<"<iframe width=\"560\" height=\"315\" allowfullscreen=\"1\" frameborder=\"0\" src=\"https://www.youtube.com/embed/dQw4w9WgXcQ\"></iframe>">>,
?assertEqual(Out, z_sanitize:html(In, Context)).

mso1_test() ->
    % Conditional comments as inserted by Microsoft Word must be removed.
    Html = <<"Hello <!-- [if foo]...[endif]--> World">>,
    ?assertEqual(<<"Hello World">>,
                 z_sanitize:html(Html, z_context:new(testsandbox))).

mso2_test() ->
    % The StartFragment/EndFragment marker comments must be removed.
    Html = <<"Hello <!--StartFragment--> <!--EndFragment--> World">>,
    ?assertEqual(<<"Hello World">>,
                 z_sanitize:html(Html, z_context:new(testsandbox))).

mso3_test() ->
    % "Mso*" classes and "mso-" styles must be removed from the elements,
    % the elements themselves stay.
    Html = <<"<p class=\"MsoNormal\"><span style=\"mso-ansi-language: EN-US;\">Hello</span></p>">>,
    ?assertEqual(<<"<p><span>Hello</span></p>">>,
                 z_sanitize:html(Html, z_context:new(testsandbox))).

svg_imagetragick_test() ->
A = z_svg:sanitize(<<"
Expand Down

0 comments on commit d981658

Please sign in to comment.