Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Added markdown converters from/to html, for use with the emailer.

  • Loading branch information...
commit d4330ae42aa5270cd0367b327880ee55c98960f8 1 parent f2bc9f1
@mworrell mworrell authored
View
1,285 src/markdown/markdown.erl
@@ -0,0 +1,1285 @@
+%%%-------------------------------------------------------------------
+%%% @author Gordon Guthrie
+%%% @copyright (C) 2009, Gordon Guthrie
+%%% @doc,
+%%%
+%%% @end
+%%% Created : 10 Sep 2009 by gordonguthrie@backawinner.gg
+%%%-------------------------------------------------------------------
+
+-module(markdown).
+
+-export([conv/1,
+ conv_utf8/1,
+ conv_file/2]).
+
+-import(lists, [flatten/1, reverse/1]).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(SPACE, 32).
+-define(TAB, 9).
+-define(LF, 10).
+-define(CR, 13).
+-define(NBSP, 160).
+-define(AMP, $&, $a, $m, $p, $;).
+-define(COPY, $&, $c, $o, $p, $y, $;).
+
+%%% the lexer first lexes the input
+%%% make_lines does 2 passes:
+%%% * it chops the lexed strings into lines which it represents as a
+%%% list of lists
+%%% * it then types the lines into the following:
+%%% * normal lines
+%%% * reference style links
+%%% * reference style images
+%%% * special line types
+%%% - blank
+%%% - SETEXT header lines
+%%% - ATX header lines
+%%% - blockquote
+%%% - unordered lists
+%%% - ordered lists
+%%% - code blocks
+%%% - horizontal rules
+%%% the parser then does its magic interpolating the references as appropriate
%% @doc Convert a markdown string to HTML.
%% Pipeline: lex/1 tokenises, make_lines/1 chops the tokens into lines,
%% type_lines/1 classifies each line and harvests reference definitions,
%% parse/2 renders the typed lines to HTML.
conv(String) ->
    Lexed = lex(String),
    UntypedLines = make_lines(Lexed),
    {TypedLines, Refs} = type_lines(UntypedLines),
    parse(TypedLines, Refs).
+
%% @doc Convert a UTF-8 encoded markdown string to UTF-8 encoded HTML.
%% Decodes the byte list to unicode codepoints, converts, re-encodes.
-spec conv_utf8(list()) -> list().
conv_utf8(Utf8) ->
    Str = xmerl_ucs:from_utf8(Utf8),
    Res = conv(Str),
    xmerl_ucs:to_utf8(Res).
+
%% @doc Convert the markdown file FileIn and write the HTML to FileOut.
%% Returns the result of write/2 on success, or 'error' if FileIn
%% cannot be opened.
conv_file(FileIn, FileOut) ->
    case file:open(FileIn, [read]) of
        {ok, Device} -> Input = get_all_lines(Device,[]),
                        Output = conv(Input),
                        write(FileOut, Output);
        _ -> error
    end.
+
%% @doc Read the whole of an opened io device into one flat string and
%% close it. The accumulator holds lines in reverse (cons is O(1)) and is
%% flattened once at eof, instead of the original's O(n^2)
%% `Accum ++ Line' append per line. Internal helper: the initial
%% accumulator is always [].
get_all_lines(Device, Accum) ->
    case io:get_line(Device, "") of
        eof  -> file:close(Device),
                lists:flatten(lists:reverse(Accum));
        Line -> get_all_lines(Device, [Line | Accum])
    end.
+
%% @doc Write Text to File (appending a newline), creating parent
%% directories as needed. Returns ok on success, error otherwise.
write(File, Text) ->
    % best effort: result deliberately ignored; file:open below reports failure
    _Return=filelib:ensure_dir(File),
    case file:open(File, [write]) of
        {ok, Id} ->
            io:fwrite(Id, "~s~n", [Text]),
            file:close(Id);
        _ ->
            error
    end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%
+%%% Parse the lines interpolating the references as appropriate
+%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
%% @doc Render typed lines to HTML, interpolating the references Refs,
%% and strip leading/trailing newlines from the result.
parse(TypedLines, Refs) ->
    string:strip(p1(TypedLines, Refs, 0, []), both, $\n).
+
%% goes through the lines
%% Variable 'R' contains the References and 'I' is the indent level
%% HTML fragments are pushed onto Acc (newest first) and the terminal
%% clause flattens the reversed accumulator into the output string.
%% Clause order is significant: look-behind clauses (setext/h2_or_hr
%% after a normal line, list/blockquote swallowing) must fire before the
%% single-line clauses further down.

%% Terminal clause
p1([], _R, _I, Acc) -> flatten(reverse(Acc));

%% Tags have the highest precedence...
p1([{tag, Tag} | T], R, I, Acc) ->
    case T of
        [] -> p1([], R, I,
                 ["</p>", make_tag_str(Tag, R), "<p>" | Acc]);
        [{blank, _} | T2] -> p1(T2, R, I,
                                [make_tag_str(Tag, R) | Acc]);
        _Other -> p1(T, R, I,
                     [pad(I) ++ make_tag_str(Tag, R) | Acc])
    end;

%% an opening block-level tag: grab everything up to its closing tag
%% verbatim (see grab_for_blockhtml/3)
p1([{blocktag, [{{{tag, open}, Type}, Tg}] = _Tag} | T], R, I, Acc) ->
    {Block, Rest} = grab_for_blockhtml(T, Type, []),
    Str = lists:flatten([Tg, "\n" | Block]),
    p1(Rest, R, I, [Str | Acc]);

%% blank lines/linefeeds are gobbled down
p1([{Type, _} | T], R, I, Acc)
  when Type == blank orelse Type == linefeed ->
    Rest = grab_empties(T),
    p1(Rest, R, I, [pad(I) ++ "\n" | Acc]);

%% two consecutive normal lines should be concatenated...
%% remembering the pad the second line with the indent...
p1([{normal, P1}, {normal, P2} | T], R, I, Acc) ->
    p1([{normal, merge(P1, pad(I), P2)} | T], R, I, Acc);
%% as should a normal and linefeed

%% setext h1 is a look behind and it overrides blockquote and code...
p1([{normal, P}, {setext_h1, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h1>" ++ make_str(snip(P), R)
                 ++ "</h1>\n\n" | Acc]);
p1([{blockquote, P}, {setext_h1, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h1>" ++ make_str(snip(P), R)
                 ++ "</h1>\n\n" | Acc]);
p1([{{codeblock, P}, _}, {setext_h1, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h1>" ++ make_str(snip(P), R)
                 ++ "</h1>\n\n" | Acc]);
p1([{blockquote, P}, {h2_or_hr, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h2>" ++ make_str(snip(P), R)
                 ++ "</h2>\n\n" | Acc]);
p1([{{codeblock, P}, _}, {h2_or_hr, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h2>" ++ make_str(snip(P), R)
                 ++ "</h2>\n\n" | Acc]);

%% but a setext with no lookbehind is just rendered as a normal line,
%% so change its type and rethrow it
p1([{setext_h1, P} | T], R, I, Acc) ->
    p1([{normal, P} | T], R, I, Acc);

%% setext h2 might be a look behind
p1([{normal, P}, {h2_or_hr, _} | T], R, I, Acc) ->
    P2 = string:strip(make_str(snip(P), R), both, ?SPACE),
    p1(T, R, I, [pad(I) ++ "<h2>" ++ P2 ++ "</h2>\n\n" | Acc]);

%% blockquotes swallow each other
%% replace the first blockquote mark with a space...
p1([{blockquote, P1}, {blockquote, [_ | P2]} | T], R, I, Acc) ->
    p1([{blockquote, merge(P1, pad(I), [{{ws, sp}, " "} | P2])} | T], R, I, Acc);
%% blockquotes swallow normal
p1([{blockquote, P1}, {normal, P2} | T], R, I, Acc) ->
    p1([{blockquote, merge(P1, pad(I + 1), P2)} | T], R, I, Acc);
%% blockquote
p1([{blockquote, P} | T], R, I, Acc) ->
    % drop the leading > marker before rendering the quote body
    [{{md, gt}, _} | T1] = P,
    T2 = string:strip(make_str(T1, R)),
    p1(T, R, I,
       ["\n<blockquote>\n" ++ pad(I + 1) ++ "<p>" ++ T2 ++ "</p>\n</blockquote>" | Acc]);

%% one normal is just normal...
p1([{normal, P} | T], R, I, Acc) ->
    P2 = string:strip(make_str(snip(P), R), both, ?SPACE),
    p1(T, R, I, [pad(I) ++ "<p>" ++ P2 ++ "</p>\n" | Acc]);

%% atx headings
p1([{{h1, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_str(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h1>" ++ NewP ++ "</h1>\n\n" | Acc]);
p1([{{h2, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_str(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h2>" ++ NewP ++ "</h2>\n\n" | Acc]);
p1([{{h3, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_str(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h3>" ++ NewP ++ "</h3>\n\n" | Acc]);
p1([{{h4, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_str(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h4>" ++ NewP ++ "</h4>\n\n" | Acc]);
p1([{{h5, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_str(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h5>" ++ NewP ++ "</h5>\n\n" | Acc]);
p1([{{h6, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_str(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h6>" ++ NewP ++ "</h6>\n\n" | Acc]);

%% unordered lists swallow normal and codeblock lines
p1([{{ul, P1}, S1}, {{normal, P2}, S2} | T], R, I , Acc) ->
    p1([{{ul, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{ul, P1}, S1}, {{codeblock, P2}, S2} | T], R, I , Acc) ->
    p1([{{ul, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{ul, _P}, _} | _T] = List, R, I, Acc) ->
    {Rest, NewAcc} = parse_list(ul, List, R, I, [], false),
    p1(Rest, R, I, [pad(I) ++ "<ul>\n" ++ NewAcc
                    ++ pad(I) ++ "</ul>\n" | Acc]);

%% ordered lists swallow normal and codeblock lines
p1([{{ol, P1}, S1}, {{normal, P2}, S2} | T], R, I , Acc) ->
    p1([{{ol, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{ol, P1}, S1}, {{codeblock, P2}, S2} | T], R, I , Acc) ->
    p1([{{ol, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{ol, _P}, _} | _T] = List, R, I, Acc) ->
    {Rest, NewAcc} = parse_list(ol, List, R, I, [], false),
    p1(Rest, R, I, [pad(I) ++ "<ol>\n" ++ NewAcc
                    ++ pad(I) ++ "</ol>\n" | Acc]);

%% codeblock consumes any following empty lines
%% and other codeblocks
p1([{{codeblock, P1}, S1}, {{codeblock, P2}, S2} | T], R, I, Acc) ->
    p1([{{codeblock, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{codeblock, P}, _} | T], R, I, Acc) ->
    Rest = grab_empties(T),
    p1(Rest, R, I, ["<pre><code>" ++ make_str(snip(P), R)
                    ++ "\n</code></pre>\n\n" | Acc]);

%% horizontal rules
p1([{hr, _} | T], R, I, Acc) ->
    p1(T, R, I, ["<hr />" | Acc]);
%% h2_or_hr is greedy for normal lines
p1([{h2_or_hr, P1}, {normal, P2} | T], R, I, Acc) ->
    p1([{normal, flatten([P1 | P2])} | T], R, I, Acc);
%% the clause with a normal before an 'h2_or_hr' has already been
%% handled further up the tree, so this is a bona fide 'hr'...
p1([{h2_or_hr, _} | T], R, I, Acc) ->
    p1(T, R, I, ["<hr />" | Acc]);

%% Now start pulling out inline refs etc, etc
%% inline reference definitions produce no output of their own
p1([{inlineref, _P} | T], R, I, Acc) ->
    p1(T, R, I, Acc).
+
%% @doc Collect the lines of a block-level HTML element verbatim until
%% the matching closing tag of Type; returns {BlockLines, RemainingLines}.
%% If the input runs out, a synthetic closing tag is appended.
grab_for_blockhtml([], Type, Acc) ->
    {lists:reverse(["</" ++ Type ++ ">" | Acc]), []};
grab_for_blockhtml([{blocktag, [{{{tag, close}, Type}, Tg}]}
                    | T], Type, Acc) ->
    {lists:reverse([Tg | Acc]), T};
grab_for_blockhtml([{blocktag, [{{{tag, _}, GrabType}, Tg}]}
                    | T], Type, Acc) when GrabType =/= Type ->
    % blocktags grabbed in a blocktag need a line ending pushed
    grab_for_blockhtml(T, Type, ["\n", Tg | Acc]);
grab_for_blockhtml([{tag, {{{tag, self_closing}, _Ty}, Tg}}
                    | T], Type, Acc) ->
    grab_for_blockhtml(T, Type, [Tg | Acc]);
grab_for_blockhtml([H | T], Type, Acc) ->
    % any other line: dump its raw content as plain text
    {_Type, Content} = H,
    Str = make_plain_str(Content),
    grab_for_blockhtml(T, Type, [Str | Acc]).
+
%% Drop leading linefeed/blank typed lines; return the remainder
%% starting at the first line that is neither.
grab_empties([{Kind, _} | Rest]) when Kind =:= linefeed; Kind =:= blank ->
    grab_empties(Rest);
grab_empties(Lines) ->
    Lines.
+
%% @doc Join two token lines: turn a trailing hard break on P1 into a
%% <br /> tag (see make_br/1), then splice in the indent Pad as a
%% string token followed by the second line's tokens.
merge(P1, Pad, P2) ->
    NewP1 = make_br(P1),
    flatten([NewP1, {string, Pad} | P2]).
+
%% Markdown hard break: a run of trailing whitespace ('comp' token) or a
%% tab just before the line's final linefeed becomes a literal
%% " <br />\n" tag token; otherwise the line is returned unchanged.
make_br(Line) -> make_br1(lists:reverse(Line)).

make_br1([{{lf, _}, _}, {{ws, WsType}, _} | Rest])
  when WsType =:= comp; WsType =:= tab ->
    lists:reverse([{tags, " <br />\n"} | Rest]);
make_br1(Reversed) ->
    lists:reverse(Reversed).
+
%% Build an indent for nesting level N: a list of N two-space strings.
pad(N) -> build_pad(N, []).

build_pad(0, Acc) -> Acc;
build_pad(N, Acc) when N > 0 -> build_pad(N - 1, ["  " | Acc]).
+
%% this is a bit messy because of the way that hard lines are treated...
%% If your li's have a blank line between them the item gets wrapped in a para,
%% if not, they don't
%% BUT if one item is <p> wrapped then the next is too
%% @doc Render consecutive ul/ol lines into "<li>...</li>\n" strings;
%% returns {RemainingLines, RenderedItems}. Wrap tracks whether items
%% should be <p>-wrapped (propagates to the following item).
parse_list(_Type, [], _R, _I, A, _) ->
    {[], reverse(A)};
parse_list(Type, [{{Type, P}, _} | T], R, I, A, Wrap) ->
    {Rest, NewP, NewWrap} = grab(T, R, [], Wrap),
    Li = case NewWrap of
             false -> Ret = parse([{normal, P}], R),
                      % need to strip off the extra <p></p>'s
                      Ret2 = string:left(Ret, length(Ret) - 4),
                      Ret3 = string:right(Ret2, length(Ret2) -3),
                      Ret3 ++ "\n" ++ NewP ++ pad(I);
             true -> string:strip(parse([{normal, P}], R), right, ?LF)
                         ++ NewP ++ pad(I)
         end,
    % a following linefeed line forces <p>-wrapping of the NEXT item
    NewWrap2 = case T of
                   [] -> false; % doesnt matter
                   [H2 | _T2] -> case H2 of
                                     {linefeed, _} -> true;
                                     _ -> false
                                 end
               end,
    parse_list(Type, Rest, R, I, [pad(I) ++ "<li>"
                                  ++ string:strip(Li, right, ?LF)
                                  ++ "</li>\n" | A], NewWrap2);
parse_list(_Type, List, _R, _I, A, _) ->
    {List, reverse(A)}.
+
%% grab grabs normals, double codeblocks, linefeeds and blanks
%% BUT stop grabbing if a normal is preceded by a linefeed or blank
%% UNLESS the normal starts with white space :(
%% the third return parameter is 'true' if the 'li' should be
%% wrapped in '<p></p>' and false if it shouldn't
grab([{{codeblock, _}, S} | T] = List, R, Acc, W) ->
    case is_blockquote(S, T) of
        {{true, R1}, T2} -> grab(T2, R,
                                 ["</blockquote>",
                                  make_esc_str(R1, R),
                                  "<blockquote>" | Acc], W);
        {{esc_false, R1}, _T2} -> {R1, reverse(Acc), false};
        {false, T2} ->
            case is_double_indent(S) of
                false ->
                    {List, reverse(Acc), false};
                {true, R2} ->
                    % if it is a double indent - delete 4 spaces
                    % no it makes not sense to me neither :(
                    grab(T2, R, ["    " ++ make_esc_str(R2, R) | Acc], W)
            end
    end;
grab([{linefeed, _} | T], R, Acc, false) ->
    grab2(T, R, Acc, T, Acc, true);
grab([{linefeed, _} | T], R, Acc, true) ->
    grab2(T, R, ["\n" | Acc], T, Acc, true);
grab([{blank, _} | T], R, Acc, false) ->
    grab2(T, R, Acc, T, Acc, true);
grab([{blank, _} | T], R, Acc, true) ->
    grab2(T, R, ["\n" | Acc], T, Acc, true);
grab([{normal, P} | T], R, Acc, W) ->
    Li = case W of
             false -> make_esc_str(P, R);
             true -> "<p>"++ string:strip(make_esc_str(P, R), right, ?LF)
                         ++ "</p>"
         end,
    grab(T, R, [Li | Acc], W);
%% anything else terminates the grab
grab(List, _R, Acc, W) ->
    {List, reverse(Acc), W}.
+
%% the problem is knowing when to grab, if the list is followed by a long
%% string of blank lines and linefeeds and a normal then the linefeeds aren't
%% grabbed
%% if the list is followed by blank lines and linefeeds and a normal with an
%% initial whitespace it is grabbed...
%% LO/AO carry the original list/accumulator so they can be restored if
%% the lookahead decides not to grab.
grab2([{normal, P2} | T], R, Acc, LO, AO, W) ->
    case P2 of
        [{{ws, _}, _} | T2] ->
            Li = case W of
                     false -> make_esc_str(T2, R);
                     true -> "<p>" ++
                                 string:strip(make_esc_str(T2, R), right, ?LF)
                                 ++ "</p>"
                 end,
            grab(T, R, [Li | Acc], W);
        _ ->
            {LO, AO, false}
    end;
grab2([{linefeed, _} | T], R, Acc, LO, AO, _W) ->
    grab2(T, R, ["\n" | Acc], LO, AO, true);
grab2([{blank, _} | T], R, Acc, LO, AO, _W) ->
    grab2(T, R, ["\n" | Acc], LO, AO, true);
%% We dont want to grab this stuff so return the old list and the old acc
grab2(_List, _R, _Acc, LO, AO, _W) ->
    {LO, AO, true}.
+
%% A "double indent" is any mix of leading tabs (worth 4) and spaces
%% (worth 1) adding up to 8 or more, with at least one token after it.
%% Returns {true, Rest} with the tokens past the indent, or false.
is_double_indent(Tokens) -> count_indent(Tokens, 0).

%% NB: the empty-list clause comes first so that an indent with nothing
%% after it is NOT a double indent (matches the original clause order).
count_indent([], _Width)                  -> false;
count_indent(Rest, Width) when Width > 7  -> {true, Rest};
count_indent([{{ws, sp}, _} | T], Width)  -> count_indent(T, Width + 1);
count_indent([{{ws, tab}, _} | T], Width) -> count_indent(T, Width + 4);
count_indent(_Tokens, _Width)             -> false.
+
%% @doc Decide whether the codeblock line List actually opens a
%% blockquote; on true, grab the quote's lines out of T (see grab2/2).
%% Returns {{true, QuoteTokens}, RestLines} | {false, T} |
%% {{esc_false, Tokens}, T} for an escaped \> marker.
is_blockquote(List, T) ->
    case is_bq1(List, 0) of
        false -> {false, T};
        {esc_false, R} -> {{esc_false, R}, T};
        {true, R} -> {NewT, NewR} = grab2(T, R),
                     {{true, NewR}, NewT}
    end.
+
%% Walk the leading whitespace (sp = 1, tab = 4); a '>' followed by
%% whitespace after an indent wider than 3 opens a blockquote. An
%% escaped '\>' in that position yields {esc_false, Tokens} with the
%% backslash dropped. Anything else is false.
is_bq1([], _Width) -> false;
is_bq1([{{ws, sp}, _} | Rest], Width) -> is_bq1(Rest, Width + 1);
is_bq1([{{ws, tab}, _} | Rest], Width) -> is_bq1(Rest, Width + 4);
is_bq1([{{md, gt}, _}, {{ws, _}, _} | Rest], Width) when Width > 3 ->
    {true, Rest};
is_bq1([{{punc, bslash}, _}, {{md, gt}, GT}, {{ws, _}, WS} | Rest], Width)
  when Width > 3 ->
    {esc_false, [GT, WS | Rest]};
is_bq1(_Tokens, _Width) -> false.
+
%% Collect line payloads up to (and consuming) the first blank line.
%% R arrives reversed, so it is un-reversed before accumulating onto it.
grab2(Lines, R) -> gb2(Lines, lists:reverse(R)).

gb2([], Acc) -> {[], flatten(lists:reverse(Acc))};
gb2([{blank, _} | Rest], Acc) -> {Rest, flatten(lists:reverse(Acc))};
gb2([{_Type, Payload} | Rest], Acc) -> gb2(Rest, [Payload | Acc]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%
+%%% Make the lines from the raw tokens
+%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% @doc Chop the flat token stream into lines: each {{lf,_},_} token
%% terminates a line and is kept as that line's last token. Returns the
%% lines as a list of token lists.
make_lines(Tokens) -> split_lines(Tokens, [], []).

%% CurLine holds the current line's tokens in reverse; Lines holds the
%% completed lines in reverse.
split_lines([], [], Lines) ->
    lists:reverse(Lines);
split_lines([], CurLine, Lines) ->
    split_lines([], [], [lists:reverse(CurLine) | Lines]);
split_lines([{{lf, _}, _} = LF | Rest], CurLine, Lines) ->
    split_lines(Rest, [], [lists:reverse([LF | CurLine]) | Lines]);
split_lines([Tok | Rest], CurLine, Lines) ->
    split_lines(Rest, [Tok | CurLine], Lines).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%
+%%% Process the lines and give each line a type. The valid types are:
+%%% * normal line
+%%% * reference style links
+%%% * reference style images
+%%% * special line types
+%%% - blank
+%%% - SETEXT header lines
+%%% - ATX header lines
+%%% - unordered lists (including code blocks)
+%%% - ordered lists (including code blocks)
+%%% - blockquotes
+%%% - code blocks
+%%% - horizontal rules
+%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% @doc Classify every line (see t_l1/3) and collect reference
%% definitions; returns {TypedLines, Refs} with leading/trailing
%% blank lines stripped.
type_lines(Lines) ->
    {Refs, TypedLines} = t_l1(Lines, [], []),
    % io:format("TypedLines before stripping ~p~n", [TypedLines]),
    {strip_lines(TypedLines), Refs}.
+
%% @doc Give each line a type. A1 accumulates reference definitions
%% ({Id, {Url, Title}}); A2 accumulates the typed lines in reverse.
t_l1([], A1, A2) -> {A1, reverse(A2)};
%% this clause extracts URL and Image refs
%% (it is the only one that uses A1 and A2...)
%% inlines can have up to 3 spaces before it
t_l1([[{{ws, sp}, _},
       {{inline, open}, _} | T1] = H | T2], A1, A2) ->
    t_inline(H, T1, T2, A1, A2);
t_l1([[{{ws, tab}, _},
       {{inline, open}, _} | T1] = H | T2], A1, A2) ->
    t_inline(H, T1, T2, A1, A2);
t_l1([[{{ws, comp}, W},
       {{inline, open}, _} | T1] = H | T2], A1, A2) ->
    %% BUGFIX: the original evaluated this case, threw its result away and
    %% then unconditionally called t_inline/5 again; its false branch also
    %% recursed on the token tail T1 instead of the remaining lines T2.
    case gt(W, 3) of
        {true, _R} -> t_inline(H, T1, T2, A1, A2);
        false -> t_l1(T2, A1, [{normal, H} | A2]) % same exit as the final clause!
    end;
t_l1([[{{inline, open}, _} | T1] = H | T2], A1, A2) ->
    t_inline(H, T1, T2, A1, A2);

%% types setext lines
t_l1([[{{md, eq}, _} | _T] = H | T], A1, A2) ->
    t_l1(T, A1, [type_setext_h1(H) | A2]);
%% NOTE 1: generates a ul as the default not a normal line
%% NOTE 2: depending on the context this might generate an <h2> header
%% or an <hr />
%% NOTE 3: space - is typed to a bullet down in <ul> land...
t_l1([[{{md, dash}, _} | _T] = H | T], A1, A2) ->
    t_l1(T, A1, [type_setext_h2(H) | A2]);

%% types atx lines
t_l1([[{{md, atx}, _} | _T] = H | T], A1, A2) ->
    t_l1(T, A1, [type_atx(H) | A2]);

%% types blockquotes
%% a blockquote on its own or followed by a linefeed is
%% displayed 'as is' by showdown
t_l1([[{{md, gt}, _}] = H | T], A1, A2) ->
    t_l1(T, A1, [{normal, H} | A2]);
t_l1([[{{md, gt}, _}, {{lf, _}, _}] = H | T], A1, A2) ->
    t_l1(T, A1, [{normal, H} | A2]);
%% one with anything after it starts a blockquote
t_l1([[{{md, gt}, _} | _T1] = H | T], A1, A2) ->
    t_l1(T, A1, [{blockquote, H} | A2]);

%% types unordered lists lines
%% NOTE 1: the dashed version is generated in type_setext_h2
%% NOTE 2: the asterix version also might generate a horizontal rule
%% which is why it jumps to type_star2 <-- note the 2!!
t_l1([[{{ws, _}, _}, {{md, star}, _} = ST1,
       {{ws, _}, _} = WS1 | T1] = H | T], A1, A2) ->
    t_l1(T, A1, [{type_star2([ST1, WS1 | T1]), H} | A2]);
t_l1([[{{md, star}, _}, {{ws, _}, _} | _T1] = H | T], A1, A2) ->
    t_l1(T, A1, [{type_star2(H), H} | A2]);
t_l1([[{{ws, _}, _}, {{md, plus}, _},
       {{ws, _}, _} = W | T1] = H | T], A1, A2) ->
    t_l1(T, A1, [{{ul, make_list_str([W | T1])}, H} | A2]);
t_l1([[{{md, plus}, _}, {{ws, _}, _} = W | T1] = H | T], A1, A2) ->
    t_l1(T, A1, [{{ul, make_list_str([W | T1])}, H} | A2]);
%% UL based on dashes
t_l1([[{{ws, _}, _}, {{md, dash}, _},
       {{ws, _}, _} = W | T1] = H | T], A1, A2) ->
    t_l1(T, A1, [{{ul, make_list_str([W | T1])}, H} | A2]);

%% types ordered lists...
t_l1([[{{ws, _}, _}, {num, _} = N1| T1] | T], A1, A2) ->
    t_l1(T, A1, [type_ol([N1 | T1]) | A2]);
t_l1([[{num, _} | _T] = H | T], A1, A2) ->
    t_l1(T, A1, [type_ol(H) | A2]);

%% types horizontal rules for stars and underscores
%% dashes and some stars are done elsewhere...
t_l1([[{{md, underscore}, _} | _T1] = H | T], A1, A2) ->
    t_l1(T, A1, [type_underscore(H) | A2]);
t_l1([[{{md, star}, _} | _T1] = H | T], A1, A2) ->
    t_l1(T, A1, [type_star(H) | A2]);

%% Block level tags - these are look ahead they must be
%% on a single line (ie directly followed by a lf and nothing else)
t_l1([[{{{tag, _Type}, Tag}, _ } = H | T1] = List | T], A1, A2) ->
    case is_blank(T1) of
        false -> t_l1(T, A1, [{normal , List} | A2]);
        true  -> case is_block_tag(Tag) of
                     true  -> t_l1(T, A1, [{blocktag , [H]} | A2]);
                     false -> t_l1(T, A1, [{tag, [H | T1]} | A2])
                 end
    end;

%% types a blank line or a code block
t_l1([[{{lf, _}, _}] = H | T], A1, A2) ->
    t_l1(T, A1, [{linefeed, H} | A2]);
t_l1([[{{ws, _}, _} | _T1] = H | T], A1, A2) ->
    t_l1(T, A1, [type_ws(H) | A2]);

%% Final clause...
t_l1([H | T], A1, A2) ->
    t_l1(T, A1, [{normal , H} | A2]).
+
%% @doc Try to snip a reference definition out of the line's tokens T1;
%% on success record {Id, {Url, Title}} in A1 and type the line as an
%% inline ref, otherwise fall back to a normal line.
t_inline(H, T1, T2, A1, A2) ->
    case snip_ref(T1) of
        {Type, {Id, {Url, Title}}} -> t_l1(T2, flatten([{Id, {Url, Title}} | A1]),
                                           [{Type, H} | A2]);
        normal -> t_l1(T2, A1, [{normal, H} | A2])
    end.
+
%% strips blank/linefeed lines from both the beginning and the end
strip_lines(Lines) ->
    lists:reverse(drop_empty(lists:reverse(drop_empty(Lines)))).

drop_empty([{linefeed, _} | Rest]) -> drop_empty(Rest);
drop_empty([{blank, _} | Rest])    -> drop_empty(Rest);
drop_empty(Other)                  -> Other.
+
+%%
+%% Loads of type rules...
+%%
+
%% A line is blank when it holds nothing but whitespace tokens,
%% optionally terminated by a single linefeed token.
is_blank([])                    -> true;
is_blank([{{lf, _}, _}])        -> true;
is_blank([{{ws, _}, _} | Rest]) -> is_blank(Rest);
is_blank(_Tokens)               -> false.
+
%% @doc True if Tag (a lowercase string) names an HTML block-level
%% element. Table-driven replacement for the original clause-per-tag
%% version; the set of tags is identical.
is_block_tag(Tag) ->
    lists:member(Tag,
                 ["address", "blockquote", "center", "dir", "div", "dl",
                  "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6",
                  "hr", "isindex", "menu", "noframes", "noscript", "ol",
                  "p", "pre", "table", "thead", "tbody", "tr", "td",
                  "ul"]).
+
%% @doc Classify a line that starts with an underscore: an all-underscore
%% line is an <hr />; otherwise type_underscore2/1 decides.
%% FIX: 'maybe' is quoted because it is a reserved word from OTP 27
%% (the quoted form is the same atom on all releases).
type_underscore(List) ->
    case type_underscore1(trim_right(List)) of
        hr      -> {hr, List};
        'maybe' -> {type_underscore2(List), List}
    end.
+
%% All-underscore tokens mean an <hr />; anything else might still be
%% one. FIX: 'maybe' quoted - reserved word from OTP 27.
type_underscore1([])                          -> hr;
type_underscore1([{{md, underscore}, _} | T]) -> type_underscore1(T);
type_underscore1(_List)                       -> 'maybe'.
+
%% @doc Exactly "_ _ _" (ignoring trailing whitespace) is a horizontal
%% rule; any other underscore-led line is normal.
type_underscore2(List) ->
    case trim_right(List) of % be permissive of trailing spaces
        [{{md, underscore}, _}, {{ws, _}, _},
         {{md, underscore}, _}, {{ws, _}, _},
         {{md, underscore}, _}] -> hr;
        _Other -> normal
    end.
+
%% @doc Classify a line that starts with a star: all-star is an <hr />;
%% otherwise type_star2/1 decides between a ul bullet and a normal line.
%% FIX: 'maybe' is quoted - reserved word from OTP 27 (same atom).
type_star(List) ->
    Trim = trim_right(List),
    case type_star1(Trim) of % be permissive of trailing spaces
        hr -> {hr, trim_right(Trim)};
        'maybe' ->
            Type = type_star2(List),
            % if it is a normal line we prepend it with a special
            % non-space filling white space character
            case Type of
                normal -> {normal, [{{ws, none}, none} | List]};
                _      -> {Type, List}
            end
    end.
+
%% All-star tokens mean an <hr />; anything else might still be one.
%% FIX: 'maybe' quoted - reserved word from OTP 27.
type_star1([])                    -> hr;
type_star1([{{md, star}, _} | T]) -> type_star1(T);
type_star1(_List)                 -> 'maybe'.
+
%% @doc "* * *" (ignoring trailing whitespace) is a horizontal rule;
%% "* <content>" is an unordered-list bullet; anything else is normal.
type_star2(List) ->
    case trim_right(List) of
        [{{md, star}, _}, {{ws, _}, _},
         {{md, star}, _}, {{ws, _}, _},
         {{md, star}, _}] -> hr;
        _Other ->
            case List of
                [{{md, star}, _},
                 {{ws, _}, _}= WS | T] -> {ul, make_list_str([WS | T])};
                _Other2 -> normal
            end
    end.
+
%% @doc Classify a digit-led line: an ordered-list item, a normal line,
%% or a normal line with an escaped fullstop (backslash dropped).
type_ol(List) ->
    case type_ol1(List, []) of
        normal -> {normal, List};
        {ol, Str} -> {{ol, Str}, List};
        {esc_normal, Str} -> {normal, Str}
    end.
+
+
%% An escaped fullstop straight after the digits means "render normally"
%% with the backslash dropped; digits followed by ". " open an
%% ordered-list item; anything else is a normal line. Digits are
%% accumulated so they can be restored in the escaped case.
type_ol1([{num, _} = N,
          {{punc, bslash}, _},
          {{punc, fullstop}, _} = FS | Rest], Digits) ->
    {esc_normal, flatten([lists:reverse(Digits), N, FS | Rest])};
type_ol1([{num, _} = N | Rest], Digits) ->
    type_ol1(Rest, [N | Digits]);
type_ol1([{{punc, fullstop}, _}, {{ws, _}, _} | Rest], _Digits) ->
    {ol, Rest};
type_ol1(_Tokens, _Digits) ->
    normal.
+
+%% You need to understand what this function is trying to d...
+%% '### blah' is fine
+%% '### blah ###' is reduced to '### blah' because trailing #'s are
+%% just for show but...
+%% '##' is like appling '#' to '#' <-- applying 1 less styling to a single #
+%% and '###' is like appling '##' to '#' etc, etc
+%% but after you hit 6#'s you just get this for a single hash
+%% ie '#############' is like applying '######' to a single '#'
+%% but/and '######## blah' is like apply '######' to '## blah'
+%% strip trailing #'s as they are decorative only...
%% @doc Classify an atx (#-led) heading line; see the long comment above
%% for the all-hashes reduction rules. list_to_atom is safe here: Sz is
%% capped at 6 by get_atx_size/1, so only atoms h1..h6 are created.
type_atx(List) ->
    {Sz, R} = get_atx_size(List),
    A = [{{md, atx}, "#"}],
    Type =
        case is_all_hashes(R) of
            true ->
                if
                    Sz == 1 ->
                        normal;
                    ((Sz > 1) andalso (Sz < 6)) ->
                        Ns = integer_to_list(Sz - 1),
                        Hn = list_to_atom("h" ++ Ns),
                        {Hn, A};
                    ((Sz == 6) andalso (R == [])) ->
                        {h5, A};
                    ((Sz == 6) andalso (R == [{{lf, lf}, "\n"}])) ->
                        {h5, A};
                    ((Sz == 6) andalso (R == [{{lf, crlf}, "\r\n"}])) ->
                        {h5, A};
                    ((Sz == 6) andalso (R =/= [])) ->
                        {h6, A}
                end;
            false ->
                Ns = integer_to_list(Sz),
                Hn = list_to_atom("h" ++ Ns),
                {Hn, strip_atx(R)}
        end,
    {Type, List}.
+
%% True when the tokens are nothing but atx hashes, optionally ending in
%% a single linefeed token.
is_all_hashes([])                     -> true;
is_all_hashes([{{lf, _}, _}])         -> true;
is_all_hashes([{{md, atx}, _} | Rest]) -> is_all_hashes(Rest);
is_all_hashes(_Tokens)                -> false.
+
%% @doc Count leading atx hashes, skipping interleaved whitespace and
%% capping the count at 6; returns {Count, RemainingTokens}.
get_atx_size(Tokens) -> count_atx(Tokens, 0).

count_atx([{{md, atx}, _} = A | T], 6) -> {6, [A | T]};
count_atx([{{md, atx}, _} | T], N)     -> count_atx(T, N + 1);
count_atx([{{ws, _}, _} | T], N)       -> count_atx(T, N);
count_atx(Rest, N)                     -> {N, Rest}.
+
%% Strip decorative trailing #'s; a hash pair straddling the final
%% linefeed drops the linefeed too (matches the original clauses).
strip_atx(Tokens) ->
    lists:reverse(drop_trailing_atx(lists:reverse(Tokens))).

drop_trailing_atx([{{lf, _}, _}, {{md, atx}, _} | T]) -> drop_trailing_atx(T);
drop_trailing_atx([{{md, atx}, _} | T])               -> drop_trailing_atx(T);
drop_trailing_atx(Rest)                               -> Rest.
+
%% A line made only of '=' tokens is a SETEXT h1 underline; anything
%% else reverts to a normal line.
type_setext_h1(List) -> eq_run(List, []).

%% terminates on running out or new line
eq_run([{{lf, _}, _} = LF], Acc)      -> {setext_h1, lists:reverse([LF | Acc])};
eq_run([], Acc)                       -> {setext_h1, lists:reverse(Acc)};
eq_run([[] | Rest], Acc)              -> eq_run(Rest, Acc);
eq_run([{{md, eq}, _} = Eq | Rest], Acc) -> eq_run(Rest, [Eq | Acc]);
eq_run(Rest, Acc)                     -> {normal, flatten([Acc | Rest])}.
+
%% @doc A dash-led line: all dashes could be a SETEXT h2 underline or an
%% <hr /> (decided later by look-behind in p1); otherwise it may be a
%% "- " bullet or a plain normal line (see type_s_h2_2/1).
type_setext_h2(List) ->
    case type_s_h2_1(List) of
        h2_or_hr -> {h2_or_hr, List};
        not_h2 -> {type_s_h2_2(trim_right(List)), List}
    end.
%% terminates on running out or new line
%% all dashes (ignoring stray empties) => could be h2 underline or hr
type_s_h2_1([{{lf, _}, _}])           -> h2_or_hr;
type_s_h2_1([])                       -> h2_or_hr;
type_s_h2_1([[] | Rest])              -> type_s_h2_1(Rest);
type_s_h2_1([{{md, dash}, _} | Rest]) -> type_s_h2_1(Rest);
type_s_h2_1(_Other)                   -> not_h2.
+
%% @doc "- - -" is a horizontal rule; "- <content>" is a ul bullet;
%% anything else is a normal line.
type_s_h2_2([{{md, dash}, _}, {{ws,_}, _},
             {{md, dash}, _}, {{ws, _}, _},
             {{md, dash}, _}]) -> hr;
type_s_h2_2([{{md, dash}, _},
             {{ws, _}, _} = WS | T]) -> {ul, make_list_str([WS | T])};
type_s_h2_2(_List) -> normal.
+
%% @doc Classify a whitespace-led line: blank, a codeblock (4+ columns
%% of indent), or a normal line.
type_ws(List) ->
    case type_ws1(List) of
        blank -> {blank, List};
        try_codeblock ->
            case type_ws2(List) of
                normal -> {normal, List};
                {codeblock, Ret} -> {{codeblock, Ret}, List}
            end
    end.
+
%% Only whitespace tokens (or stray empties), optionally ending in a
%% single linefeed, means the line is blank; otherwise try a codeblock.
type_ws1([])                    -> blank;
type_ws1([{{lf, _}, _}])        -> blank;
type_ws1([[] | Rest])           -> type_ws1(Rest);
type_ws1([{{ws, _}, _} | Rest]) -> type_ws1(Rest);
type_ws1(_Other)                -> try_codeblock.
+
%% 4 or more spaces takes you over the limit
%% (a tab is 4...)
type_ws2([{{ws, tab}, _} | T]) -> {codeblock, T};
type_ws2([{{ws, comp}, W} | T]) -> case gt(W, 4) of
                                       {true, R} -> {codeblock, [R| T]};
                                       false -> normal
                                   end;
% a single space token is never 4 columns wide
type_ws2([{{ws, sp}, _} | _T]) -> normal.
+
%% @doc Expand a tab in String to four spaces (re:replace without the
%% 'global' option replaces only the first match - preserved from the
%% original), then test whether the expanded string is at least Len
%% characters. Returns {true, WsToken} carrying the remainder past
%% column Len, or false.
gt(String, Len) ->
    Expanded = re:replace(String, "\t", "    ", [{return, list}]),
    ExpandedLen = length(Expanded),
    case ExpandedLen >= Len of
        true ->
            Rest = string:substr(Expanded, Len + 1, ExpandedLen),
            {true, {{ws, sp}, Rest}};
        false ->
            false
    end.
+
%% make a tag into a string
%% tag tokens pass their raw text through; everything else is rendered
%% via make_str/2 (which resolves references R)
make_tag_str(L, R) -> make_tag1(L, R, []).

make_tag1([], _R, Acc) -> lists:reverse(Acc);
make_tag1([{{{tag, _Type}, _Tag}, B} | T], R, Acc) ->
    make_tag1(T, R, [B | Acc]);
make_tag1([H | T], R, Acc) ->
    make_tag1(T, R, [make_str([H], R) | Acc]).
+
%% Replace non-breaking spaces (char 160) with plain spaces (char 32)
%% in a character list.
esc_tag(String) ->
    [if C =:= 160 -> 32; true -> C end || C <- String].
+
%% if it is a list we need to discard the initial white space...
%% a double-indented list item body is rendered as an inline code block
make_list_str([{{ws, _}, _} | T] = List) ->
    case is_double_indent(List) of
        false -> T;
        {true, R} -> flatten([{tags, "<pre><code>"} ,R ,
                              {tags, "</code></pre>\n\n"} | []])
    end.
+
+%% All ref processing can ignore the original values 'cos those
+%% have already been captured at a higher level
%% @doc Try to read a reference definition ("[id]: url \"title\"") from
%% the line's tokens; returns {inlineref, {Id, {Url, Title}}} with the
%% url html-encoded, or 'normal' if the line is not a reference.
snip_ref(List) ->
    case get_id(List) of
        {[{_, Id}], Rest} -> {_Rest2, Ref, Title} = parse_inline(Rest),
                             Ref2 = trim(Ref),
                             Rs = htmlencode(make_plain_str(Ref2)),
                             Ts = make_plain_str(Title),
                             {inlineref, {Id, {Rs, Ts}}};
        normal -> normal
    end.
+
%% @doc Split a reference line at the "]: " marker: everything before it
%% is the id, the rest is left for url/title parsing. Returns
%% {IdTokens, Rest} or 'normal' when no marker is found.
get_id(Tokens) -> find_id(Tokens, []).

find_id([], _Acc) -> normal;
find_id([{{inline, close}, _},
         {{punc, colon}, _}, {{ws, _}, _} | Rest], Acc) ->
    {lists:reverse(Acc), Rest};
find_id([H | T], Acc) -> find_id(T, [H | Acc]).
+
%% @doc Parse the url (and optionally the quoted/bracketed title) of a
%% reference definition; returns {Rest, UrlTokens, TitleTokens}.
parse_inline(List) -> p_in1(List, []).

%% snip off the terminal linefeed (if there is one...)
p_in1([{{lf, _}, _} | []], A) -> {[], reverse(A), []};
p_in1([], A) -> {[], reverse(A), []};
%% brackets can be escaped
p_in1([{{punc, bslash}, _},
       {bra, _} = B | T], A) -> p_in1(T, [B | A]);
p_in1([{{punc, bslash}, _},
       {ket, _} = B | T], A) -> p_in1(T, [B | A]);
p_in1([{{punc, bslash}, _},
       {{punc, doubleq}, _} = Q | T], A) -> p_in1(T, [Q | A]);
p_in1([{{punc, bslash}, _},
       {{punc, singleq}, _} = Q | T], A) -> p_in1(T, [Q | A]);
%% these clauses capture the start of the title...
p_in1([{{punc, doubleq}, _} | T], A) -> p_in2(T, reverse(A), doubleq, []);
p_in1([{{punc, singleq}, _} | T], A) -> p_in2(T, reverse(A), singleq, []);
p_in1([{bra, _} | T], A) -> p_in2(T, reverse(A), brackets, []);
p_in1([{ket, _} | T], A) -> {T, reverse(A), []};
p_in1([H | T], A) -> p_in1(T, [H | A]).
+
%% this gets titles in single and double quotes
%% the delimiter type is passed in as 'D'; once the closing delimiter is
%% seen D becomes 'none' and further input is silently discarded
p_in2([], Url, _D, A) -> {[], Url, lists:flatten(lists:reverse(A))};
%% brackets can be escaped
p_in2([{{punc, bslash}, _},
       {bra, _} = B | T], Url, D, A) -> p_in2(T, Url, D, [B | A]);
p_in2([{{punc, bslash}, _},
       {ket, _} = B | T], Url, D, A) -> p_in2(T, Url, D, [B | A]);
%% quotes can be escaped
p_in2([{{punc, bslash}, _},
       {{punc, doubleq}, _} = Q | T], Url, D, A) -> p_in2(T, Url, D, [Q | A]);
p_in2([{{punc, bslash}, _},
       {{punc, singleq}, _} = Q | T], Url, D, A) -> p_in2(T, Url, D, [Q | A]);
%% these clauses capture the end of the title and drop the delimiter...
p_in2([{{punc, doubleq}, _} | T], Url, doubleq, A) -> p_in2(T, Url, none, A);
p_in2([{{punc, singleq}, _} | T], Url, singleq, A) -> p_in2(T, Url, none, A);
p_in2([{ket, _} | T], Url, brackets, A) -> p_in2(T, Url, none, A);
%% terminator clause
p_in2([{ket, _} | T], Url, none, A) -> {T, Url, lists:flatten(lists:reverse(A))};
%% this clause silently discards stuff after the delimiter...
%% BUGFIX: the original recursed with [A] instead of A, nesting the
%% accumulator a level per discarded token; the trailing
%% flatten(reverse(..)) then returned the title text reversed.
p_in2([_H | T], Url, none, A) -> p_in2(T, Url, none, A);
p_in2([H | T], Url, D, A) -> p_in2(T, Url, D, [H | A]).
+
%% Strip leading and trailing whitespace tokens (and stray empty lists).
trim(Tokens) -> trim_left(trim_right(Tokens)).

%% right trim = reverse, left trim, reverse back
trim_right(Tokens) -> reverse(trim_left(reverse(Tokens))).

trim_left([{{ws, _}, _} | Rest]) -> trim_left(Rest);
trim_left([[] | Rest])           -> trim_left(Rest);
trim_left(Other)                 -> Other.
+
%% Drop a single trailing linefeed token, if the line has one.
snip(List) ->
    case lists:reverse(List) of
        [{{lf, _}, _} | Rest] -> lists:reverse(Rest);
        _ -> List
    end.
+
+%% end of ref processing
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%
+%%% Build the Lexed Token List
+%%% This is a two part lexer, first it chunks the input and then on the second
+%%% pass it gathers it into lines and types the lines
+%%%
+%%% NOTE that there are two different styles of processing lines:
+%%% * markdown transformed
+%%% * block
+%%% inside block processing the whole text is dumped and just url encoded
+%%% and the original text is always maintained during the lexing/parsing
+%%% so that it can be recreated if the context requires it...
+%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
%% @doc First lexer pass: tokenise the raw string (l1/3), then collapse
%% adjacent whitespace tokens (merge_ws/1).
lex(String) -> merge_ws(l1(String, [], [])).
+
%% @doc Collapse runs of adjacent whitespace tokens into single
%% {{ws, comp}, Chars} ("composite") tokens, concatenating their text.
merge_ws(Tokens) -> compact_ws(Tokens, []).

compact_ws([], Acc) ->
    reverse(Acc);
compact_ws([{{ws, _}, W1}, {{ws, _}, W2} | Rest], Acc) ->
    compact_ws([{{ws, comp}, W1 ++ W2} | Rest], Acc);
compact_ws([Tok | Rest], Acc) ->
    compact_ws(Rest, [Tok | Acc]).
+
%% l1(Input, LineFragAcc, TokenAcc) - the character-level lexer.
%% Single markdown-significant characters become tagged tokens; runs of
%% other characters accumulate in A1 and are flushed as {string, _}
%% tokens via l2/1. The trailing %' / %" comments keep editor syntax
%% highlighting sane after the quote-character clauses.
%% this is the terminal head which ends the parsing...
l1([], [], A2) -> flatten(reverse(A2));
l1([], A1, A2) -> l1([], [], [l2(A1) | A2]);
%% these two heads capture opening and closing tags
l1([$<, $/|T], A1, A2) -> {Tag, NewT} = closingdiv(T, []),
                          l1(NewT, [], [Tag, l2(A1) | A2]);
l1([$< | T], A1, A2) -> {Tag, NewT} = openingdiv(T),
                        l1(NewT, [], [Tag , l2(A1) | A2]);
%% these clauses are the normal lexer clauses
l1([$= | T], A1, A2) -> l1(T, [], [{{md, eq}, "="}, l2(A1) | A2]);
l1([$- | T], A1, A2) -> l1(T, [], [{{md, dash}, "-"}, l2(A1) | A2]);
l1([$# | T], A1, A2) -> l1(T, [], [{{md, atx}, "#"}, l2(A1) | A2]);
l1([$> | T], A1, A2) -> l1(T, [], [{{md, gt}, ">"}, l2(A1) | A2]);
l1([$+ | T], A1, A2) -> l1(T, [], [{{md, plus}, "+"}, l2(A1) | A2]);
l1([$* | T], A1, A2) -> l1(T, [], [{{md, star}, "*"}, l2(A1) | A2]);
l1([$_ | T], A1, A2) -> l1(T, [], [{{md, underscore}, "_"}, l2(A1) | A2]);
l1([$1 | T], A1, A2) -> l1(T, [], [{num, "1"}, l2(A1) | A2]);
l1([$2 | T], A1, A2) -> l1(T, [], [{num, "2"}, l2(A1) | A2]);
l1([$3 | T], A1, A2) -> l1(T, [], [{num, "3"}, l2(A1) | A2]);
l1([$4 | T], A1, A2) -> l1(T, [], [{num, "4"}, l2(A1) | A2]);
l1([$5 | T], A1, A2) -> l1(T, [], [{num, "5"}, l2(A1) | A2]);
l1([$6 | T], A1, A2) -> l1(T, [], [{num, "6"}, l2(A1) | A2]);
l1([$7 | T], A1, A2) -> l1(T, [], [{num, "7"}, l2(A1) | A2]);
l1([$8 | T], A1, A2) -> l1(T, [], [{num, "8"}, l2(A1) | A2]);
l1([$9 | T], A1, A2) -> l1(T, [], [{num, "9"}, l2(A1) | A2]);
l1([$0 | T], A1, A2) -> l1(T, [], [{num, "0"}, l2(A1) | A2]);
l1([$. | T], A1, A2) -> l1(T, [], [{{punc, fullstop}, "."}, l2(A1) | A2]);
l1([$: | T], A1, A2) -> l1(T, [], [{{punc, colon}, ":"}, l2(A1) | A2]);
l1([$' | T], A1, A2) -> l1(T, [], [{{punc, singleq}, "'"}, l2(A1) | A2]); %'
l1([$" | T], A1, A2) -> l1(T, [], [{{punc, doubleq}, "\""}, l2(A1) | A2]); %"
l1([$` | T], A1, A2) -> l1(T, [], [{{punc, backtick}, "`"}, l2(A1) | A2]);
l1([$! | T], A1, A2) -> l1(T, [], [{{punc, bang}, "!"}, l2(A1) | A2]);
l1([$\\ | T], A1, A2) -> l1(T, [], [{{punc, bslash}, "\\"}, l2(A1) | A2]);
l1([$/ | T], A1, A2) -> l1(T, [], [{{punc, fslash}, "/"}, l2(A1) | A2]);
l1([$( | T], A1, A2) -> l1(T, [], [{bra, "("}, l2(A1) | A2]);
l1([$) | T], A1, A2) -> l1(T, [], [{ket, ")"}, l2(A1) | A2]);
l1([$[ | T], A1, A2) -> l1(T, [], [{{inline, open}, "["}, l2(A1) | A2]);
l1([$] | T], A1, A2) -> l1(T, [], [{{inline, close}, "]"}, l2(A1) | A2]);
%% note there is a special 'whitespace' {{ws, none}, ""} which is used to generate non-space
%% filling whitespace for cases like '*bob* is great' which needs a non-space filling
%% whitespace prepended to trigger emphasis so it renders as "<em>bob</em> is great...
%% that 'character' doesn't exist so isn't in the lexer but appears in the parser
l1([?SPACE | T], A1, A2) -> l1(T, [], [{{ws, sp}, " "}, l2(A1) | A2]);
l1([?TAB | T], A1, A2) -> l1(T, [], [{{ws, tab}, "\t"}, l2(A1) | A2]);
%% FIX: was "&nbsp" - the entity needs its terminating semicolon to be
%% valid HTML when the original text is echoed into the output.
l1([?NBSP | T], A1, A2) -> l1(T, [], [{{ws, sp}, "&nbsp;"}, l2(A1) | A2]);
l1([?CR, ?LF | T], A1, A2) -> l1(T, [], [{{lf, crlf}, [?CR , ?LF]}, l2(A1) | A2]);
l1([?LF | T], A1, A2) -> l1(T, [], [{{lf, lf}, [?LF]}, l2(A1) | A2]);
%% l1([?CR | T], A1, A2) -> l1(T, [], [{{lf, cr}, [?CR]}, l2(A1) | A2]);
%% this final clause accumulates line fragments
l1([H|T], A1, A2) -> l1(T, [H |A1] , A2).
+
%% Flush an accumulated (reversed) character list as a {string, _}
%% token; an empty accumulator yields no token at all.
l2([])    -> [];
l2(Chars) -> {string, lists:flatten(lists:reverse(Chars))}.
+
%% need to put in regexes for urls and e-mail addies
%% Classify the text following a '<': an absolute URL, an e-mail
%% address, or (failing both) an HTML opening tag.
openingdiv(String) ->
    case get_url(String) of
        {{url, _}, _} = UrlToken ->
            UrlToken;
        not_url ->
            case get_email_addie(String) of
                {{email, _}, _} = EmailToken -> EmailToken;
                not_email                    -> openingdiv1(String, [])
            end
    end.
+
% dumps out a list if it is not an opening div
openingdiv1([], Acc) ->
    %% no '>' ever arrived: re-lex the consumed text behind a literal '<'
    {flatten([{{punc, bra}, "<"} | lex(reverse(Acc))]), []};
openingdiv1([$/, $> | T], Acc) ->
    {tag_token(self_closing, Acc, "/>"), T};
%% special for non-tags
openingdiv1([$> | T], []) ->
    {[{{punc, bra}, "<"}, {{punc, ket}, ">"}], T};
openingdiv1([$> | T], Acc) ->
    {tag_token(open, Acc, ">"), T};
openingdiv1([H | T], Acc) ->
    openingdiv1(T, [H | Acc]).

%% Build a {{tag, Type}, Name} token from the accumulated (reversed) tag
%% text; the tag name is the first lower-cased word, the original text is
%% kept verbatim so it can be reproduced later.
tag_token(Type, Acc, Close) ->
    Text = flatten(reverse(Acc)),
    [Tag | _] = string:tokens(string:to_lower(Text), " "),
    {{{tag, Type}, Tag}, "<" ++ Text ++ Close}.
+
% dumps out a list if it is not a closing div
closingdiv([], Acc) ->
    %% no '>' ever arrived: re-lex the consumed text behind literal "</"
    {flatten([{{punc, bra}, "<"},
              {{punc, fslash}, "/"}
              | lex(reverse(Acc))]), []};
closingdiv([$> | T], Acc) ->
    Text = flatten(reverse(Acc)),
    [Tag | _] = string:tokens(string:to_lower(Text), " "),
    {{{{tag, close}, Tag}, "</" ++ Text ++ ">"}, T};
closingdiv([H | T], Acc) ->
    closingdiv(T, [H | Acc]).
+
%% Recognise an absolute http(s):// URL at the start of String.
%% Returns {{url, Url}, Rest} (Rest = input after the closing '>'),
%% or not_url when the scheme does not match.
get_url(String) ->
    %% FIX: 's' is optional - the original '(S|s)*' also accepted
    %% malformed schemes such as "httpss://" or "httpSsS://".
    HTTP_regex = "^(H|h)(T|t)(T|t)(P|p)(S|s)?://",
    case re:run(String, HTTP_regex) of
        nomatch    -> not_url;
        {match, _} -> get_url1(String, [])
    end.

%% Consume characters up to the terminating '>'; when there is no
%% terminator the whole remainder is the URL.
get_url1([], Acc) ->
    URL = lists:flatten(lists:reverse(Acc)),
    {{url, URL}, []};
% allow escaped kets
get_url1([$\\, $> | T], Acc) ->
    get_url1(T, [$>, $\\ | Acc]);
get_url1([$> | T], Acc) ->
    URL = lists:flatten(lists:reverse(Acc)),
    {{url, URL}, T};
get_url1([H | T], Acc) ->
    get_url1(T, [H | Acc]).
+
%% Recognise an e-mail address terminated by '>' at the start of String.
%% Returns {{email, Addr}, Rest} or not_email.
get_email_addie(String) ->
    Snip_regex = ">",
    case re:run(String, Snip_regex) of
        nomatch -> not_email;
        {match, [{N, _} | _T]} ->
            {Possible, [$> | T]} = lists:split(N, String),
            %% FIX: literal dots must be written "\\." - a bare "\." in
            %% an Erlang string does not deliver a backslash to the
            %% regex engine, so the dots matched any character.
            EMail_regex = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+"
                ++ "(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*"
                ++ "@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+"
                ++ "(?:[a-zA-Z]{2}|com|org|net|gov|mil"
                ++ "|biz|info|mobi|name|aero|jobs|museum)",
            case re:run(Possible, EMail_regex) of
                nomatch    -> not_email;
                {match, _} -> {{email, Possible}, T}
            end
    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%
+%%% Internal functions
+%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Flatten a token list back to its plain text, rendering the synthetic
%% non-space-filling whitespace token as a single space.
make_plain_str(Tokens) -> m_plain(Tokens, []).

m_plain([], Acc) ->
    lists:flatten(lists:reverse(Acc));
m_plain([{{ws, none}, none} | Rest], Acc) ->
    m_plain(Rest, [" " | Acc]);
m_plain([{_, Str} | Rest], Acc) ->
    m_plain(Rest, [Str | Acc]).
+
%% Render a token list to an escaped string: {tags, _} fragments are
%% passed through untouched, every other token goes through make_str/2
%% one at a time.
make_esc_str(Tokens, Refs) -> m_esc(Tokens, Refs, []).

m_esc([], _Refs, Acc) ->
    lists:flatten(lists:reverse(Acc));
m_esc([{tags, _} = Tag | Rest], Refs, Acc) ->
    m_esc(Rest, Refs, [Tag | Acc]);
m_esc([Token | Rest], Refs, Acc) ->
    m_esc(Rest, Refs, [make_str([Token], Refs) | Acc]).
+
+
%% Turn a token list into an HTML string, resolving inline links and
%% images against the reference list Refs.
make_str(List, Refs) -> m_str1(List, Refs, []).

%% m_str1(Tokens, Refs, Acc) -> string()
%% The accumulated output is finally run through htmlchars/1 for
%% character-level escaping and emphasis handling. Clause order matters:
%% the two-token lookaheads must come before the single-token clauses.
m_str1([], _R, A) ->
    Flat = flatten(reverse(A)),
    htmlchars(Flat);
%% "![alt](url)" - an inline image
m_str1([{{punc, bang}, B}, {{inline, open}, O} | T], R, A) ->
    case get_inline(T, R, [], img) of
        {Rest, {Url, Title, Acc}} -> Tag = [make_img_tag(Url, Acc, Title)],
                                     m_str1(Rest, R, [Tag | A]);
        %% not an image after all: re-emit the literal "![" and carry on
        {Rest, Tag} -> m_str1(Rest, R, [Tag, O, B | A])
    end;
%% escape inline open's...
m_str1([{{punc, bslash}, _}, {{inline, open}, O} | T], R, A) ->
    m_str1(T, R, [O | A]);
%% "[text](url)" / "[text][ref]" - an inline link
m_str1([{{inline, open}, O} | T], R, A) ->
    case get_inline(T, R, [], url) of
        {Rest, {Url, Title, Acc}} ->
            Tit = case Title of
                      [] -> [];
                      _ -> " title=\"" ++ Title ++ "\""
                  end,
            Tag = [{tags, "<a href=\"" ++ Url ++ "\""
                    ++ Tit ++ ">"}, Acc,
                   {tags, "</a>"} | []],
            m_str1(Rest, R, [Tag | A]);
        %% not a link: re-emit the literal "[" and carry on
        {Rest, Tag} ->
            m_str1(Rest, R, [Tag, O | A])
    end;
%% bare e-mail address (from <addr@host>) -> mailto anchor
%% NOTE(review): the token order puts the address inside the href
%% attribute of a self-closed <a ... /> - confirm the intended markup.
m_str1([{email, Addie} | T], R, A) ->
    m_str1(T, R, [{tags, "\" />"}, Addie, {tags, "<a href=\"mailto:"}| A]);
%% bare url -> self-linking anchor
m_str1([{url, Url} | T], R, A) ->
    m_str1(T, R, [ {tags, "</a>"}, Url, {tags, "\">"}, Url,
                   {tags, "<a href=\""} | A]);
%% already-built html fragments pass straight through
m_str1([{tags, _} = Tag | T], R, A) ->
    m_str1(T, R, [Tag | A]);
%% literal html tags in the source are entity-escaped for display
m_str1([{{{tag, Type}, Tag}, _} | T], R, A) ->
    Tag2 = esc_tag(Tag),
    TagStr = case Type of
                 open -> {tags, "&lt;" ++ Tag2 ++ "&gt;"};
                 close -> {tags, "&lt;/" ++ Tag2 ++ "&gt;"};
                 self_closing -> {tags, "&lt;" ++ Tag2 ++ " /&gt;"}
             end,
    m_str1(T, R, [TagStr | A]);
%% any other token contributes its original text
m_str1([{_, Orig} | T], R, A) ->
    m_str1(T, R, [Orig | A]).
+
% if the inline doesn't terminate its not an inline...
%% get_inline(Tokens, Refs, Acc, img | url) resolves the tail of an
%% inline link/image (everything after the opening "[").
%% Success: {RestTokens, {Url, Title, LinkText}}.
%% Failure: {RestTokens, PlainString} - the caller re-emits it literally.
get_inline([], _R, A, _) ->
    {[], make_plain_str(reverse(A))};
% a url can contain an image inline
get_inline([{{punc, bang}, _B}, {{inline, open}, _O} | T], R, A, url) ->
    {Rest, {Url, Title, Acc}} = get_inline(T, R, A, img),
    Tag = make_img_tag(Url, Acc, Title),
    % We double tag the tag so that it can get through the flatteners..
    get_inline(Rest, R, [{tags, Tag} | A], url);
%% "](" - an inline url/title follows
get_inline([{{inline, close}, _}, {bra, _} | T], _R, A, _) ->
    {Rest, Url, Title} = parse_inline(T),
    Tag = {string:strip(make_plain_str(Url)),
           make_plain_str(Title),
           make_plain_str(reverse(A))},
    {Rest, Tag};
%% for img's but not url's you need to allow a single space between them
%% to be compatible with showdown :(
get_inline([{{inline, close}, _}, {{ws, sp}, _}, {bra, _} | T], _R, A, img) ->
    {Rest, Url, Title} = parse_inline(T),
    Tag = {string:strip(make_plain_str(Url)),
           make_plain_str(Title),
           make_plain_str(reverse(A))},
    {Rest, Tag};
%% this clause detects references to images/links... "[text][id]"
get_inline([{{inline, close}, _}, {{inline, open}, _} | T], R, A, _) ->
    Text = make_plain_str(reverse(A)),
    case get_id_diff(T) of
        normal -> {[], make_plain_str(reverse(A))};
        {[{_, Id}], Rest} ->
            %% unknown ids degrade to an empty url/title
            {Url, Title} = case lists:keyfind(Id, 1, R) of
                               false -> {"", ""};
                               {Id, {U, Tit}} -> {U, Tit}
                           end,
            Tag = {Url, Title, Text},
            {Rest, Tag};
        _Other -> {[], make_plain_str(reverse(A))} % random failing id's
    end;
%% so does this one - just delete the space and rethrow it
get_inline([{{inline, close}, _} = C , {{ws, _}, _},
            {{inline, open}, _} = O | T], R, A, Type) ->
    get_inline([C, O | T], R, A, Type);
%% this is the markdown extension clause that takes an id in square brackets without
%% any additional stuff as a valid id marker
get_inline([{{inline, close}, _} | T], R, A, _) ->
    Id = make_plain_str(reverse(A)),
    case lists:keyfind(Id, 1, R) of
        false -> {T, flatten([Id , $]])};
        {Id, {Url, Title}} -> Tag = {Url, Title, Id},
                              {T, Tag}
    end;
%% accumulate everything else as link text
get_inline([H | T], R, A, Type) ->
    get_inline(T, R, [H | A], Type).
+
%% Extract the tokens of a reference id "[id]". Returns {IdTokens, Rest}
%% when a closing bracket is found, or 'normal' when it never closes.
get_id_diff(Tokens) -> g_id_diff1(Tokens, []).

g_id_diff1([], _Acc) ->
    normal;
g_id_diff1([{{inline, close}, _} | Rest], Acc) ->
    {lists:reverse(Acc), Rest};
g_id_diff1([Token | Rest], Acc) ->
    g_id_diff1(Rest, [Token | Acc]).
+
%% convert ascii into html characters
%% Entity-escape &, <, > and the non-breaking space (160); every other
%% character passes through unchanged.
htmlencode(Text) ->
    htmlencode(Text, []).

htmlencode([], Acc) ->
    lists:flatten(lists:reverse(Acc));
htmlencode([C | Rest], Acc) ->
    Escaped = case C of
                  $&  -> "&amp;";
                  $<  -> "&lt;";
                  $>  -> "&gt;";
                  160 -> "&nbsp;";   % non-breaking space
                  _   -> C
              end,
    htmlencode(Rest, [Escaped | Acc]).
+
%% Character-level pass over the flattened output: handles emphasis and
%% strong markers (*, _), inline code (`), entity escaping and some
%% whitespace normalisation. {tags, _} fragments pass through verbatim.
%% Clause order matters: longer delimiter runs and backslash-escapes
%% must be matched before the shorter forms.
htmlchars(List) -> htmlchars1(List, []).

htmlchars1([], Acc) -> flatten(reverse(Acc));
%% tags are just wheeched out unescaped
htmlchars1([{tags, Tag} | T], Acc) -> htmlchars1(T, [Tag | Acc]);
%% line ends are pushed to a space..
htmlchars1([?CR, ?LF | T], Acc) -> htmlchars1(T, ["\n" | Acc]);
htmlchars1([?LF | T], Acc) -> htmlchars1(T, ["\n" | Acc]);
htmlchars1([?CR | T], Acc) -> htmlchars1(T, ["\r" | Acc]);
%% emphasis is a bit strange - must be preceded by or followed by
%% white space to work and can also be escaped
%% there is a non-space filling white space represented by the atom 'none'
%% which is created in the parser (NOT IN THE LEXER!) and which triggers
%% emphasis or strong tags being turned on...
%% "\***" - escaped triple star is emitted literally
htmlchars1([$\\, $*, $*, $* | T], A) -> htmlchars1(T, [$*, $*, $* | A]);
%% "***text***" -> <strong><em>text</em></strong>
htmlchars1([$*, $*, $* | T], A) -> {T2, NewA} = superstrong(T, $*),
                                   htmlchars1(T2, [NewA | A]);
% repeat for strong
htmlchars1([$\\, $*, $* | T], A) -> htmlchars1(T, [$*, $* | A]);
%% "**text**" -> <strong>text</strong>
htmlchars1([$*, $* | T], A) -> {T2, NewA} = strong(T, $*),
                               htmlchars1(T2, [NewA | A]);
%% likewise for emphasis
htmlchars1([$\\, $* | T], A) -> htmlchars1(T, [$* | A]);
%% "*text*" -> <em>text</em>
htmlchars1([$* | T], A) -> {T2, NewA} = emphasis(T, $*),
                           htmlchars1(T2, [NewA | A]);
%% and again for underscores
htmlchars1([$\\, $_, $_, $_ | T], A) -> htmlchars1(T, [$_, $_, $_ | A]);
%% the none atom is the non-space filling whitespace
htmlchars1([$_, $_, $_ | T], A) -> {T2, NewA} = superstrong(T, $_),
                                   htmlchars1(T2, [NewA | A]);
% and strong
%% and again for underscores
htmlchars1([$\\, $_, $_ | T], A) -> htmlchars1(T, [$_, $_ | A]);
htmlchars1([$_, $_ | T], A) -> {T2, NewA} = strong(T, $_),
                               htmlchars1(T2, [NewA | A]);
%% likewise for emphasis
htmlchars1([$\\, $_ | T], A) -> htmlchars1(T, [$_ | A]);
htmlchars1([$_ | T], A) -> {T2, NewA} = emphasis(T, $_),
                           htmlchars1(T2, [NewA | A]);
%% handle backtick escaping
htmlchars1([$\\, $` | T], A) -> htmlchars1(T, [$` | A]);
%% "``code``" -> <pre><code>, "`code`" -> <code>
htmlchars1([$`, $` | T], A) -> {T2, NewA} = dblcode(T),
                               htmlchars1(T2, [NewA | A]);
htmlchars1([$` | T], A) -> {T2, NewA} = code(T),
                           htmlchars1(T2, [NewA | A]);
%% ?COPY/?AMP expand to the character sequences "&copy;" / "&amp;",
%% so already-escaped entities are preserved before the bare "&" clause
htmlchars1([?COPY | T], A) -> htmlchars1(T, ["&copy;" | A]);
htmlchars1([?AMP | T], A) -> htmlchars1(T, ["&amp;" | A]);
htmlchars1([$& | T], A) -> htmlchars1(T, ["&amp;" | A]);
htmlchars1([$< | T], A) -> htmlchars1(T, ["&lt;" | A]);
htmlchars1([?NBSP | T], A) -> htmlchars1(T, ["&nbsp;" | A]);
htmlchars1([?TAB | T], A) -> htmlchars1(T, [" " | A]);
%% the non-space-filling whitespace marker is dropped from the output
htmlchars1([none | T], A) -> htmlchars1(T, A);
htmlchars1([H | T], A) -> htmlchars1(T, [H | A]).
+
%% "*text*" / "_text_"          -> <em>text</em>
emphasis(List, Delim) -> interpolate(List, Delim, "em", "" ,[]).
%% "**text**" / "__text__"      -> <strong>text</strong>
strong(List, Delim) -> interpolate2(List, Delim, "strong", "", []).
%% "***text***" / "___text___"  -> <strong><em>text</em></strong>
superstrong(List, Delim) -> interpolate3(List, Delim, "strong", "em", "", []).
%% "``text``"                   -> <pre><code>text</code></pre>
dblcode(List) -> {T, Tag} = interpolate2(List, $`, "code", "" ,[]),
                 {T, "<pre>" ++ Tag ++ "</pre>"}.
%% "`text`"                     -> <code>text</code>
code(List) -> interpolateX(List, $`, "code", "", []).
+
%% pain in the arse - sometimes the closing tag should be preceded by
%% a "\n" and sometimes not in showdown.js
%% interpolate is for single delimiters...
%% NOTE(review): interpolateX/5 is token-for-token identical to
%% interpolate/5 below; the two could be collapsed once the
%% showdown.js-compatibility behaviour is confirmed.
%% No closing delimiter found: re-emit the delimiter literally.
interpolateX([], Delim, _Tag, _X, Acc) ->
    {[], [Delim] ++ htmlchars(reverse(Acc))};
%% Closing delimiter found: wrap the enclosed text in <Tag>...</Tag>;
%% X is an optional suffix inserted before the closing tag.
interpolateX([Delim | T], Delim, Tag, X, Acc) ->
    {T, "<" ++ Tag ++ ">" ++ htmlchars(reverse(Acc)) ++ X ++
     "</" ++ Tag ++ ">"};
interpolateX([H | T], Delim, Tag, X, Acc) ->
    interpolateX(T, Delim, Tag, X, [H | Acc]).
+
%% Single-delimiter interpolation (used by emphasis/2).
%% No closing delimiter found: re-emit the delimiter literally.
interpolate([], Delim, _Tag, _X, Acc) ->
    {[], [Delim] ++ htmlchars(reverse(Acc))};
%% Closing delimiter found: wrap the enclosed text in <Tag>...</Tag>;
%% X is an optional suffix inserted before the closing tag.
interpolate([Delim | T], Delim, Tag, X, Acc) ->
    {T, "<" ++ Tag ++ ">" ++ htmlchars(reverse(Acc)) ++ X ++
     "</" ++ Tag ++ ">"};
interpolate([H | T], Delim, Tag, X, Acc) ->
    interpolate(T, Delim, Tag, X, [H | Acc]).
+
%% interpolate two is for double delimiters...
%% No closing pair found: re-emit both delimiter characters literally.
interpolate2([], Delim, _Tag, _X, Acc) ->
    {[], [Delim] ++ [Delim] ++ htmlchars(reverse(Acc))};
%% Closing pair found: wrap the enclosed text in <Tag>...</Tag>.
interpolate2([Delim, Delim | T], Delim, Tag, X, Acc) ->
    {T, "<" ++ Tag ++ ">" ++ htmlchars(reverse(Acc)) ++ X ++
     "</" ++ Tag ++ ">"};
interpolate2([H | T], Delim, Tag, X, Acc) ->
    interpolate2(T, Delim, Tag, X, [H | Acc]).
+
%% interpolate three is for triple delimiters (***text*** / ___text___)
%% No closing triple found: the lone delimiter comes out wrapped in
%% Tag2. NOTE(review): presumably mirrors showdown.js output - confirm.
interpolate3([], D, _Tag1, Tag2, _X, Acc) ->
    {[], "<" ++ Tag2 ++ ">" ++ [D] ++ "</" ++ Tag2 ++ ">"
     ++ htmlchars(reverse(Acc))};
%% Closing triple found: nest <Tag1><Tag2>text</Tag2></Tag1>.
interpolate3([D, D, D | T], D, Tag1, Tag2, _X, Acc) ->
    {T, "<" ++ Tag1 ++ ">" ++ "<" ++ Tag2 ++ ">"
     ++ htmlchars(reverse(Acc)) ++ "</" ++ Tag2 ++ ">"
     ++ "</" ++ Tag1 ++ ">"};
interpolate3([H | T], D, Tag1, Tag2, X, Acc) ->
    interpolate3(T, D, Tag1, Tag2, X, [H | Acc]).
+
%% Build an <img> token; {tags, _} fragments are emitted verbatim
%% (unescaped) by htmlchars/1.
make_img_tag(Url, Acc, Title) ->
    Html = lists:flatten(
             io_lib:format("<img src=\"~s\" alt=\"~s\" title=\"~s\" />",
                           [Url, Acc, Title])),
    {tags, Html}.
+
+%%%-------------------------------------------------------------------
+%%%
+%%% Unit Tests
+%%%
+%%%-------------------------------------------------------------------
+
+% -include("markdown_tests.hrl").
View
12 src/markdown/markdown_LICENSE
@@ -0,0 +1,12 @@
+License
+
+Copyright © 2009, Gordon Guthrie
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+ * Neither the name “erlmarkdown” nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+This software is provided by the copyright holders and contributors “as is” and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the copyright owner or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.
View
306 src/markdown/z_html2markdown.erl
@@ -0,0 +1,306 @@
+%% @author Marc Worrell <marc@worrell.nl>
+%% @copyright 2011 Marc Worrell
+
+%% @doc Convert a html text to markdown syntax.
+%% This is used when editing TinyMCE texts with the markdown editor.
+
+%% Copyright 2011 Marc Worrell
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+
+-module(z_html2markdown).
+-author("Marc Worrell <marc@worrell.nl>").
+
+-export([
+ convert/1,
+ convert/2
+]).
+
+-include("zotonic.hrl").
+
+% Accumulated markdown state (tree walker)
+-record(md, {a=[]}).
+
+% Recursive context dependent markdown state (context)
+-record(ms, {li=none, indent=[], allow_html=true}).
+
+-compile({no_auto_import,[max/2]}).
+
%% @doc Convert a html text to markdown format with default options.
convert(Html) ->
    convert(Html, []).

%% @doc Convert a html text to markdown format. Assumes the html has been sanitized and normalized.
%% The input is wrapped in a <sanitize> element so that mochiweb_html
%% always parses a single rooted tree.
convert(Html, Options) when is_binary(Html) ->
    convert1(<<"<sanitize>",Html/binary,"</sanitize>">>, Options);
convert(Html, Options) when is_list(Html) ->
    convert1(iolist_to_binary(["<sanitize>", Html, "</sanitize>"]), Options).

%% Parse the html, walk the tree, then append the collected
%% reference-style link definitions (see expand_anchors/1).
convert1(Html, Options) ->
    Parsed = mochiweb_html:parse(Html),
    {Text, M} = to_md(Parsed, #md{}, set_options(Options, #ms{})),
    list_to_binary([trimnl(iolist_to_binary(Text)), expand_anchors(M)]).
+
%% Apply the conversion options to the context record. Only 'no_html'
%% is recognised; any other option crashes with function_clause.
set_options([], S) ->
    S;
set_options([no_html|T], S) ->
    set_options(T, S#ms{allow_html=false}).
+
+
%% @doc Convert one mochiweb_html parse-tree node (binary text, comment,
%% or {Tag, Args, Children}) or a list of nodes to markdown.
%% Returns {MarkdownIOList, MdState}; the #md{} state accumulates the
%% hrefs for the reference-style links appended by expand_anchors/1.
to_md(B, M, _S) when is_binary(B) ->
    {escape_html_text(B, <<>>), M};
to_md({comment, _Text}, M, _S) ->
    {<<>>, M};

%% h1/h2 render as SETEXT headers (text underlined with = or -)
to_md({<<"h1">>, _Args, Enclosed}, M, S) ->
    header($=, Enclosed, M, S);
to_md({<<"h2">>, _Args, Enclosed}, M, S) ->
    header($-, Enclosed, M, S);
%% h3..h6 render as ATX headers ("### text"); h1/h2 were caught above
to_md({<<"h",N>>, _Args, Enclosed}, M, S) when N >= $1, N =< $6 ->
    {EncText, M1} = to_md(Enclosed, M, S),
    {[nl(S), nl(S), lists:duplicate(N-$0, "#"), 32, EncText, nl(S), nl(S)], M1};

to_md({<<"hr">>, [], []}, M, S) ->
    {[nl(S), nl(S), <<"---">>, nl(S), nl(S)], M};

%% two trailing spaces before the newline force a markdown line break
to_md({<<"br">>, [], []}, M, S) ->
    {[32, 32, nl(S)], M};
to_md({<<"em">>, _Args, Enclosed}, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S),
    {[$*, trl(EncText), $*], M1};
to_md({<<"i">>, _Args, Enclosed}, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S),
    {[$*, trl(EncText), $*], M1};
to_md({<<"strong">>, _Args, Enclosed}, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S),
    {[$*, $*, trl(EncText), $*, $*], M1};
to_md({<<"b">>, _Args, Enclosed}, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S),
    {[$*, $*, trl(EncText), $*, $*], M1};

to_md({<<"p">>, _Args, Enclosed}, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S),
    {[trl(EncText), nl(S), nl(S)], M1};

%% anchors become reference-style links "[text][N]"; the numbered hrefs
%% are collected in #md{} and emitted at the end of the document
to_md({<<"a">>, Args, Enclosed}, M, S) ->
    case proplists:get_value(<<"href">>, Args) of
        undefined ->
            to_md(Enclosed, M, S);
        Href ->
            {EncText, M1} = to_md(Enclosed, M, S),
            {M2,RefNr} = add_anchor(Href, M1),
            {[ $[, trl(EncText), $],$[,integer_to_list(RefNr),$] ], M2}
    end;

to_md({<<"code">>, _Args, Enclosed}, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S),
    {[$`, z_string:trim(EncText), $`], M1};
%% <pre><code> and bare <pre> become indented code blocks
to_md({<<"pre">>, _Args, [{<<"code">>, _, Enclosed}]}, M, S) ->
    S1 = S#ms{indent=[code|S#ms.indent]},
    {EncText, M1} = to_md(Enclosed, M, S1),
    {[nl(S1), trl(EncText), nl(S)], M1};
to_md({<<"pre">>, _Args, Enclosed}, M, S) ->
    S1 = S#ms{indent=[code|S#ms.indent]},
    {EncText, M1} = to_md(Enclosed, M, S1),
    {[nl(S1), trl(EncText), nl(S)], M1};

to_md({<<"quote">>, _Args, Enclosed}, M, S) ->
    S1 = S#ms{indent=[quote|S#ms.indent]},
    {EncText, M1} = to_md(Enclosed, M, S1),
    {[nl(S1), trl(EncText), nl(S)], M1};

to_md({<<"ul">>, _Args, Enclosed}, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S#ms{li=ul}),
    {[nl(S), trl(EncText), nl(S)], M1};
to_md({<<"ol">>, _Args, Enclosed}, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S#ms{li=ol}),
    {[nl(S), trl(EncText), nl(S)], M1};
to_md({<<"li">>, _Args, Enclosed}, M, S) ->
    %% a <li> outside any ul/ol (li=none) crashes here by design
    Bullet = case S#ms.li of
                 ol -> "1. ";
                 ul -> "* "
             end,
    {EncText, M1} = to_md(Enclosed, M, S#ms{li=none, indent=[S#ms.li|S#ms.indent]}),
    {[nl(S), Bullet, 32, trl(EncText)], M1};

%% tables cannot be expressed in markdown: pass through as html if allowed
to_md({<<"table">>, _Args, _Enclosed} = Html, M, S) when S#ms.allow_html ->
    {flatten_html(Html), M};

to_md({<<"head">>, _Args, _Enclosed}, M, _S) ->
    {[], M};
to_md({<<"script">>, _Args, _Enclosed}, M, _S) ->
    {[], M};

%% any other element: render just its children
to_md({_, _, Enclosed}, M, S) ->
    to_md(Enclosed, M, S);
%% a list of sibling nodes: fold left, threading the #md{} state.
%% FIX: accumulate in reverse and flip once at the end instead of
%% appending with "Acc ++ [New]" on every step, which was O(n^2) in the
%% number of children.
to_md(L, M, S) when is_list(L) ->
    {Parts, M1} = lists:foldl(
                    fun(Elt, {AT, AM}) ->
                            {AT1, AM1} = to_md(Elt, AM, S),
                            {[AT1 | AT], AM1}
                    end, {[], M}, L),
    {lists:reverse(Parts), M1}.
+
+
%% Render Enclosed as a SETEXT-style header: the text underlined with
%% Char ('=' for h1, '-' for h2), at least 3 characters wide.
%% Empty header texts produce no output at all.
header(Char, Enclosed, M, S) ->
    {EncText, M1} = to_md(Enclosed, M, S),
    case trl(EncText) of
        Empty when Empty =:= []; Empty =:= <<>> ->
            {[], M1};
        Trimmed ->
            Underline = lists:duplicate(max(len(Trimmed), 3), [Char]),
            {[nl(S), nl(S), Trimmed, nl(S), Underline, nl(S), nl(S)], M1}
    end.
+
+
%% Local max/2; the auto-imported erlang:max/2 is disabled via the
%% -compile({no_auto_import,[max/2]}) attribute at the top of the module.
max(A,B) when A > B -> A;
max(_A,B) -> B.
+
+
%% Newline plus the indentation prefix required by the current context
%% (nested lists, code blocks, blockquotes).
nl(#ms{indent=[]}) ->
    $\n;
nl(#ms{indent=Indent}) ->
    nl1(Indent, []).

%% Build "\n" followed by one prefix string per enclosing context.
%% NOTE(review): the ul/ol/code prefix strings below may have had their
%% widths collapsed by whitespace mangling (markdown normally needs
%% 2-4 spaces per level) - confirm against the original file.
nl1([], Acc) ->
    [$\n|Acc];
nl1([ul|Rest], Acc) ->
    nl1(Rest, [" "|Acc]);
nl1([ol|Rest], Acc) ->
    nl1(Rest, [" "|Acc]);
nl1([code|Rest], Acc) ->
    nl1(Rest, [" "|Acc]);
nl1([quote|Rest], Acc) ->
    nl1(Rest, ["> "|Acc]).
+
+
%% @doc Simple recursive length (in bytes/characters) of an iolist:
%% binaries count their byte size, integers count as one character,
%% nested lists are summed recursively.
len(Bin) when is_binary(Bin) ->
    byte_size(Bin);
len(Char) when is_integer(Char) ->
    1;
len(List) when is_list(List) ->
    lists:foldl(fun(Elt, Sum) -> Sum + len(Elt) end, 0, List).
+
+
+
%% @doc Escape text for markdown output: markdown metacharacters get a
%% backslash, html-significant characters (<, >, ", ') become entities,
%% and runs of whitespace collapse to a single space.
%% (& is already removed or escaped upstream.)
escape_html_text(<<>>, Acc) ->
    Acc;
%% markdown-significant characters are backslash-escaped
escape_html_text(<<${, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "\\{">>);
escape_html_text(<<$}, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "\\}">>);
escape_html_text(<<$[, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "\\[">>);
escape_html_text(<<$], T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "\\]">>);
escape_html_text(<<$_, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "\\_">>);
escape_html_text(<<$*, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "\\*">>);
%% a literal backtick is doubled rather than backslash-escaped
escape_html_text(<<$`, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "``">>);
%% html-significant characters become entities
escape_html_text(<<$<, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "&lt;">>);
escape_html_text(<<$>, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "&gt;">>);
escape_html_text(<<$", T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "&quot;">>);
escape_html_text(<<$', T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, "&#39;">>);
%% space, tab and newline (plus any whitespace trl/1 strips from the
%% remainder) collapse to one single space
escape_html_text(<<32, T/binary>>, Acc) ->
    escape_html_text(trl(T), <<Acc/binary, 32>>);
escape_html_text(<<9, T/binary>>, Acc) ->
    escape_html_text(trl(T), <<Acc/binary, 32>>);
escape_html_text(<<$\n, T/binary>>, Acc) ->
    escape_html_text(trl(T), <<Acc/binary, 32>>);
escape_html_text(<<C, T/binary>>, Acc) ->
    escape_html_text(T, <<Acc/binary, C>>).
+
%% @doc Escape < and > (for text inside html comments).
escape_html_comment(<<>>, Acc) ->
    Acc;
escape_html_comment(<<C, T/binary>>, Acc) ->
    Acc1 = case C of
               $< -> <<Acc/binary, "&lt;">>;
               $> -> <<Acc/binary, "&gt;">>;
               _  -> <<Acc/binary, C>>
           end,
    escape_html_comment(T, Acc1).
+
+
%% Strip all leading newlines from a binary.
trimnl(B) ->
    case B of
        <<$\n, Rest/binary>> -> trimnl(Rest);
        _ -> B
    end.
+
%% Shorthand for z_string:trim_left/1 - presumably strips leading
%% whitespace from a binary/chardata; see z_string for exact semantics.
trl(B) ->
    z_string:trim_left(B).
+
+
+
%% Register Href in the anchor list, returning {NewState, RefNr} where
%% RefNr is the 1-based reference number used in the markdown output.
%% An already-registered Href reuses its existing number (deduplicated
%% via indexof/3 below).
add_anchor(Href, M) ->
    case indexof(Href, M#md.a, 1) of
        undefined ->
            %% not seen before: append and use the next free number
            {M#md{a=M#md.a ++ [Href]}, length(M#md.a)+1};
        N ->
            {M, N}
    end.

%% 1-based position of A in the list, or 'undefined' when absent.
    indexof(_A, [], _N) -> undefined;
    indexof(A, [A|_], N) -> N;
    indexof(A, [_|R], N) -> indexof(A, R, N+1).
+
+
%% Render the collected anchor hrefs as reference-style link definitions
%% ("  [N]: href"), preceded by a blank line; [] when none were seen.
expand_anchors(#md{a = []}) ->
    [];
expand_anchors(#md{a = Hrefs}) ->
    [$\n | expand_anchor(Hrefs, 1, [])].

expand_anchor([], _Nr, Acc) ->
    lists:reverse(Acc);
expand_anchor([Href | Rest], Nr, Acc) ->
    Line = [32, 32, $[, integer_to_list(Nr), $], $:, 32, Href, $\n],
    expand_anchor(Rest, Nr + 1, [Line | Acc]).
+
+
+
%% Serialize a mochiweb_html parse-tree fragment back to escaped HTML
%% iolist form; used for fragments (e.g. tables) passed through
%% untranslated.
flatten_html(Text) when is_binary(Text) ->
    z_html:escape(Text);
flatten_html({comment, _Text}) ->
    [];
flatten_html({Tag, Args, Enclosed}) ->
    case Enclosed == [] andalso is_self_closing(Tag) of
        true ->
            [$<, Tag, flatten_args(Args), $/, $>];
        false ->
            Inner = [flatten_html(Elt) || Elt <- Enclosed],
            [$<, Tag, flatten_args(Args), $>, Inner, $<, $/, Tag, $>]
    end.

%% Void elements serialized as <tag/>.
is_self_closing(<<"img">>) -> true;
is_self_closing(<<"br">>)  -> true;
is_self_closing(<<"hr">>)  -> true;
is_self_closing(_)         -> false.

%% Serialize an attribute list.
flatten_args(Args) ->
    [flatten_arg(Arg) || Arg <- Args].

%% Serialize one attribute as name="escaped value".
flatten_arg({Name, Value}) ->
    [32, Name, $=, $", z_html:escape(Value), $"].
View
40 src/markdown/z_markdown.erl
@@ -0,0 +1,40 @@
+%% @author Marc Worrell <marc@worrell.nl>
+%% @copyright 2011 Marc Worrell
+
+%% @doc Convert markdown to/from html.
+
+%% Copyright 2011 Marc Worrell
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+-module(z_markdown).
+
+-export([
+ to_html/1,
+ to_markdown/1,
+ to_markdown/2
+]).
+
+
%% @doc Convert a html text (binary or list) to markdown with default
%% options; see z_html2markdown:convert/1.
to_markdown(Html) ->
    z_html2markdown:convert(Html).

%% @doc As to_markdown/1; Options is passed through to
%% z_html2markdown:convert/2 (e.g. the no_html option disables literal
%% html pass-through for tables).
to_markdown(Html, Options) ->
    z_html2markdown:convert(Html, Options).
+
+
%% @doc Convert markdown text to html via the markdown module.
%% NOTE(review): a binary is converted with binary_to_list/1, which
%% yields a byte list; for UTF-8 input markdown:conv_utf8/1 may be the
%% better entry point - confirm the expected encoding of callers.
to_html(Markdown) when is_binary(Markdown) ->
    markdown:conv(binary_to_list(Markdown));
to_html(Markdown) ->
    markdown:conv(Markdown).
+
Please sign in to comment.
Something went wrong with that request. Please try again.