Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Backport fixes to Twitter filter to 0.8.

  • Loading branch information...
commit 4f2e6540627178967d0b7791fd9195352ccec2b3 1 parent 10e59ee
@arjan arjan authored
Showing with 265 additions and 56 deletions.
  1. +94 −56 modules/mod_twitter/filters/filter_twitter.erl
  2. +171 −0 src/support/z_url.erl
View
150 modules/mod_twitter/filters/filter_twitter.erl
@@ -1,8 +1,8 @@
%% @author Arjan Scherpenisse <arjan@scherpenisse.net>
-%% @copyright 2010 Arjan Scherpenisse
+%% @copyright 2010-2012 Arjan Scherpenisse
%% @doc 'twitter' filter, make a tweet from twitter look nice
-%% Copyright 2010 Arjan Scherpenisse
+%% Copyright 2010-2012 Arjan Scherpenisse
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -17,96 +17,134 @@
%% limitations under the License.
-module(filter_twitter).
--export([twitter/2]).
+-export([
+ twitter/2,
+ twitter/3
+]).
+%% Number of characters for truncating an url
+-define(URL_TRUNCATE, 50).
-twitter(undefined, _Context) ->
- undefined;
-twitter(Input, Context) when is_binary(Input) ->
- twitter1(Input, 0, Context);
-twitter(Input, _Context) ->
- Input.
-twitter1(Input, Index, Context) when is_binary(Input) ->
+twitter(undefined, _Context) -> undefined;
+twitter(Input, Context) when is_binary(Input) -> iolist_to_binary(twitter1(Input, 0, [], Context));
+twitter(Input, _Context) -> Input.
+
+%% @doc filter with options, only option for now: url_location (follows url shorteners)
+twitter(Input, Options, Context) ->
+ twitter1(Input, 0, Options, Context).
+
+
+twitter1(Input, Index, Opts, Context) when is_binary(Input) ->
case Input of
- <<Pre:Index/binary, "http://", Post/binary>> ->
- process_binary_match(Pre, <<>>, size(Post), twitter1_url("http://", Post, 0, Context));
- <<Pre:Index/binary, "https://", Post/binary>> ->
- process_binary_match(Pre, <<>>, size(Post), twitter1_url("https://", Post, 0, Context));
- <<Pre:Index/binary, $@, Post/binary>> ->
- process_binary_match(Pre, <<>>, size(Post), twitter1_at(Post, 0, Context));
- <<_:Index/binary, $&, $#, _/binary>> ->
- twitter1(Input, Index + 2, Context);
- <<Pre:Index/binary, $#, Post/binary>> ->
- process_binary_match(Pre, <<>>, size(Post), twitter1_hash(Post, 0, Context));
- <<_:Index/binary, _/binary>> ->
- twitter1(Input, Index + 1, Context);
- _ ->
- Input
+ <<Pre:Index/binary, "http://", Post/binary>> -> [Pre, twitter1_url(<<"http://">>, Post, 0, Opts, Context)];
+ <<Pre:Index/binary, "https://", Post/binary>> -> [Pre, twitter1_url(<<"https://">>, Post, 0, Opts, Context)];
+ <<Pre:Index/binary, "ftp://", Post/binary>> -> [Pre, twitter1_url(<<"ftp://">>, Post, 0, Opts, Context)];
+ <<Pre:Index/binary, "ftps://", Post/binary>> -> [Pre, twitter1_url(<<"ftps://">>, Post, 0, Opts, Context)];
+ <<Pre:Index/binary, "mailto:", Post/binary>> -> [Pre, twitter1_url(<<"mailto:">>, Post, 0, Opts, Context)];
+ <<Pre:Index/binary, $&, $#, Post/binary>> -> [Pre, $&, $#, twitter1(Post, 0, Opts, Context)];
+ <<Pre:Index/binary, $@, Post/binary>> -> [Pre, twitter1_at(Post, 0, Opts, Context)];
+ <<Pre:Index/binary, $#, Post/binary>> -> [Pre, twitter1_hash(Post, 0, Opts, Context)];
+ <<_:Index/binary, _/binary>> -> twitter1(Input, Index + 1, Opts, Context);
+ _ -> Input
end.
-twitter1_url(Pre, Input, Index, Context) ->
+twitter1_url(Pre, Input, Index, Opts, Context) ->
case Input of
+ <<_:Index/binary, "&#38;", _/binary>> ->
+ twitter1_url(Pre, Input, Index + 5, Opts, Context);
+ <<_:Index/binary, "&amp;", _/binary>> ->
+ twitter1_url(Pre, Input, Index + 5, Opts, Context);
<<Url:Index/binary, Char, Post/binary>> ->
- case z_utils:url_valid_char(Char) of
- true ->
- twitter1_url(Pre, Input, Index + 1, Context);
- false ->
- Html = ["<a href=\"", Pre, Url, "\">", Pre, Url, "</a>"],
- process_binary_match(<<>>, [Html, Char], size(Post), twitter1(Post, 0, Context))
+ case Char /= $& andalso z_url:url_valid_char(Char) of
+ true -> twitter1_url(Pre, Input, Index + 1, Opts, Context);
+ false -> twitter1_url_anchor(Pre, Url, <<Char, Post/binary>>, Opts, Context)
end;
<<Url:Index/binary>> ->
- Html = ["<a href=\"", Pre, Url, "\">", Pre, Url, "</a>"],
- process_binary_match(<<>>, Html, 0, <<>>);
+ twitter1_url_anchor(Pre, Url, <<>>, Opts, Context);
_ ->
Input
end.
-twitter1_at(Input, Index, Context) ->
+ twitter1_url_anchor(Pre, <<>>, Post, Opts, Context) ->
+ [Pre, twitter1(Post, 0, Opts, Context)];
+ twitter1_url_anchor(Pre, Url, Post, Opts, Context) ->
+ Length1 = size(Url) - 1,
+ <<Url1:Length1/binary,LastChar>> = Url,
+ Html = case is_url_truncatable(LastChar) of
+ true -> [ twitter1_url_html(Pre, Url1, Opts), LastChar];
+ false -> twitter1_url_html(Pre, Url, Opts)
+ end,
+ [Html, twitter1(Post, 0, Opts, Context)].
+
+
+ % Create the html link, follow the url to remove any url shortener.
+ twitter1_url_html(Pre, Url, Opts) ->
+ case proplists:get_value(url_location, Opts, false) of
+ true ->
+ Url2 = z_url:location(<<Pre/binary,Url/binary>>),
+ Text = z_string:truncate(z_url:remove_protocol(Url2), ?URL_TRUNCATE),
+ ["<a href=\"", Url2, "\">", Text, "</a>"];
+ false ->
+ ["<a href=\"", Pre, Url, "\">", Url, "</a>"]
+ end.
+
+
+ is_url_truncatable($.) -> true;
+ is_url_truncatable($;) -> true;
+ is_url_truncatable($#) -> true;
+ is_url_truncatable($,) -> true;
+ is_url_truncatable($') -> true;
+ is_url_truncatable($") -> true;
+ is_url_truncatable($?) -> true;
+ is_url_truncatable($!) -> true;
+ is_url_truncatable($/) -> true;
+ is_url_truncatable($+) -> true;
+ is_url_truncatable($%) -> true;
+ is_url_truncatable(_) -> false.
+
+
+twitter1_at(Input, Index, Opts, Context) ->
case Input of
- <<Name:Index/binary, Char, Post/binary>> when not(Char >= $a andalso Char < $z+1
+ <<Name:Index/binary, Char, Post/binary>> when not(Char >= $a andalso Char =< $z
orelse
- Char >= $A andalso Char < $Z+1
+ Char >= $A andalso Char =< $Z
orelse
- Char >= $0 andalso Char < $9+1
+ Char >= $0 andalso Char =< $9
orelse Char =:= $_ orelse Char =:= $.
) ->
Html = twitter_at_url(Name),
- process_binary_match(<<>>, [Html, Char], size(Post), twitter1(Post, 0, Context));
- <<_:Index/binary, _/binary>> ->
- twitter1_at(Input, Index + 1, Context);
+ [Html, twitter1(<<Char, Post/binary>>, 0, Opts, Context)];
+ <<Name:Index/binary>> ->
+ twitter_at_url(Name);
+ <<_:Index/binary, _/binary>> ->
+ twitter1_at(Input, Index + 1, Opts, Context);
_ ->
Input
end.
twitter_at_url(Name) ->
- ["@<a href=\"http://twitter.com/", Name, "\">", Name, "</a>"].
+ ["<a href=\"http://twitter.com/", Name, "\">@", Name, "</a>"].
-twitter1_hash(Input, Index, Context) ->
+twitter1_hash(Input, Index, Opts, Context) ->
case Input of
- <<Name:Index/binary, Char, Post/binary>> when not(Char >= $a andalso Char < $z+1
+ <<Name:Index/binary, Char, Post/binary>> when not(Char >= $a andalso Char =< $z
orelse
- Char >= $A andalso Char < $Z+1
+ Char >= $A andalso Char =< $Z
orelse
- Char >= $0 andalso Char < $9+1
+ Char >= $0 andalso Char =< $9
orelse Char =:= $_ orelse Char =:= $.
) ->
Html = twitter_hash_url(Name),
- process_binary_match(<<>>, [Html, Char], size(Post), twitter1(Post, 0, Context));
+ [[Html, Char], twitter1(Post, 0, Opts, Context)];
+ <<Name:Index/binary>> ->
+ twitter_hash_url(Name);
<<_:Index/binary, _/binary>> ->
- twitter1_hash(Input, Index + 1, Context);
+ twitter1_hash(Input, Index + 1, Opts, Context);
_ ->
Input
end.
twitter_hash_url(Hash) ->
- ["#<a href=\"http://twitter.com/#search?q=%23", Hash, "\">", Hash, "</a>"].
-
+ ["<a href=\"http://twitter.com/#search?q=%23", Hash, "\">#", Hash, "</a>"].
-process_binary_match(Pre, Insertion, SizePost, Post) ->
- case {size(Pre), SizePost} of
- {0, 0} -> Insertion;
- {0, _} -> [Insertion, Post];
- {_, 0} -> [Pre, Insertion];
- _ -> [Pre, Insertion, Post]
- end.
View
171 src/support/z_url.erl
@@ -0,0 +1,171 @@
+%% @author Marc Worrell
+%% @copyright 2012 Marc Worrell
+%% @doc Misc utility URL functions for zotonic
+
+%% Copyright 2012 Marc Worrell
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+-module(z_url).
+-include("zotonic.hrl").
+
+-author("Marc Worrell <marc@worrell.nl>").
+
+-export ([
+ url_encode/1,
+ url_decode/1,
+ url_path_encode/1,
+ url_valid_char/1,
+ percent_encode/1,
+ percent_encode/2,
+
+ remove_protocol/1,
+
+ location/1
+]).
+
+
+-define(is_uppercase_alpha(C), C >= $A, C =< $Z).
+-define(is_lowercase_alpha(C), C >= $a, C =< $z).
+-define(is_alpha(C), ?is_uppercase_alpha(C); ?is_lowercase_alpha(C)).
+-define(is_digit(C), C >= $0, C =< $9).
+-define(is_unreserved(C), ?is_alphanumeric(C); C =:= $-; C =:= $_; C =:= $.; C =:= $~).
+-define(is_alphanumeric(C), ?is_alpha(C); ?is_digit(C)).
+
+%%% URL ENCODE %%%
+
+url_encode(S) ->
+ %% @todo possible speedups for binaries
+ mochiweb_util:quote_plus(S).
+
+url_decode(S) ->
+ lists:reverse(url_decode(S, [])).
+
+ url_decode([], Acc) ->
+ Acc;
+ url_decode([$%, A, B|Rest], Acc) ->
+ Ch = erlang:list_to_integer([A, B], 16),
+ url_decode(Rest, [Ch|Acc]);
+ url_decode([$+|Rest], Acc) ->
+ url_decode(Rest, [32|Acc]);
+ url_decode([Ch|Rest], Acc) ->
+ url_decode(Rest, [Ch|Acc]).
+
+
+%%% URL PATH ENCODE %%%
+
+%% url spec for path part
+url_path_encode(L) when is_list(L) ->
+ url_path_encode(L, []);
+url_path_encode(L) ->
+ url_path_encode(z_convert:to_list(L)).
+
+url_path_encode([], Acc) ->
+ lists:reverse(Acc);
+url_path_encode([$/|R], Acc) ->
+ url_path_encode(R, [$/|Acc]);
+url_path_encode([C|R], Acc) when (C==$: orelse C==$@ orelse C==$& orelse C==$= orelse C==$+ orelse C==$$ orelse C==$ orelse C==$;) ->
+ url_path_encode(R, [C|Acc]);
+url_path_encode([C|R], Acc)->
+ case url_unreserved_char(C) of
+ true ->
+ url_path_encode(R, [C|Acc]);
+ false ->
+ <<Hi:4, Lo:4>> = <<C>>,
+ url_path_encode(R, [hexdigit(Lo), hexdigit(Hi), $% | Acc])
+ end.
+
+% hexdigit is from Mochiweb.
+
+hexdigit(C) when C < 10 -> $0 + C;
+hexdigit(C) when C < 16 -> $A + (C - 10).
+
+
+%%% PERCENT encode ENCODE %%%
+
+%% @doc Percent encoding/decoding as defined by RFC 3986 (http://tools.ietf.org/html/rfc3986).
+percent_encode(Chars) when is_list(Chars) ->
+ percent_encode(Chars, []);
+percent_encode(Chars) ->
+ percent_encode(z_convert:to_list(Chars)).
+
+percent_encode([], Encoded) ->
+ lists:flatten(lists:reverse(Encoded));
+percent_encode([C|Etc], Encoded) when ?is_unreserved(C) ->
+ percent_encode(Etc, [C|Encoded]);
+percent_encode([C|Etc], Encoded) ->
+ Value = [io_lib:format("%~s", [z_utils:encode([Char], 16)])
+ || Char <- binary_to_list(unicode:characters_to_binary([C]))],
+ percent_encode(Etc, [lists:flatten(Value)|Encoded]).
+
+
+%% @doc Naive function to remove the protocol from an Url
+remove_protocol("://" ++ Rest) -> Rest;
+remove_protocol(":" ++ Rest) -> Rest;
+remove_protocol([_|T]) -> remove_protocol(T);
+remove_protocol([]) -> [];
+
+remove_protocol(<<"://", Rest/binary>>) -> Rest;
+remove_protocol(<<":", Rest/binary>>) -> Rest;
+remove_protocol(<<_, Rest/binary>>) -> remove_protocol(Rest);
+remove_protocol(<<>>) -> <<>>.
+
+
+%% VALID URL CHARACTERS
+%% RFC 3986
+url_valid_char(Char) ->
+ url_reserved_char(Char) orelse url_unreserved_char(Char).
+
+url_reserved_char($!) -> true;
+url_reserved_char($*) -> true;
+url_reserved_char($") -> true;
+url_reserved_char($') -> true;
+url_reserved_char($() -> true;
+url_reserved_char($)) -> true;
+url_reserved_char($;) -> true;
+url_reserved_char($:) -> true;
+url_reserved_char($@) -> true;
+url_reserved_char($&) -> true;
+url_reserved_char($=) -> true;
+url_reserved_char($+) -> true;
+url_reserved_char($$) -> true;
+url_reserved_char($,) -> true;
+url_reserved_char($/) -> true;
+url_reserved_char($?) -> true;
+url_reserved_char($%) -> true;
+url_reserved_char($#) -> true;
+url_reserved_char($[) -> true;
+url_reserved_char($]) -> true;
+url_reserved_char(_) -> false.
+
+url_unreserved_char(Ch) when ?is_unreserved(Ch) ->
+ true;
+url_unreserved_char(_) ->
+ false.
+
+
+%% @doc Find the definitive location of an url, removing url shorteners in the process
+location(Url) when is_binary(Url) ->
+ location(z_convert:to_list(Url));
+location(Url) ->
+ case httpc:request(head, {Url, []}, [{autoredirect, false}], []) of
+ {ok, {{_HTTP,301,_Moved}, Hs, _}} ->
+ case proplists:get_value("location", Hs) of
+ undefined -> Url;
+ Url1 -> location(Url1)
+ end;
+ _ ->
+ Url
+ end.
+
+
Please sign in to comment.
Something went wrong with that request. Please try again.