Browse files

ewgi_api:

  - rolled back the changes regarding charset conversion to unicode;
ewgi_post:
  - the input charset conversion to unicode landed in this module;
  - all content-type key/value pairs are now passed along for processing within the content-type handler;
  - changed the response content-type to "text/html; charset=utf8".
  • Loading branch information...
1 parent d107fe0 commit 73bef492c6e52ed52c0b8f4f91e6ba94f27912dc @davide davide committed Nov 4, 2009
Showing with 42 additions and 41 deletions.
  1. +12 −29 src/ewgi_api.erl
  2. +30 −12 src/middleware/ewgi_post/ewgi_post.erl
View
41 src/ewgi_api.erl
@@ -61,8 +61,8 @@
-export([server_request_foldl/4]).
%% Utility methods
--export([parse_qs/1, parse_post/1, parse_post/2, urlencode/1, quote/1,
- normalize_header/1, unquote_path/1, path_components/3, urlsplit/1]).
+-export([parse_qs/1, parse_post/1, urlencode/1, quote/1, normalize_header/1,
+ unquote_path/1, path_components/3, urlsplit/1]).
%% Stream methods
-export([
@@ -463,43 +463,26 @@ parse_qs(ToParse) ->
%% @end
%%--------------------------------------------------------------------
parse_post(ToParse) ->
- parse_data(ToParse, "ISO-8859-1").
-
-%%--------------------------------------------------------------------
-%% @spec parse_post(string()|binary(), InEncoding) -> [proplist()]
-%%
-%% @doc Parse application/x-www-form-urlencoded data.
-%% Calls parse_data to do the job.
-%% @end
-%%--------------------------------------------------------------------
-parse_post(ToParse, InEncoding) ->
- parse_data(ToParse, InEncoding).
+ parse_data(ToParse).
%%--------------------------------------------------------------------
%% @spec parse_data(string()|binary()) -> [proplist()]
%%
%% @doc Parse a query string or application/x-www-form-urlencoded data.
%% @end
%%--------------------------------------------------------------------
-parse_data(undefined, _InEncoding) ->
+parse_data(undefined) ->
[];
-parse_data(Data, InEncoding) ->
- UTFData = unicode_data(Data, string:to_lower(InEncoding)),
- kv_data(UTFData, []).
-
-unicode_data(Data, "iso-8859-1") ->
- unicode:characters_to_list(Data, latin1);
-unicode_data(Data, "utf8") ->
- unicode:characters_to_list(Data, utf8);
-unicode_data(Data, "utf-8") ->
- unicode:characters_to_list(Data, utf8).
-%% TODO: add support for more charsets
-
-kv_data([], Acc) ->
+parse_data(Binary) when is_binary(Binary) ->
+ parse_data(binary_to_list(Binary), []);
+parse_data(String) ->
+ parse_data(String, []).
+
+parse_data([], Acc) ->
lists:reverse(Acc);
-kv_data(String, Acc) ->
+parse_data(String, Acc) ->
{{Key, Val}, Rest} = parse_kv(String),
- kv_data(Rest, [{Key, Val} | Acc]).
+ parse_data(Rest, [{Key, Val} | Acc]).
%%--------------------------------------------------------------------
View
42 src/middleware/ewgi_post/ewgi_post.erl
@@ -51,26 +51,35 @@ post_parse_middleware(MaxLength, App, ErrApp)
parse_ct(L) when is_list(L) ->
case string:tokens(L, ";") of
[CT|Vars] ->
- Vars1 = [string:tokens(VarStr, "=") || VarStr <- Vars],
- Vars2 = [{string:strip(Name), Value} || [Name, Value] <- Vars1],
- %% http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html ->
- %% When no explicit charset parameter is provided by the sender,
- %% media subtypes of the "text" type are defined to have a default
- %% charset value of "ISO-8859-1" when received via HTTP.
- Charset = proplists:get_value("charset", Vars2, "ISO-8859-1"),
- {CT, Charset};
+ Vars1 = [string:tokens(VarStr, "=") || VarStr <- Vars],
+ Vars2 = [{string:strip(Name), Value} || [Name, Value] <- Vars1],
+ {CT, Vars2};
_ ->
undefined
end.
-parse_post(Ctx, App, ErrApp, {"application/x-www-form-urlencoded", Charset}, Max) ->
+parse_post(Ctx, App, ErrApp, {"application/x-www-form-urlencoded", Vars}, Max) ->
case ewgi_api:content_length(Ctx) of
L when is_integer(L), L > Max ->
- %% shouldn't we set an error message here?
+ %% shouldn't we set an error message here?
ErrApp(Ctx);
L when is_integer(L), L > 0 ->
Input = read_input_string(Ctx, L),
- Vals = ewgi_api:parse_post(Input, Charset),
+ %% http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html
+ %% When no explicit charset parameter is provided by the sender,
+ %% media subtypes of the "text" type are defined to have a default
+ %% charset value of "ISO-8859-1" when received via HTTP.
+ case proplists:get_value("charset", Vars) of
+ undefined -> InCharset = "iso-8859-1"
+ ;Charset -> InCharset = string:to_lower(Charset)
+ end,
+ UnicodeInput = to_unicode(Input, InCharset),
+ Vals = ewgi_api:parse_post(UnicodeInput),
+ Ctx1 = ewgi_api:remote_user_data(Vals, Ctx),
+ App(Ctx1);
+ _ ->
+ ErrApp(Ctx)
+ end;
Ctx1 = ewgi_api:remote_user_data(Vals, Ctx),
App(Ctx1);
_ ->
@@ -91,6 +100,15 @@ read_input_string_cb(Acc) ->
read_input_string_cb([B|Acc])
end.
+%% Transforms the data from the given charset to unicode
+%% Todo: add support for other charsets as needed.
+to_unicode(Data, "iso-8859-1") ->
+ unicode:characters_to_list(Data, latin1);
+to_unicode(Data, "utf8") ->
+ unicode:characters_to_list(Data, utf8);
+to_unicode(Data, "utf-8") ->
+ unicode:characters_to_list(Data, utf8).
+
%%
%% example functions on how to use the post handling middleware
%%
@@ -113,7 +131,7 @@ display_form_data({ewgi_context, Request, _Response}=Ctx) ->
Body1 ->
io_lib:format("~p", [Body1])
end,
- ResponseHeaders = [{"Content-type", "text/plain"}],
+ ResponseHeaders = [{"Content-type", "text/html; charset=utf8"}],
Response = {ewgi_response,
{200, "OK"},
ResponseHeaders,

0 comments on commit 73bef49

Please sign in to comment.