Permalink
Browse files

amf3 read uint 29 and utf8 read

  • Loading branch information...
1 parent 384b16e commit 6882faae1b3e9b7e36ec22a36919f4c92194b317 @trung committed Sep 28, 2009
Showing with 137 additions and 0 deletions.
  1. +20 −0 temp/sample_parsed_request.txt
  2. +37 −0 temp/server/amf3.erl
  3. +80 −0 temp/server/utf8.erl
@@ -0,0 +1,20 @@
+[0,3, // FP version 9 and above -> AMF3 spec
+0,0, // 0 header
+0,1, // 1 body
+0,4,110,117,108,108, // targetUri = "null"
+0,2,47,49, // responseUri = "/1"
+[0,0,0,224, // message length = 224
+10, // marker = object-marker = 0x0A (for AMF3)
+0,0,0,1,17,10,129,19,77,102,108,101,120,46,109,101,115,115,97,103,
+105,110,103,46,109,101,115,115,97,103,101,115,46,67,111,109,
+109,97,110,100,77,101,115,115,97,103,101,19,111,112,101,114,
+97,116,105,111,110,27,99,111,114,114,101,108,97,116,105,111,
+110,73,100,9,98,111,100,121,19,109,101,115,115,97,103,101,73,
+100,17,99,108,105,101,110,116,73,100,19,116,105,109,101,115,
+116,97,109,112,21,116,105,109,101,84,111,76,105,118,101,15,
+104,101,97,100,101,114,115,23,100,101,115,116,105,110,97,116,
+105,111,110,4,5,6,1,10,11,1,1,6,73,57,52,70,49,51,48,67,53,45,
+53,70,51,53,45,54,70,48,57,45,53,48,69,70,45,69,70,51,68,53,
+54,54,51,70,65,67,52,1,4,0,4,0,10,5,37,68,83,77,101,115,115,
+97,103,105,110,103,86,101,114,115,105,111,110,4,1,9,68,83,73,
+100,6,7,110,105,108,1,6,1]
View
@@ -0,0 +1,37 @@
+-module(amf3).
+-export([read_uint_29/1]).
+
+%% Read an AMF3 variable-length unsigned 29-bit integer (U29) from the
+%% front of a binary.
+%%
+%% The high bit of each of the first 3 bytes is a flag telling whether
+%% another byte follows; the remaining 7 bits of those bytes are payload.
+%% When a 4th byte is present it carries no flag: all 8 of its bits are
+%% payload. That gives at most 7 + 7 + 7 + 8 = 29 significant bits, so the
+%% largest value that can be represented is 2^29 - 1.
+%% (hex) : (binary)
+%% 0x00000000 - 0x0000007F : 0xxxxxxx
+%% 0x00000080 - 0x00003FFF : 1xxxxxxx 0xxxxxxx
+%% 0x00004000 - 0x001FFFFF : 1xxxxxxx 1xxxxxxx 0xxxxxxx
+%% 0x00200000 - 0x1FFFFFFF : 1xxxxxxx 1xxxxxxx 1xxxxxxx xxxxxxxx
+%%
+%% Returns {ok, Value, RemainBin} where RemainBin is whatever follows the
+%% encoded integer, or {bad, Reason} when the input is not a binary or
+%% ends before the integer is complete.
+
+%% 1 byte: 0xxxxxxx
+read_uint_29(<<0:1, A:7, Rest/binary>>) ->
+    {ok, A, Rest};
+
+%% 2 bytes: 1xxxxxxx 0xxxxxxx
+read_uint_29(<<1:1, A:7, 0:1, B:7, Rest/binary>>) ->
+    {ok, (A bsl 7) bor B, Rest};
+
+%% 3 bytes: 1xxxxxxx 1xxxxxxx 0xxxxxxx
+read_uint_29(<<1:1, A:7, 1:1, B:7, 0:1, C:7, Rest/binary>>) ->
+    {ok, (A bsl 14) bor (B bsl 7) bor C, Rest};
+
+%% 4 bytes: 1xxxxxxx 1xxxxxxx 1xxxxxxx xxxxxxxx
+%% The last byte is taken whole, hence the shifts by 22/15/8 rather than
+%% by multiples of 7.
+read_uint_29(<<1:1, A:7, 1:1, B:7, 1:1, C:7, D:8, Rest/binary>>) ->
+    {ok, (A bsl 22) bor (B bsl 15) bor (C bsl 8) bor D, Rest};
+
+%% A binary that matched none of the shapes above is empty or stops in
+%% the middle of the integer.
+read_uint_29(Bin) when is_binary(Bin) ->
+    {bad, "Truncated U29 integer"};
+
+read_uint_29(_NotBinary) ->
+    {bad, "Input is not a binary"}.
View
@@ -0,0 +1,80 @@
+-module(utf8).
+-export([from_binary/1, to_binary/1]).
+
+%% Given a binary of UTF-8 encoded text, return a UTF-32 string
+%% (i.e. each element is a Unicode code point).
+%%
+%% Returns {ok, CodePoints} on success, or {bad_octet, <<Byte>>} when a
+%% byte that cannot start a UTF-8 sequence (10xxxxxx or 11111xxx) is found
+%% where a lead byte is expected.
+%% A sequence that is cut short, or whose continuation bytes are not of the
+%% form 10xxxxxx, is not reported as a tuple: it raises a badmatch error.
+%% Overlong encodings and surrogate code points are not rejected.
+from_binary(Bin) ->
+    decode_binary(Bin, []).
+
+%% decode_binary(Remaining, Acc): Acc holds the code points decoded so
+%% far, most recent first; it is reversed once the input is exhausted.
+decode_binary(<<>>, Str) ->
+    {ok, lists:reverse(Str)};
+
+%% 0-7F 0zzzzzzz
+decode_binary(<<0:1, Z:7, Rest/binary>>, Str) ->
+    decode_binary(Rest, [Z | Str]);
+
+%% 110yyyyy 10zzzzzz
+decode_binary(<<2#110:3, Y:5, Tail/binary>>, Str) ->
+    <<2#10:2, Z:6, Rest/binary>> = Tail,
+    decode_binary(Rest, [(Y bsl 6) bor Z | Str]);
+
+%% 1110xxxx 10yyyyyy 10zzzzzz
+decode_binary(<<2#1110:4, X:4, Tail/binary>>, Str) ->
+    <<2#10:2, Y:6, 2#10:2, Z:6, Rest/binary>> = Tail,
+    decode_binary(Rest, [(X bsl 12) bor (Y bsl 6) bor Z | Str]);
+
+%% 11110www 10xxxxxx 10yyyyyy 10zzzzzz
+decode_binary(<<2#11110:5, W:3, Tail/binary>>, Str) ->
+    <<2#10:2, X:6, 2#10:2, Y:6, 2#10:2, Z:6, Rest/binary>> = Tail,
+    decode_binary(Rest, [(W bsl 18) bor (X bsl 12) bor (Y bsl 6) bor Z | Str]);
+
+%% Any other first byte (a stray continuation byte 10xxxxxx, or 11111xxx)
+%% is not a valid lead byte; hand it back to the caller as a 1-byte binary.
+decode_binary(<<Octet:1/binary, _/binary>>, _Str) ->
+    {bad_octet, Octet}.
+
+%% Given a list of Unicode code points, return a binary of UTF-8
+%% encoded text.
+%%
+%% Returns {ok, Binary} on success, or {bad_code_point, Element} for the
+%% first element that is not an integer in the range 0..16#10FFFF.
+%% Surrogate code points (16#D800..16#DFFF) are encoded as ordinary
+%% 3-byte sequences and are not rejected.
+to_binary(Str) ->
+    encode_utf32(Str, []).
+
+%% encode_utf32(CodePoints, Acc): Acc collects the encoded pieces (single
+%% bytes or small binaries), most recent first.
+encode_utf32([], Utf8) ->
+    {ok, list_to_binary(lists:reverse(Utf8))};
+
+%% Reject anything that is not a code point up front, so the clauses below
+%% only need an upper bound. Without this check a negative integer or a
+%% float would pass the "< 16#80" test and make list_to_binary/1 fail.
+encode_utf32([U32 | _Str], _Utf8)
+  when not is_integer(U32); U32 < 0; U32 >= 16#110000 ->
+    {bad_code_point, U32};
+
+%% 0-7F 0zzzzzzz
+encode_utf32([U32 | Str], Utf8) when U32 < 16#80 ->
+    encode_utf32(Str, [U32 | Utf8]);
+
+%% 110yyyyy 10zzzzzz
+%% An integer segment keeps only its low Size bits, so "U32:6" yields the
+%% low 6 bits and "(U32 bsr 6):5" the 5 bits above them.
+encode_utf32([U32 | Str], Utf8) when U32 < 16#800 ->
+    Bytes = <<2#110:3, (U32 bsr 6):5,
+              2#10:2, U32:6>>,
+    encode_utf32(Str, [Bytes | Utf8]);
+
+%% 1110xxxx 10yyyyyy 10zzzzzz
+encode_utf32([U32 | Str], Utf8) when U32 < 16#10000 ->
+    Bytes = <<2#1110:4, (U32 bsr 12):4,
+              2#10:2, (U32 bsr 6):6,
+              2#10:2, U32:6>>,
+    encode_utf32(Str, [Bytes | Utf8]);
+
+%% 11110www 10xxxxxx 10yyyyyy 10zzzzzz
+encode_utf32([U32 | Str], Utf8) ->
+    Bytes = <<2#11110:5, (U32 bsr 18):3,
+              2#10:2, (U32 bsr 12):6,
+              2#10:2, (U32 bsr 6):6,
+              2#10:2, U32:6>>,
+    encode_utf32(Str, [Bytes | Utf8]).

0 comments on commit 6882faa

Please sign in to comment.