Skip to content

Commit

Permalink
amf3 read uint 29 and utf8 read
Browse files Browse the repository at this point in the history
  • Loading branch information
trung committed Sep 28, 2009
1 parent 384b16e commit 6882faa
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 0 deletions.
20 changes: 20 additions & 0 deletions temp/sample_parsed_request.txt
@@ -0,0 +1,20 @@
[0,3, // FP version 9 and above -> AMF3 spec
0,0, // 0 header
0,1, // 1 body
0,4,110,117,108,108, // targetUri = "null"
0,2,47,49, // responseUri = "/1"
[0,0,0,224, // message length = 224
10, // marker = object-marker = 0x0A (for AMF3)
0,0,0,1,17,10,129,19,77,102,108,101,120,46,109,101,115,115,97,103,
105,110,103,46,109,101,115,115,97,103,101,115,46,67,111,109,
109,97,110,100,77,101,115,115,97,103,101,19,111,112,101,114,
97,116,105,111,110,27,99,111,114,114,101,108,97,116,105,111,
110,73,100,9,98,111,100,121,19,109,101,115,115,97,103,101,73,
100,17,99,108,105,101,110,116,73,100,19,116,105,109,101,115,
116,97,109,112,21,116,105,109,101,84,111,76,105,118,101,15,
104,101,97,100,101,114,115,23,100,101,115,116,105,110,97,116,
105,111,110,4,5,6,1,10,11,1,1,6,73,57,52,70,49,51,48,67,53,45,
53,70,51,53,45,54,70,48,57,45,53,48,69,70,45,69,70,51,68,53,
54,54,51,70,65,67,52,1,4,0,4,0,10,5,37,68,83,77,101,115,115,
97,103,105,110,103,86,101,114,115,105,111,110,4,1,9,68,83,73,
100,6,7,110,105,108,1,6,1]
37 changes: 37 additions & 0 deletions temp/server/amf3.erl
@@ -0,0 +1,37 @@
-module(amf3).
-export([read_uint_29/1]).

%% Decode an AMF3 variable-length unsigned 29-bit integer (U29) from the
%% front of a binary.
%%
%% The high bit of each of the first 3 bytes is a continuation flag that
%% says whether the next byte is part of the integer; the remaining 7 bits
%% are payload. A 4th byte, when present, contributes all 8 of its bits.
%% That gives 3 x 7 + 8 = 29 significant bits, so the largest value that
%% can be represented is 2^29 - 1 (16#1FFFFFFF).
%%
%% (hex)                   : (binary)
%% 0x00000000 - 0x0000007F : 0xxxxxxx
%% 0x00000080 - 0x00003FFF : 1xxxxxxx 0xxxxxxx
%% 0x00004000 - 0x001FFFFF : 1xxxxxxx 1xxxxxxx 0xxxxxxx
%% 0x00200000 - 0x1FFFFFFF : 1xxxxxxx 1xxxxxxx 1xxxxxxx xxxxxxxx
%%
%% Returns:
%%   {ok, Value, RemainBin} - Value is the decoded integer and RemainBin
%%                            is the input that follows the encoded bytes.
%%   {bad, Reason}          - the input is not a binary, or it ends before
%%                            the integer is complete (this includes the
%%                            empty binary). Reason is a string.
%%
%% The continuation flags are stripped from the payload: each clause
%% matches the flag bit separately from the 7 payload bits, so the flags
%% never leak into Value.
-spec read_uint_29(term()) -> {ok, non_neg_integer(), binary()} | {bad, string()}.
read_uint_29(<<0:1, A:7, Rest/binary>>) ->
    {ok, A, Rest};
read_uint_29(<<1:1, A:7, 0:1, B:7, Rest/binary>>) ->
    {ok, (A bsl 7) bor B, Rest};
read_uint_29(<<1:1, A:7, 1:1, B:7, 0:1, C:7, Rest/binary>>) ->
    {ok, (A bsl 14) bor (B bsl 7) bor C, Rest};
read_uint_29(<<1:1, A:7, 1:1, B:7, 1:1, C:7, D:8, Rest/binary>>) ->
    %% The 4th byte carries no flag: all 8 bits are payload, and no
    %% further byte is ever consumed.
    {ok, (A bsl 22) bor (B bsl 15) bor (C bsl 8) bor D, Rest};
read_uint_29(Bin) when is_binary(Bin) ->
    %% A continuation flag promised more bytes than the binary holds
    %% (or the binary is empty).
    {bad, "Incomplete U29 integer"};
read_uint_29(_NotBinary) ->
    {bad, "Input is not a binary"}.
80 changes: 80 additions & 0 deletions temp/server/utf8.erl
@@ -0,0 +1,80 @@
-module(utf8).
-export([from_binary/1, to_binary/1]).

%% Decode UTF-8 encoded text held in a binary into a list of Unicode
%% code points (one integer per character, i.e. a UTF-32 style string).
%% Decoding is delegated to decode_binary/2, started with an empty
%% accumulator; its result is returned unchanged.
from_binary(Utf8Bin) ->
    EmptyAcc = [],
    decode_binary(Utf8Bin, EmptyAcc).

%% decode_binary(Bin, Str) -> {ok, CodePoints} | {bad_octet, Octet}
%%
%% Walks Bin one UTF-8 sequence at a time. Str is the accumulator of code
%% points decoded so far, newest first; it is reversed once the input is
%% exhausted.
%%
%% The lead octet selects the sequence length (1 to 4 octets). If the lead
%% octet matches none of the four UTF-8 lead patterns (e.g. a stray
%% continuation octet 10xxxxxx, or 11111xxx), decoding stops and
%% {bad_octet, Octet} is returned, where Octet is that one-byte binary.
%%
%% Continuation octets are matched assertively: a continuation octet that
%% does not have the 10xxxxxx form raises a match error, and a sequence
%% cut short by the end of the input makes split_binary/2 fail.
decode_binary(<<>>, Str) ->
    {ok, lists:reverse(Str)};

decode_binary(Bin, Str) ->
    {B1, B2} = split_binary(Bin, 1),
    case B1 of
        %% 0-7F  0zzzzzzz
        <<0:1, Z:7>> ->
            decode_binary(B2, [Z | Str]);

        %% 110yyyyy 10zzzzzz
        <<2#110:3, Y:5>> ->
            {<<2#10:2, Z:6>>, B3} = split_binary(B2, 1),
            U32 = (Y bsl 6) bor Z,
            decode_binary(B3, [U32 | Str]);

        %% 1110xxxx 10yyyyyy 10zzzzzz
        <<2#1110:4, X:4>> ->
            {<<2#10:2, Y:6, 2#10:2, Z:6>>, B3} = split_binary(B2, 2),
            U32 = (X bsl 12) bor (Y bsl 6) bor Z,
            decode_binary(B3, [U32 | Str]);

        %% 11110www 10xxxxxx 10yyyyyy 10zzzzzz
        <<2#11110:5, W:3>> ->
            {<<2#10:2, X:6, 2#10:2, Y:6, 2#10:2, Z:6>>, B3} = split_binary(B2, 3),
            U32 = (W bsl 18) bor (X bsl 12) bor (Y bsl 6) bor Z,
            decode_binary(B3, [U32 | Str]);

        %% Any other lead octet is not valid UTF-8. This must be a
        %% wildcard: a case clause is a pattern, so the atom 'true'
        %% would never match a binary and the error tuple would be
        %% unreachable.
        _ ->
            {bad_octet, B1}
    end.

%% Encode a list of Unicode code points as UTF-8 text in a binary.
%% Encoding is delegated to encode_utf32/2, started with an empty
%% accumulator; its result is returned unchanged.
to_binary(CodePoints) ->
    EmptyAcc = [],
    encode_utf32(CodePoints, EmptyAcc).

%% encode_utf32(CodePoints, Utf8) -> {ok, Binary} | {bad_code_point, Term}
%%
%% Encodes a list of Unicode code points as UTF-8. Utf8 is the accumulator
%% of octets emitted so far, newest first; it is reversed and turned into
%% a binary once the input list is exhausted.
%%
%% Each clause handles one UTF-8 sequence length, selected by the range
%% the code point falls in. Encoding stops at the first element that is
%% not an integer in 0..16#10FFFF and {bad_code_point, Element} is
%% returned. Negative integers and non-integers are rejected here rather
%% than being pushed onto the accumulator, where they would only fail
%% later inside list_to_binary/1 or a bitwise operator.
encode_utf32([], Utf8) ->
    {ok, list_to_binary(lists:reverse(Utf8))};

%% 0-7F  0zzzzzzz
encode_utf32([U32 | Str], Utf8) when is_integer(U32), U32 >= 0, U32 < 16#80 ->
    encode_utf32(Str, [U32 | Utf8]);

%% 110yyyyy 10zzzzzz
encode_utf32([U32 | Str], Utf8) when is_integer(U32), U32 >= 16#80, U32 < 16#800 ->
    Y = 2#11000000 bor ((U32 band 16#7c0) bsr 6),
    Z = 2#10000000 bor (U32 band 16#3f),
    encode_utf32(Str, [Z, Y | Utf8]);

%% 1110xxxx 10yyyyyy 10zzzzzz
encode_utf32([U32 | Str], Utf8) when is_integer(U32), U32 >= 16#800, U32 < 16#10000 ->
    X = 2#11100000 bor ((U32 band 16#f000) bsr 12),
    Y = 2#10000000 bor ((U32 band 16#fc0) bsr 6),
    Z = 2#10000000 bor (U32 band 16#3f),
    encode_utf32(Str, [Z, Y, X | Utf8]);

%% 11110www 10xxxxxx 10yyyyyy 10zzzzzz
encode_utf32([U32 | Str], Utf8) when is_integer(U32), U32 >= 16#10000, U32 < 16#110000 ->
    W = 2#11110000 bor ((U32 band 16#1c0000) bsr 18),
    X = 2#10000000 bor ((U32 band 16#3f000) bsr 12),
    Y = 2#10000000 bor ((U32 band 16#fc0) bsr 6),
    Z = 2#10000000 bor (U32 band 16#3f),
    encode_utf32(Str, [Z, Y, X, W | Utf8]);

%% Past the allocated code points, negative, or not an integer at all.
encode_utf32([U32 | _Str], _Utf8) ->
    {bad_code_point, U32}.

0 comments on commit 6882faa

Please sign in to comment.