Compute sizes of bit expressions and patterns
This allows us to do more exact checking of bit expressions and patterns.

Fixes #75.
Fixes #89.
zuiderkwast committed Nov 19, 2018
1 parent d74675a commit 5e1c425
Showing 8 changed files with 234 additions and 19 deletions.
120 changes: 120 additions & 0 deletions src/gradualizer_bin.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
%% Helper module for binaries/bitstrings


%% Computes the type of a bitstring expression or pattern based on the sizes
%% of the elements. The returned type is a normalized bitstring type.
%%
%% The argument is a `{bin, Anno, Elements}' abstract form. Its size is first
%% summarised as a bitstr_view() by bin_view/1 and then converted to an
%% abstract type by bitstr_view_to_type/1.
-spec compute_type(ExprOrPat) -> erl_parse:abstract_type()
        when ExprOrPat :: {bin, _, _},
             ExprOrPat :: erl_parse:abstract_expr().
compute_type(Bin) ->
    View = bin_view(Bin),
    bitstr_view_to_type(View).

%% <<_:B, _:_*U>> is represented as {B, U} (fixed base + multiple of unit).
%% A unit of 0 means there is no variable part, i.e. the size is exactly B.
%% The atom 'none' is converted to the type none() by bitstr_view_to_type/1
%% and is absorbing under bitstr_concat/2.
%% NOTE(review): presumably 'none' marks a bit expression that cannot have any
%% valid size -- confirm, nothing in this module currently produces it.
-type bitstr_view() :: {non_neg_integer(), non_neg_integer()} | none.

%% Combines the views of two adjacent bitstring segments.
%% 'none' on either side makes the result 'none'. Otherwise the fixed parts
%% are added and the unit of the result is the gcd of the two units.
bitstr_concat(none, _Other) ->
    none;
bitstr_concat(_Other, none) ->
    none;
bitstr_concat({BaseA, UnitA}, {BaseB, UnitB}) ->
    Base = BaseA + BaseB,
    Unit = gcd(UnitA, UnitB),
    {Base, Unit}.

%% Converts a bitstring view to the corresponding abstract type:
%% {B, U} becomes the binary type <<_:B, _:_*U>> and 'none' becomes none().
%% All annotations in the result are erl_anno:new(0).
-spec bitstr_view_to_type(bitstr_view()) -> erl_parse:abstract_type().
bitstr_view_to_type(none) ->
    {type, erl_anno:new(0), none, []};
bitstr_view_to_type({Base, Unit}) ->
    Pos = erl_anno:new(0),
    Params = [{integer, Pos, Base}, {integer, Pos, Unit}],
    {type, Pos, binary, Params}.

%% Computes the size view of a whole bit expression or pattern.
%% Every element is first turned into its own view; the views are then
%% folded together with bitstr_concat/2, starting from the empty view {0, 0}.
-spec bin_view({bin, _, _}) -> bitstr_view().
bin_view({bin, _Anno, Elements}) ->
    Views = lists:map(fun bin_element_view/1, Elements),
    Combine = fun(View, Acc) -> bitstr_concat(View, Acc) end,
    lists:foldl(Combine, {0, 0}, Views).

%% Returns the view {Base, Unit} of a single bit syntax element
%% {bin_element, Anno, Expr, Size, Specifiers}.
%%
%% The clauses are tried in this order:
%%  1. An integer, char or string literal with default size: the element is
%%     evaluated on its own and the size of the result is used.
%%  2. A string literal with an explicit size: handled as one element per
%%     character, each with the same size and specifiers.
%%  3. Any other expression with default size: the size follows from the
%%     type specifier.
%%  4. An explicit size: if the size is a constant expression the view is
%%     exact, otherwise it is a multiple of the unit.
bin_element_view({bin_element, Anno, {Lit, _, _}, default, _Spec} = BinElem)
  when Lit == integer; Lit == char; Lit == string ->
    %% Literal with default size, i.e. no variables to consider.
    %% Size is not allowed for utf8/utf16/utf32.
    Bin = {bin, Anno, [BinElem]},
    {value, Value, []} = erl_eval:expr(Bin, []),
    {bit_size(Value), 0};
bin_element_view({bin_element, Anno, {string, _, Chars}, Size, Spec}) ->
    %% Expand <<"ab":32/float>> to <<$a:32/float, $b:32/float>>
    %% FIXME: Not true for float, integer
    Views = [bin_element_view({bin_element, Anno, {char, Anno, Char}, Size, Spec})
             || Char <- Chars],
    lists:foldl(fun bitstr_concat/2, {0, 0}, Views);
bin_element_view({bin_element, _Anno, _Expr, default, Specifiers}) ->
    %% Default size
    %% <<1/integer-unit:2>> gives the following error:
    %% * 1: a bit unit size must not be specified unless a size is specified too
    %% However <<(<<9:9>>)/binary-unit:3>> gives no error.
    %% The type specifier 'binary' seems to be the only exception though.
    case get_type_specifier(Specifiers) of
        integer   -> {8, 0};
        float     -> {64, 0};
        binary    -> {0, get_unit(Specifiers)};
        bytes     -> {0, 8};
        bitstring -> {0, 1};
        bits      -> {0, 1};
        utf8      -> {0, 8};  %% 1-4 bytes
        utf16     -> {0, 16}; %% 2-4 bytes
        utf32     -> {32, 0}  %% 4 bytes, fixed
    end;
bin_element_view({bin_element, _Anno, _Expr, SizeSpec, Specifiers}) ->
    %% Non-default size, possibly a constant expression
    try erl_eval:expr(SizeSpec, []) of
        {value, Sz, _VarBinds} ->
            {Sz * get_unit(Specifiers), 0}
    catch
        error:{unbound_var, _} ->
            %% Variable size
            U = get_unit(Specifiers),
            case get_type_specifier(Specifiers) of
                float when U == 64 -> {64, 0}; %% size must be 1 in this case
                float -> {32, 32};             %% a float must be 32 or 64 bits
                _OtherType -> {0, U}           %% any multiple of the unit
            end
    end.

%% Returns the type specifier (integer, float, binary, ...) found in the
%% specifier list of a bin element. If several are present, the first one
%% wins. When the list contains no type specifier, or the specifiers are
%% 'default', the result is 'integer'.
-spec get_type_specifier(Specifiers :: [atom() | {unit, non_neg_integer()}] |
                                       default) -> atom().
get_type_specifier(Specifiers) when is_list(Specifiers) ->
    TypeSpecifiers = [integer, float, binary, bytes, bitstring, bits,
                      utf8, utf16, utf32],
    case [S || S <- Specifiers, lists:member(S, TypeSpecifiers)] of
        [S|_] -> S;
        []    -> integer %% default
    end;
get_type_specifier(default) -> integer.

%% Returns the unit of a bin element: the first explicit {unit, U} specifier
%% if there is one, otherwise the default unit for the element's type.
%% The specifiers 'default' give the unit 1.
get_unit(Specifiers) when is_list(Specifiers) ->
    case [U || {unit, U} <- Specifiers] of
        [U|_] -> U;
        []    -> get_default_unit(Specifiers)
    end;
get_unit(default) -> 1.

%% Returns the unit used when no explicit {unit, U} specifier is given:
%% 8 for the type specifiers binary and bytes, 1 for everything else.
get_default_unit(Specifiers) when is_list(Specifiers) ->
    case get_type_specifier(Specifiers) of
        binary -> 8;
        bytes  -> 8;
        _Other -> 1
    end.

%% Greatest common divisor of two non-negative integers.
%% The arguments are ordered so that gcd1/2 always gets the larger one first.
-spec gcd(non_neg_integer(), non_neg_integer()) -> non_neg_integer().
gcd(A, B) when A >= B ->
    gcd1(A, B);
gcd(A, B) ->
    gcd1(B, A).

%% Euclid's algorithm. gcd1(A, 0) is A, so gcd1(0, 0) is 0.
-spec gcd1(non_neg_integer(), non_neg_integer()) -> non_neg_integer().
gcd1(A, 0) -> A;
gcd1(A, B) -> gcd1(B, A rem B).
43 changes: 25 additions & 18 deletions src/typechecker.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1220,7 +1220,7 @@ type_check_expr(Env, {cons, _, Head, Tail}) ->
{{type, erl_anno:new(0), nonempty_list, [ElemTy]}, VB, Cs}
type_check_expr(Env, {bin, _, BinElements}) ->
type_check_expr(Env, {bin, _, BinElements} = BinExpr) ->
%% <<Expr:Size/TypeSpecifierList, ...>>
VarBindAndCsList =
lists:map(fun ({bin_element, _P, Expr, _Size, _Specif} = BinElem) ->
Expand All @@ -1232,10 +1232,8 @@ type_check_expr(Env, {bin, _, BinElements}) ->
{VarBinds, Css} = lists:unzip(VarBindAndCsList),
RetTy = if
Env#env.infer ->
%% TODO: Infer the size parameters of the bitstring
{type, erl_anno:new(0), binary,
[{integer, erl_anno:new(0), 0},
{integer, erl_anno:new(0), 1}]};
%% Infer the size parameters of the bitstring
not Env#env.infer ->
Expand Down Expand Up @@ -1874,16 +1872,12 @@ do_type_check_expr_in(Env, Ty, {string, LINE, String}) ->
throw({type_error, string, LINE, String, Ty})
do_type_check_expr_in(Env, Ty, {bin, LINE, _BinElements} = Bin) ->
%% Accept any binary type regardless of bit size parameters.
%% TODO: If we can compute the length of the bit expression, we get the
%% exact type and can require that it's a subtype of Ty.
Cs1 = case subtype(Ty, {type, LINE, binary, [{integer, LINE, 0},
{integer, LINE, 1}]},
Env#env.tenv) of
BinTy = gradualizer_bin:compute_type(Bin),
Cs1 = case subtype(BinTy, Ty, Env#env.tenv) of
{true, Cs0} ->
false ->
throw({type_error, bin, LINE, Ty})
throw({type_error, bin, LINE, BinTy, Ty})
{_Ty, VarBinds, Cs2} = type_check_expr(Env, Bin),
{VarBinds, constraints:combine(Cs1, Cs2)};
Expand Down Expand Up @@ -2925,16 +2919,24 @@ add_type_pat(String = {string, P, _}, Ty, _TEnv, VEnv) ->
false ->
throw({type_error, pattern, P, String, Ty})
add_type_pat({bin, _, BinElements}, {type, _, binary, [_,_]}, TEnv, VEnv) ->
%% TODO: Consider the bit size parameters
add_type_pat({bin, P, BinElements} = Bin, Ty, TEnv, VEnv) ->
%% Check the size parameters of the bit pattern
BinTy = gradualizer_bin:compute_type(Bin),
Cs1 = case subtype(BinTy, Ty, TEnv) of
{true, Cs0} ->
false ->
throw({type_error, bin, P, BinTy, Ty})
%% Check the elements
lists:foldl(fun ({bin_element, _, Pat, _Size, _Specifiers} = BinElem,
{VEnv1, Cs1}) ->
{VEnvAcc, CsAcc}) ->
%% Check Pat against the bit syntax type specifiers
ElemTy = type_of_bin_element(BinElem),
{VEnv2, Cs2} = add_type_pat(Pat, ElemTy, TEnv, VEnv1),
{VEnv2, constraints:combine(Cs1, Cs2)}
{VEnv2, Cs2} = add_type_pat(Pat, ElemTy, TEnv, VEnvAcc),
{VEnv2, constraints:combine(CsAcc, Cs2)}
{VEnv, constraints:empty()},
{VEnv, Cs1},
add_type_pat({record, P, Record, Fields}, Ty, TEnv, VEnv) ->
case expect_record_type(Record, Ty, TEnv) of
Expand Down Expand Up @@ -3494,6 +3496,11 @@ handle_type_error({type_error, tuple, LINE, Ty}) ->
[LINE, typelib:pp_type(Ty)]);
handle_type_error({unknown_variable, P, Var}) ->
io:format("Unknown variable ~p on line ~p.~n", [Var, P]);
handle_type_error({type_error, bin, P, ActualTy, ExpectTy}) ->
    %% The format string has three directives (~p, ~s, ~s), so the argument
    %% list must hold the position followed by both pretty-printed types.
    io:format("The bit expression on line ~p is expected "
              "to have type ~s but it has type ~s~n",
              [P, typelib:pp_type(ExpectTy), typelib:pp_type(ActualTy)]);
handle_type_error({type_error, bit_type, Expr, P, Ty1, Ty2}) ->
io:format("The expression ~s inside the bit expression on line ~p has type ~s "
"but the type specifier indicates ~s~n",
Expand Down
69 changes: 69 additions & 0 deletions test/gradualizer_bin_tests.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@


%% Parse type and expression
%% t/1 parses a type written as a string and removes its position
%% information via typelib:remove_pos/1.
%% e/1 turns an expression written as a string into abstract syntax using
%% merl:quote/1.
t(Str) -> typelib:remove_pos(typelib:parse_type(Str)).
e(Str) -> merl:quote(Str).

%% Builds a titled test object {Title, Test}. T is the expected bitstring type
%% and E the bit expression, both written as plain Erlang source; ??T and ??E
%% stringify them so that they can be parsed by t/1 and e/1. The source text
%% of the expression is used as the title.
-define(_assert_bin_type(T, E),
{??E, ?_assertEqual(t(??T), gradualizer_bin:compute_type(e(??E)))}).

%% Expressions combining elements of fixed size with elements of variable size.
compute_type_combined_test_() ->
    [
     ?_assert_bin_type(<<_:14, _:_*3>>, <<0:14, N/binary-unit:3>>),
     ?_assert_bin_type(<<_:14, _:_*3>>, <<0:N/integer-unit:3, "ab":7/integer>>)
    ].

%% utf8/utf16/utf32 elements, both string literals and variables.
compute_type_utf_test_() ->
    [
     ?_assert_bin_type(<<_:24>>, <<"abc"/utf8>>),
     ?_assert_bin_type(<<_:48>>, <<"abc"/utf16>>),
     ?_assert_bin_type(<<_:96>>, <<"abc"/utf32>>),
     ?_assert_bin_type(<<_:_*8>>, <<X/utf8>>),
     ?_assert_bin_type(<<_:_*16>>, <<X/utf16>>),
     ?_assert_bin_type(<<_:32>>, <<X/utf32>>)
    ].

%% Elements with the default type specifier (integer), with and without an
%% explicit size and unit.
compute_type_test_() ->
    [
     ?_assert_bin_type(<<_:8>>, <<X>>),
     ?_assert_bin_type(<<_:3>>, <<X:3>>),
     ?_assert_bin_type(<<_:_*1>>, <<X:N>>),
     ?_assert_bin_type(<<_:12>>, <<X:3/unit:4>>),
     ?_assert_bin_type(<<_:_*4>>, <<X:N/unit:4>>)
    ].

%% binary/bytes/bitstring/bits elements, with and without size and unit.
compute_type_bitstring_test_() ->
    [
     ?_assert_bin_type(<<_:8>>, <<X:1/binary>>),
     ?_assert_bin_type(<<_:42>>, <<X:7/binary-unit:6>>),
     ?_assert_bin_type(<<_:_*8>>, <<X/binary>>),
     ?_assert_bin_type(<<_:_*2>>, <<X/binary-unit:2>>),
     ?_assert_bin_type(<<_:_*8>>, <<X/bytes>>),
     ?_assert_bin_type(<<_:16>>, <<X:2/bytes>>),
     ?_assert_bin_type(<<_:_*1>>, <<X/bitstring>>),
     ?_assert_bin_type(<<_:_*1>>, <<X/bits>>)
    ].

%% float elements with default, constant and variable size.
compute_type_float_test_() ->
    [
     ?_assert_bin_type(<<_:64>>, <<X/float>>),
     ?_assert_bin_type(<<_:32>>, <<X:32/float>>),
     ?_assert_bin_type(<<_:32, _:_*32>>, <<X:S/float>>),
     ?_assert_bin_type(<<_:32, _:_*32>>, <<X:S/float-unit:16>>),
     ?_assert_bin_type(<<_:64>>, <<X:S/float-unit:64>>)
    ].

%% Run only in OTP 21
%% The predefined macro OTP_RELEASE exists from OTP 21 onwards, so it is used
%% to pick between the real tests and a placeholder with an empty test list.
-ifdef(OTP_RELEASE).
compute_type_float_string_test_() ->
    [
     ?_assert_bin_type(<<_:192>>, <<"abc"/float>>),
     ?_assert_bin_type(<<_:96>>, <<"abc":32/float>>),
     ?_assert_bin_type(<<_:96, _:_*32>>, <<"abc":S/float>>)
    ].
-else.
compute_type_float_string_test_() ->
    {"Skipping <<\"str\"/float>> tests in this OTP release", []}.
-endif.
5 changes: 5 additions & 0 deletions test/should_fail/bin_expression_1.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

-spec bin_1() -> binary().
bin_1() ->
5 changes: 5 additions & 0 deletions test/should_fail/bin_expression_2.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

%% Negative fixture (lives under should_fail). Both segments have a variable
%% size, so each contributes a multiple of its unit. gcd(27, 30) = 3, so the
%% computed type is <<_:_*3>>, which the checker is expected to reject
%% against the declared <<_:_*6>>.
-spec bin_2(any(), any()) -> <<_:_*6>>.
bin_2(A, B) ->
<<0:A/integer-unit:27, 1:B/integer-unit:30>>.
File renamed without changes.
4 changes: 4 additions & 0 deletions test/should_pass/bitstring.erl
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ bin4(A,B) ->
bin5() ->
<<"abc", 42, "abc"/utf32, "abc"/float, 42/float-little,
(<<"abc">>):8/bits, (<<"abc">>)/bytes>>.

%% Both segments have a variable size, so each contributes a multiple of its
%% unit. gcd(36, 30) = 6, so the computed type is <<_:_*6>>, matching the spec.
-spec bin6(any(), any()) -> <<_:_*6>>.
bin6(A, B) ->
<<0:A/integer-unit:36, 1:B/integer-unit:30>>.
7 changes: 6 additions & 1 deletion test/typechecker_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,12 @@ infer_types_test_() ->
?_assertNot(type_check_forms(["f() -> V = [1, 2], g(V).",
"-spec g(integer()) -> any().",
"g(Int) -> Int + 1."],
%% infer exact type of bitstrings
?_assertMatch("<<_:7, _:_*16>>",
type_check_expr(_Env = "f() -> receive X -> X end.",
_Expr = "<<(f())/utf16, 7:7>>",

type_check_call_test_() ->
Expand Down

