Skip to content

Commit ff10beb

Browse files
committed
INCOMPATIBILITY: changed encoding of bignums
Bignums were encoded in a way that didn't preserve the sorting property. To fix this, I have prepended a length indicator. In order to be able to read legacy-encoded bignums, I made use of the fact that the first byte returned in the return value from encode_big1/1 was never > 254. Thus, 255 is used to indicate the new format. I.e. <<11, 255, ...length indicator..., ...encoded bignum...>> The length indicator gives the number of bytes, and will usually be one byte long. If the number of bytes is > 127, the length indicator will be encoded as a sequence of 7-bit "septets", where each except the last is tagged with 1 in the MSB. The decode function removes the size indicator, and also recognizes that if the first byte is =< 254, there is no size indicator, and decodes a legacy bignum. This means that a data set can be converted by simply decoding and encoding each value. For further backwards compatibility, the function sext:legacy_encode_bignum/1 encodes a bignum in the old format.
1 parent a4162b7 commit ff10beb

File tree

2 files changed

+91
-4
lines changed

2 files changed

+91
-4
lines changed

src/sext.erl

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
-export([to_sb32/1, from_sb32/1]).
2929
-export([to_hex/1, from_hex/1]).
3030

31+
-export([legacy_encode_bignum/1]).
32+
3133
-define(negbig , 8).
3234
-define(neg4 , 9).
3335
-define(pos4 , 10).
@@ -422,15 +424,83 @@ encode_neg_int(I,R) when I < -16#7fFFffFF ->
422424
%% RAdj = MaxR - Ri,
423425
%% ?dbg("RAdj = ~p~n", [<<RAdj:Sz>>]),
424426
%% encode_bits_elems(<<RAdj:Sz>>).
427+
legacy_encode_bignum(I) when is_integer(I), I > 16#7fffffff ->
428+
Bl = encode_big1(I),
429+
?dbg("Bl = ~p~n", [Bl]),
430+
Bb = list_to_binary(Bl),
431+
?dbg("Bb = ~p~n", [Bb]),
432+
<<?posbig, (encode_bin_elems(Bb))/binary, 0:8>>.
425433

426434

427435
encode_big(I) ->
428436
Bl = encode_big1(I),
429437
?dbg("Bl = ~p~n", [Bl]),
430-
Bb = list_to_binary(Bl),
438+
Bb = prepend_size(list_to_binary(Bl)),
431439
?dbg("Bb = ~p~n", [Bb]),
432440
encode_bin_elems(Bb).
433441

442+
prepend_size(B) ->
443+
Sz = byte_size(B),
444+
<<255, (encode_size(Sz))/binary, B/binary>>.
445+
446+
remove_size_bits(<<255, T/binary>>) ->
447+
{_, Rest} = untag_7bits(T, <<>>),
448+
Rest;
449+
remove_size_bits(B) ->
450+
%% legacy bignum
451+
B.
452+
453+
454+
encode_size(I) when I > 127 ->
455+
B = int_to_binary(I),
456+
tag_7bits(B);
457+
%% %% <<1:1, (I band 127):7, (encode_size(I bsr 7))/binary>>;
458+
%% <<1:1, (H - 127):7, (encode_size(I bsr 8))/binary>>;
459+
encode_size(I) ->
460+
<<I>>.
461+
462+
tag_7bits(B) when bit_size(B) > 7 ->
463+
<<H:7, T/bitstring>> = B,
464+
<<1:1, H:7, (tag_7bits(T))/binary>>;
465+
tag_7bits(B) ->
466+
Sz = bit_size(B),
467+
<<I:Sz>> = B,
468+
<<0:1, I:7>>.
469+
470+
untag_7bits(<<1:1, H:7, T/binary>>, Acc) ->
471+
untag_7bits(T, <<Acc/bitstring, H:7>>);
472+
untag_7bits(<<0:1, H:7, T/binary>>, Acc) ->
473+
AccBits = bit_size(Acc),
474+
HBits = 8 - (AccBits rem 8),
475+
{<<Acc/bitstring, H:HBits>>, T}.
476+
477+
478+
int_to_binary(I) when I =< 16#ff -> <<I:8>>;
479+
int_to_binary(I) when I =< 16#ffff -> <<I:16>>;
480+
int_to_binary(I) when I =< 16#ffffff -> <<I:24>>;
481+
int_to_binary(I) when I =< 16#ffffffff -> <<I:32>>;
482+
int_to_binary(I) when I =< 16#ffffffffff -> <<I:40>>;
483+
int_to_binary(I) when I =< 16#ffffffffffff -> <<I:48>>;
484+
int_to_binary(I) when I =< 16#ffffffffffffff -> <<I:56>>;
485+
int_to_binary(I) when I =< 16#ffffffffffffffff -> <<I:64>>;
486+
int_to_binary(I) ->
487+
%% Realm of the ridiculous
488+
list_to_binary(
489+
lists:dropwhile(fun(X) -> X==0 end, binary_to_list(<<I:256>>))).
490+
491+
492+
%% This function exists for documentation, but not used right now.
493+
%% It's the reverse of encode_size/1, used for encoding bignums.
494+
%%
495+
%% decode_size(<<1:1, _/bitstring>> = T) ->
496+
%% {SzBin, Rest} = untag_7bits(T, <<>>),
497+
%% Bits = bit_size(SzBin),
498+
%% <<Sz:Bits>> = SzBin,
499+
%% {Sz, Rest};
500+
%% decode_size(<<0:1, H:7, T/binary>>) ->
501+
%% {H, T}.
502+
503+
434504
encode_big_neg(I) ->
435505
{Words, Max} = get_max(-I),
436506
?dbg("Words = ~p | Max = ~p~n", [Words,Max]),
@@ -735,7 +805,8 @@ decode_pos(I, 1, Bin) -> % float > 1
735805

736806
decode_pos_big(Bin) ->
737807
?dbg("decode_pos_big(~p)~n", [Bin]),
738-
{Ib, Rest} = decode_binary(Bin),
808+
{Ib0, Rest} = decode_binary(Bin),
809+
Ib = remove_size_bits(Ib0),
739810
?dbg("Ib = ~p~n", [Ib]),
740811
ISz = size(Ib) * 8,
741812
?dbg("ISz = ~p~n", [ISz]),

test/sext_eqc.erl

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ sext_test_() ->
2727
{timeout, 60,
2828
[
2929
fun() -> t(run(N, prop_encode, fun prop_encode/0)) end
30+
, fun() -> t(run(N, prop_decode_legacy_big, fun prop_decode_legacy_big/0)) end
3031
, fun() -> t(run(N, prop_prefix_equiv,fun prop_prefix_equiv/0))end
3132
, fun() -> t(run(N, prop_sort, fun prop_sort/0)) end
3233
, fun() -> t(run(N, prop_encode_sb32, fun prop_encode_sb32/0)) end
@@ -51,9 +52,12 @@ good_number_of_tests() ->
5152

5253
run(Num) ->
5354
[
54-
run (Num, prop_encode , fun prop_encode/0)
55+
run (Num, prop_encode , fun prop_encode/0)
56+
, run(Num, prop_decode_legacy_big, fun prop_decode_legacy_big/0)
5557
, run(Num, prop_prefix_equiv,fun prop_prefix_equiv/0)
56-
, run(Num, prop_sort , fun prop_encode/0)
58+
%% , run(Num, prop_prefix_equiv,fun prop_prefix_equiv/0)
59+
, run(Num, prop_sort , fun prop_sort/0)
60+
, run(Num, prop_sort_big, fun prop_sort_big/0)
5761
, run(Num, prop_encode_sb32, fun prop_encode_sb32/0)
5862
, run(Num, prop_sort_sb32 , fun prop_sort_sb32/0)
5963
, run(Num, prop_is_prefix1, fun prop_is_prefix1/0)
@@ -82,6 +86,14 @@ prop_sort() ->
8286
comp(X1,X2) == comp_i(T1,T2))
8387
end).
8488

89+
prop_sort_big() ->
90+
?FORALL({T1,T2}, {big(), big()},
91+
begin
92+
{X1,X2} = {sext:encode(T1), sext:encode(T2)},
93+
collect(size(term_to_binary({T1,T2})),
94+
comp(X1,X2) == comp_i(T1,T2))
95+
end).
96+
8597
prop_sort_sb32() ->
8698
?FORALL({T1,T2}, {term(), term()},
8799
begin
@@ -117,6 +129,10 @@ prop_encode() ->
117129
?FORALL(T, term(),
118130
sext:decode(sext:encode(T)) == T).
119131

132+
prop_decode_legacy_big() ->
133+
?FORALL(T, big(),
134+
sext:decode(sext:legacy_encode_bignum(T)) == T).
135+
120136
prop_encode_sb32() ->
121137
?FORALL(T, term(),
122138
sext:decode_sb32(sext:encode_sb32(T)) == T).

0 commit comments

Comments
 (0)