Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for encoding and decoding Decimals #93

Merged
merged 4 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions lib/avro_ex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,14 @@ defmodule AvroEx do
values as a tagged tuple of `{name, value}` instead of just the plain `value`.
This retains the information about which union schema was used for
encoding when it cannot be inferred from the `value` alone.

## Decimals

Specify the option `decimals: :exact` to use `Decimal.new/3` to parse decimals
into a Decimal struct with arbitrary precision.

Otherwise, the decimal is returned as a float, which may only approximate the encoded value.

"""
@spec decode(Schema.t(), encoded_avro, keyword()) ::
{:ok, term}
Expand Down
25 changes: 25 additions & 0 deletions lib/avro_ex/decode.ex
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,31 @@ defmodule AvroEx.Decode do
{date_time, rest}
end

# Decodes a `bytes` value carrying the `decimal` logical type.
#
# The raw bytes are read with the plain `bytes` decoder and interpreted as one
# big-endian signed integer (the unscaled value). The "scale" entry of the schema
# metadata (0 when absent) says how many decimal places to shift it by.
#
# With `decimals: :exact` in `opts` the result is built through `Decimal.new/3`;
# otherwise the unscaled value is multiplied by a float power of ten.
#
# Returns `{number, rest}` where `rest` is the undecoded remainder of `data`.
defp do_decode(
       %Primitive{type: :bytes, metadata: %{"logicalType" => "decimal"} = metadata},
       %Context{} = context,
       data,
       opts
     )
     when is_binary(data) do
  {payload, rest} = do_decode(%Primitive{type: :bytes}, context, data, opts)

  # The whole payload is a single integer, so its width is the payload's bit size.
  width = bit_size(payload)
  <<unscaled::big-signed-integer-size(width)>> = payload

  exponent = -Map.get(metadata, "scale", 0)

  decoded =
    case Keyword.get(opts, :decimals) do
      :exact ->
        # avoid undefined cross reference for optional dependency
        decimal_module = Decimal
        sign = if unscaled < 0, do: -1, else: 1
        decimal_module.new(sign, abs(unscaled), exponent)

      _ ->
        unscaled * :math.pow(10, exponent)
    end

  {decoded, rest}
end

defp do_decode(%Primitive{type: :long}, %Context{}, data, _) when is_binary(data) do
{val, rest} = variable_integer_decode(data, 0, 0, 64)
{zigzag_decode(val), rest}
Expand Down
35 changes: 35 additions & 0 deletions lib/avro_ex/encode.ex
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,33 @@ defmodule AvroEx.Encode do
|> encode_integer(schema)
end

# Encodes a value for a `bytes` schema carrying the `decimal` logical type.
#
# The value is reduced to its unscaled integer, written as a big-endian
# two's-complement integer in a whole number of bytes, and the resulting binary
# is handed to the plain `bytes` encoder.
#
# Accepted values:
#   * any number - shifted by the schema's "scale" (0 when absent) and truncated
#     towards zero, so the result may only approximate the input
#   * a `Decimal` struct with an integer coefficient whose exponent equals `-scale`
#
# Every other value, and a Decimal with a different exponent, is reported
# through `error/1`.
defp do_encode(
       %Primitive{type: :bytes, metadata: %{"logicalType" => "decimal"} = metadata},
       %Context{} = context,
       value,
       opts
     ) do
  scale = Map.get(metadata, "scale", 0)

  unscaled =
    case value do
      value when is_number(value) ->
        trunc(value / :math.pow(10, -scale))

      # The struct name is compared at runtime, so the optional Decimal dependency
      # is not required at compile time. The is_integer/1 guard keeps NaN and
      # infinity (non-integer coefficients) away from the multiplication below.
      %struct{coef: coef} when struct == Decimal and is_integer(coef) ->
        if value.exp != -scale do
          error("Incompatible decimal: expected scale #{-scale}, got #{value.exp}")
        end

        coef * value.sign

      _ ->
        # Without this clause an unsupported value would surface as a bare
        # CaseClauseError instead of an encode error.
        error("Incompatible decimal: cannot encode #{inspect(value)}")
    end

  bits = value_size(unscaled)

  # value_size/1 sizes the field from the magnitude alone. A magnitude that uses
  # the top bit (e.g. 128..255 in one byte) would change sign when read back as
  # two's complement, so widen by one byte whenever the value falls outside the
  # signed range of `bits`.
  bound = Bitwise.bsl(1, bits - 1)
  number_of_bits = if unscaled >= -bound and unscaled < bound, do: bits, else: bits + 8

  bin = <<unscaled::big-signed-integer-size(number_of_bits)>>
  do_encode(%Primitive{type: :bytes}, context, bin, opts)
end

defp do_encode(%Primitive{type: :long} = schema, %Context{}, long, _) when is_integer(long) do
encode_integer(long, schema)
end
Expand Down Expand Up @@ -291,6 +318,14 @@ defmodule AvroEx.Encode do
|> variable_integer_encode
end

defp value_size(value, bits \\ 8) when is_number(value) do
if :math.pow(2, bits) > abs(value) do
bits
else
value_size(value, bits + 8)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you sure that this will terminate for all input values?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hum, I am not. Should we stop when we reach 64 bytes ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought: I don't actually see a situation where this won't terminate. The value of bits can only be a positive multiple of 8, so we're always bound to eventually get a 2^bits that's bigger than any arbitrary value.

WDYT ?

end
end

@compile {:inline, error: 1}
defp error(error) do
error |> AvroEx.EncodeError.new() |> throw()
Expand Down
3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ defmodule AvroEx.Mixfile do
{:credo, "~> 1.0", only: :dev, runtime: false},
{:dialyxir, "~> 1.1", only: :dev, runtime: false},
{:ex_doc, "~> 0.20", only: :dev, runtime: false},
{:stream_data, "~> 0.5", only: [:dev, :test]}
{:stream_data, "~> 0.5", only: [:dev, :test]},
{:decimal, "~> 2.0", optional: true}
]
end

Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
%{
"bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"},
"credo": {:hex, :credo, "1.6.4", "ddd474afb6e8c240313f3a7b0d025cc3213f0d171879429bf8535d7021d9ad78", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "c28f910b61e1ff829bffa056ef7293a8db50e87f2c57a9b5c3f57eee124536b7"},
"decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"},
"dialyxir": {:hex, :dialyxir, "1.1.0", "c5aab0d6e71e5522e77beff7ba9e08f8e02bad90dfbeffae60eaf0cb47e29488", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "07ea8e49c45f15264ebe6d5b93799d4dd56a44036cf42d0ad9c960bc266c0b9a"},
"earmark_parser": {:hex, :earmark_parser, "1.4.20", "89970db71b11b6b89759ce16807e857df154f8df3e807b2920a8c39834a9e5cf", [:mix], [], "hexpm", "1eb0d2dabeeeff200e0d17dc3048a6045aab271f73ebb82e416464832eb57bdd"},
"erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"},
Expand Down
28 changes: 28 additions & 0 deletions test/decode_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,34 @@ defmodule AvroEx.Decode.Test do

assert Time.truncate(time, :millisecond) == now
end

test "decimal" do
schema = "test/fixtures/decimal.avsc" |> File.read!() |> AvroEx.decode_schema!()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would be useful to test if round tripping the value through the encoder results in the same result

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed. There are now some round-trip tests in the encode_test. Do you think that is sufficient, or should I add some here as well?

# This reference file was encoded using avro's reference implementation:
#
# ```java
# Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
# BigDecimal bigDecimal = new BigDecimal(valueInString);
# return conversion.toBytes(bigDecimal, schema, logicalType);
# ```
result = AvroEx.decode!(schema, File.read!("test/fixtures/decimal.avro"), decimals: :exact)

assert result == %{
"decimalField1" => Decimal.new("1.23456789E-7"),
"decimalField2" => Decimal.new("4.54545454545E-35"),
"decimalField3" => Decimal.new("-111111111.1"),
"decimalField4" => Decimal.new("5.3E-11")
}

result_approximate_values = AvroEx.decode!(schema, File.read!("test/fixtures/decimal.avro"))

assert result_approximate_values == %{
"decimalField1" => 1.2345678900000002e-7,
"decimalField2" => 4.54545454545e-35,
"decimalField3" => -111_111_111.10000001,
"decimalField4" => 5.3e-11
}
end
end

describe "DecodingError" do
Expand Down
44 changes: 44 additions & 0 deletions test/encode_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,50 @@ defmodule AvroEx.Encode.Test do
date2 = ~D[1970-03-01]
assert {:ok, "v"} = AvroEx.encode(schema, date2)
end

test "decimal" do
  schema = AvroEx.decode_schema!(File.read!("test/fixtures/decimal.avsc"))

  decimals = %{
    "decimalField1" => Decimal.new("1.23456789E-7"),
    "decimalField2" => Decimal.new("4.54545454545E-35"),
    "decimalField3" => Decimal.new("-111111111.1"),
    "decimalField4" => Decimal.new("5.3E-11")
  }

  binary = AvroEx.encode!(schema, decimals)

  # Decoding with exact decimals must hand back the very same payload.
  assert AvroEx.decode!(schema, binary, decimals: :exact) == decimals

  # The encoded bytes must also match the reference file, which was produced
  # with avro's reference implementation:
  #
  # ```java
  # Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
  # BigDecimal bigDecimal = new BigDecimal(valueInString);
  # return conversion.toBytes(bigDecimal, schema, logicalType);
  # ```
  assert binary == File.read!("test/fixtures/decimal.avro")
end

test "decimal without using the Decimal library" do
  schema = AvroEx.decode_schema!(File.read!("test/fixtures/decimal.avsc"))

  floats = %{
    "decimalField1" => 1.23456789e-7,
    "decimalField2" => 4.54545454545e-35,
    "decimalField3" => -111_111_111.1,
    "decimalField4" => 5.3e-11
  }

  # Floats go in and floats come out, so some fields come back slightly
  # different from what was encoded.
  approximations = %{
    "decimalField1" => 1.2345678800000002e-7,
    "decimalField2" => 4.54545454545e-35,
    "decimalField3" => -111_111_111.0,
    "decimalField4" => 5.3e-11
  }

  binary = AvroEx.encode!(schema, floats)

  assert AvroEx.decode!(schema, binary) == approximations
end
end

describe "variable_integer_encode" do
Expand Down
2 changes: 2 additions & 0 deletions test/fixtures/decimal.avro
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[Í
iÕ5ѽÅÊ95
43 changes: 43 additions & 0 deletions test/fixtures/decimal.avsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"namespace": "example.avro",
"type": "record",
"name": "decimalContainer",
"fields": [
{
"name": "decimalField1",
"type": {
"type": "bytes",
"scale": 15,
"precision": 11,
"logicalType": "decimal"
}
},
{
"name": "decimalField2",
"type": {
"type": "bytes",
"scale": 46,
"precision": 46,
"logicalType": "decimal"
}
},
{
"name": "decimalField3",
"type": {
"type": "bytes",
"scale": 1,
"precision": 46,
"logicalType": "decimal"
}
},
{
"name": "decimalField4",
"type": {
"type": "bytes",
"scale": 12,
"precision": 46,
"logicalType": "decimal"
}
}
]
}
Loading