From 11f0eedc6eece6578183be4e71a7e1a2703154f6 Mon Sep 17 00:00:00 2001 From: Jehan Bruggeman Date: Mon, 29 Jan 2024 15:56:45 +0100 Subject: [PATCH] Add support for encoding and decoding Decimals See: https://avro.apache.org/docs/1.11.1/specification/#decimal --- lib/avro_ex.ex | 8 +++++++ lib/avro_ex/decode.ex | 23 ++++++++++++++++++++ lib/avro_ex/encode.ex | 33 +++++++++++++++++++++++++++++ mix.exs | 3 ++- mix.lock | 1 + test/decode_test.exs | 28 +++++++++++++++++++++++++ test/encode_test.exs | 28 +++++++++++++++++++++++++ test/fixtures/decimal.avro | 2 ++ test/fixtures/decimal.avsc | 43 ++++++++++++++++++++++++++++++++++++++ 9 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 test/fixtures/decimal.avro create mode 100644 test/fixtures/decimal.avsc diff --git a/lib/avro_ex.ex b/lib/avro_ex.ex index ae004c0..89cc30d 100644 --- a/lib/avro_ex.ex +++ b/lib/avro_ex.ex @@ -177,6 +177,14 @@ defmodule AvroEx do values as a tagged tuple of `{name, value}` instead of just the plain `value`. This allows to retain the information about which union schema was used for encoding when this cannot be infered from the `value` alone. + + ## Decimals + + Specify the option `decimals: :exact` to use `Decimal.new/3` to parse decimals + into a Decimal struct with arbitrary precision. + + Otherwise, an approximate number is calculated. + """ @spec decode(Schema.t(), encoded_avro, keyword()) :: {:ok, term} diff --git a/lib/avro_ex/decode.ex b/lib/avro_ex/decode.ex index 6e55d12..3c3f98b 100644 --- a/lib/avro_ex/decode.ex +++ b/lib/avro_ex/decode.ex @@ -130,6 +130,29 @@ defmodule AvroEx.Decode do {date_time, rest} end + defp do_decode( + %Primitive{type: :bytes, metadata: %{"logicalType" => "decimal"} = metadata}, + %Context{} = context, + data, + opts + ) + when is_binary(data) do + scale = Map.get(metadata, "scale", 0) + {bytes, rest} = do_decode(%Primitive{type: :bytes}, context, data, opts) + + size = bit_size(bytes) + <> = bytes + + number = + if :exact == Keyword.get(opts, :decimals, false) do + Decimal.new(if(unscaled >= 0, do: 1, else: -1), abs(unscaled), -scale) + else + unscaled * :math.pow(10, -scale) + end + + {number, rest} + end + defp do_decode(%Primitive{type: :long}, %Context{}, data, _) when is_binary(data) do {val, rest} = variable_integer_decode(data, 0, 0, 64) {zigzag_decode(val), rest} diff --git a/lib/avro_ex/encode.ex b/lib/avro_ex/encode.ex index 17cf8f9..a39184e 100644 --- a/lib/avro_ex/encode.ex +++ b/lib/avro_ex/encode.ex @@ -109,6 +109,31 @@ defmodule AvroEx.Encode do |> encode_integer(schema) end + defp do_encode( + %Primitive{type: :bytes, metadata: %{"logicalType" => "decimal"} = metadata}, + %Context{} = context, + value, + opts + ) do + scale = Map.get(metadata, "scale", 0) + + unscaled = + cond do + is_number(value) -> + value / :math.pow(10, -scale) + + match?(%Decimal{}, value) -> + if value.exp != -scale do + error("Incompatible decimal: expected scale #{-scale}, got #{value.exp}") + end + + value.coef * value.sign + end + + bin = <> + do_encode(%Primitive{type: :bytes}, context, bin, opts) + end + defp do_encode(%Primitive{type: :long} = schema, %Context{}, long, _) when is_integer(long) do encode_integer(long, schema) end @@ -291,6 +316,14 @@ defmodule AvroEx.Encode do |> variable_integer_encode end + defp value_size(value, bits \\ 8) do + if :math.pow(2, bits) > abs(value) do + bits + else + value_size(value, bits + 8) + end + end + @compile {:inline, error: 1} defp error(error) do error |> AvroEx.EncodeError.new() |> throw() diff --git a/mix.exs b/mix.exs index af2a1ff..c0d3bc8 100644 --- a/mix.exs +++ b/mix.exs @@ -36,7 +36,8 @@ defmodule AvroEx.Mixfile do {:credo, "~> 1.0", only: :dev, runtime: false}, {:dialyxir, "~> 1.1", only: :dev, runtime: false}, {:ex_doc, "~> 0.20", only: :dev, runtime: false}, - {:stream_data, "~> 0.5", only: [:dev, :test]} + {:stream_data, "~> 0.5", only: [:dev, :test]}, + {:decimal, "~> 2.0", optional: true} ] end diff --git a/mix.lock b/mix.lock index df31f47..0c9db1d 100644 --- a/mix.lock +++ b/mix.lock @@ -1,6 +1,7 @@ %{ "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, "credo": {:hex, :credo, "1.6.4", "ddd474afb6e8c240313f3a7b0d025cc3213f0d171879429bf8535d7021d9ad78", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "c28f910b61e1ff829bffa056ef7293a8db50e87f2c57a9b5c3f57eee124536b7"}, + "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"}, "dialyxir": {:hex, :dialyxir, "1.1.0", "c5aab0d6e71e5522e77beff7ba9e08f8e02bad90dfbeffae60eaf0cb47e29488", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "07ea8e49c45f15264ebe6d5b93799d4dd56a44036cf42d0ad9c960bc266c0b9a"}, "earmark_parser": {:hex, :earmark_parser, "1.4.20", "89970db71b11b6b89759ce16807e857df154f8df3e807b2920a8c39834a9e5cf", [:mix], [], "hexpm", "1eb0d2dabeeeff200e0d17dc3048a6045aab271f73ebb82e416464832eb57bdd"}, "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, diff --git a/test/decode_test.exs b/test/decode_test.exs index ca78374..7b403e6 100644 --- a/test/decode_test.exs +++ b/test/decode_test.exs @@ -316,6 +316,34 @@ defmodule AvroEx.Decode.Test do assert Time.truncate(time, :millisecond) == now end + + test "decimal" do + schema = "test/fixtures/decimal.avsc" |> File.read!() |> AvroEx.decode_schema!() + # This reference file was encoded using avro's reference implementation: + # + # ```java + # Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); + # BigDecimal bigDecimal = new BigDecimal(valueInString); + # return conversion.toBytes(bigDecimal, schema, logicalType); + # ``` + result = AvroEx.decode!(schema, File.read!("test/fixtures/decimal.avro"), decimals: :exact) + + assert result == %{ + "decimalField1" => Decimal.new("1.23456789E-7"), + "decimalField2" => Decimal.new("4.54545454545E-35"), + "decimalField3" => Decimal.new("-111111111.1"), + "decimalField4" => Decimal.new("5.3E-11") + } + + result_approximate_values = AvroEx.decode!(schema, File.read!("test/fixtures/decimal.avro")) + + assert result_approximate_values == %{ + "decimalField1" => 1.2345678900000002e-7, + "decimalField2" => 4.54545454545e-35, + "decimalField3" => -111_111_111.10000001, + "decimalField4" => 5.3e-11 + } + end end describe "DecodingError" do diff --git a/test/encode_test.exs b/test/encode_test.exs index 3e987d7..63da1e4 100644 --- a/test/encode_test.exs +++ b/test/encode_test.exs @@ -75,6 +75,34 @@ defmodule AvroEx.Encode.Test do date2 = ~D[1970-03-01] assert {:ok, "v"} = AvroEx.encode(schema, date2) end + + test "decimal" do + schema = "test/fixtures/decimal.avsc" |> File.read!() |> AvroEx.decode_schema!() + + encoded = + AvroEx.encode!(schema, %{ + "decimalField1" => Decimal.new("1.23456789E-7"), + "decimalField2" => Decimal.new("4.54545454545E-35"), + "decimalField3" => Decimal.new("-111111111.1"), + "decimalField4" => Decimal.new("5.3E-11") + }) + + assert AvroEx.decode!(schema, encoded, decimals: :exact) == %{ + "decimalField1" => Decimal.new("1.23456789E-7"), + "decimalField2" => Decimal.new("4.54545454545E-35"), + "decimalField3" => Decimal.new("-111111111.1"), + "decimalField4" => Decimal.new("5.3E-11") + } + + # This reference file was encoded using avro's reference implementation: + # + # ```java + # Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); + # BigDecimal bigDecimal = new BigDecimal(valueInString); + # return conversion.toBytes(bigDecimal, schema, logicalType); + # ``` + assert encoded == File.read!("test/fixtures/decimal.avro") + end end describe "variable_integer_encode" do diff --git a/test/fixtures/decimal.avro b/test/fixtures/decimal.avro new file mode 100644 index 0000000..3bedfd0 --- /dev/null +++ b/test/fixtures/decimal.avro @@ -0,0 +1,2 @@ +[Í +iÕ5ѽÅÊ95 \ No newline at end of file diff --git a/test/fixtures/decimal.avsc b/test/fixtures/decimal.avsc new file mode 100644 index 0000000..fd2ef83 --- /dev/null +++ b/test/fixtures/decimal.avsc @@ -0,0 +1,43 @@ +{ + "namespace": "example.avro", + "type": "record", + "name": "decimalContainer", + "fields": [ + { + "name": "decimalField1", + "type": { + "type": "bytes", + "scale": 15, + "precision": 11, + "logicalType": "decimal" + } + }, + { + "name": "decimalField2", + "type": { + "type": "bytes", + "scale": 46, + "precision": 46, + "logicalType": "decimal" + } + }, + { + "name": "decimalField3", + "type": { + "type": "bytes", + "scale": 1, + "precision": 46, + "logicalType": "decimal" + } + }, + { + "name": "decimalField4", + "type": { + "type": "bytes", + "scale": 12, + "precision": 46, + "logicalType": "decimal" + } + } + ] +}