From 7c67d344235459221de9f21ed505fa83dd140f7a Mon Sep 17 00:00:00 2001 From: Joel Date: Thu, 27 Jul 2023 09:37:12 +0930 Subject: [PATCH 1/3] replace current field arith with fiat arithmetic --- src/field_5x52_int128_impl.h | 490 +++++++++++++++++------------------ 1 file changed, 239 insertions(+), 251 deletions(-) diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h index b2a391dec9..56b2e02e42 100644 --- a/src/field_5x52_int128_impl.h +++ b/src/field_5x52_int128_impl.h @@ -1,9 +1,3 @@ -/*********************************************************************** - * Copyright (c) 2013, 2014 Pieter Wuille * - * Distributed under the MIT software license, see the accompanying * - * file COPYING or https://www.opensource.org/licenses/mit-license.php.* - ***********************************************************************/ - #ifndef SECP256K1_FIELD_INNER5X52_IMPL_H #define SECP256K1_FIELD_INNER5X52_IMPL_H @@ -12,268 +6,262 @@ #include "int128.h" #include "util.h" -#ifdef VERIFY -#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) -#define VERIFY_BITS_128(x, n) VERIFY_CHECK(secp256k1_u128_check_bits((x), (n))) +/* Autogenerated: 'src/ExtractionOCaml/dettman_multiplication' --inline --static + * --use-value-barrier secp256k1_dettman 64 5 48 2 '2^256 - 4294968273' mul + * square */ +/* curve description: secp256k1_dettman */ +/* machine_wordsize = 64 (from "64") */ +/* requested operations: mul, square */ +/* n = 5 (from "5") */ +/* last_limb_width = 48 (from "48") */ +/* last_reduction = 2 (from "2") */ +/* s-c = 2^256 - [(1, 4294968273)] (from "2^256 - 4294968273") */ +/* inbounds_multiplier: None (from "") */ +/* */ +/* Computed values: */ +/* */ +/* */ + +#if defined(__GNUC__) || defined(__clang__) +# define FIAT_SECP256K1_DETTMAN_FIAT_EXTENSION __extension__ +# define FIAT_SECP256K1_DETTMAN_FIAT_INLINE __inline__ #else -#define VERIFY_BITS(x, n) do { } while(0) -#define VERIFY_BITS_128(x, n) do { } while(0) +# define FIAT_SECP256K1_DETTMAN_FIAT_EXTENSION +# define FIAT_SECP256K1_DETTMAN_FIAT_INLINE #endif -SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) { - secp256k1_uint128 c, d; - uint64_t t3, t4, tx, u0; - uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; - const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; - - VERIFY_BITS(a[0], 56); - VERIFY_BITS(a[1], 56); - VERIFY_BITS(a[2], 56); - VERIFY_BITS(a[3], 56); - VERIFY_BITS(a[4], 52); - VERIFY_BITS(b[0], 56); - VERIFY_BITS(b[1], 56); - VERIFY_BITS(b[2], 56); - VERIFY_BITS(b[3], 56); - VERIFY_BITS(b[4], 52); - VERIFY_CHECK(r != b); - VERIFY_CHECK(a != b); - - /* [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n. - * for 0 <= x <= 4, px is a shorthand for sum(a[i]*b[x-i], i=0..x). - * for 4 <= x <= 8, px is a shorthand for sum(a[i]*b[x-i], i=(x-4)..4) - * Note that [x 0 0 0 0 0] = [x*R]. 
- */ - - secp256k1_u128_mul(&d, a0, b[3]); - secp256k1_u128_accum_mul(&d, a1, b[2]); - secp256k1_u128_accum_mul(&d, a2, b[1]); - secp256k1_u128_accum_mul(&d, a3, b[0]); - VERIFY_BITS_128(&d, 114); - /* [d 0 0 0] = [p3 0 0 0] */ - secp256k1_u128_mul(&c, a4, b[4]); - VERIFY_BITS_128(&c, 112); - /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64); - VERIFY_BITS_128(&d, 115); - VERIFY_BITS_128(&c, 48); - /* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); - VERIFY_BITS(t3, 52); - VERIFY_BITS_128(&d, 63); - /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - - secp256k1_u128_accum_mul(&d, a0, b[4]); - secp256k1_u128_accum_mul(&d, a1, b[3]); - secp256k1_u128_accum_mul(&d, a2, b[2]); - secp256k1_u128_accum_mul(&d, a3, b[1]); - secp256k1_u128_accum_mul(&d, a4, b[0]); - VERIFY_BITS_128(&d, 115); - /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c)); - VERIFY_BITS_128(&d, 116); - /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); - VERIFY_BITS(t4, 52); - VERIFY_BITS_128(&d, 64); - /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - tx = (t4 >> 48); t4 &= (M >> 4); - VERIFY_BITS(tx, 4); - VERIFY_BITS(t4, 48); - /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ +FIAT_SECP256K1_DETTMAN_FIAT_EXTENSION typedef secp256k1_uint128 + fiat_secp256k1_dettman_uint128; - secp256k1_u128_mul(&c, a0, b[0]); - VERIFY_BITS_128(&c, 112); - /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ - secp256k1_u128_accum_mul(&d, a1, b[4]); - secp256k1_u128_accum_mul(&d, a2, b[3]); - secp256k1_u128_accum_mul(&d, a3, b[2]); - secp256k1_u128_accum_mul(&d, a4, b[1]); - VERIFY_BITS_128(&d, 115); - /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); - VERIFY_BITS(u0, 52); - VERIFY_BITS_128(&d, 63); - /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - u0 = (u0 << 4) | tx; - VERIFY_BITS(u0, 56); - /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - secp256k1_u128_accum_mul(&c, u0, R >> 4); - VERIFY_BITS_128(&c, 115); - /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); - VERIFY_BITS(r[0], 52); - VERIFY_BITS_128(&c, 61); - /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ +#define secp256k1_fe_sqr_inner fiat_secp256k1_dettman_square +#define secp256k1_fe_mul_inner fiat_secp256k1_dettman_mul - secp256k1_u128_accum_mul(&c, a0, b[1]); - secp256k1_u128_accum_mul(&c, a1, b[0]); - VERIFY_BITS_128(&c, 114); - /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ - secp256k1_u128_accum_mul(&d, a2, b[4]); - secp256k1_u128_accum_mul(&d, a3, b[3]); - secp256k1_u128_accum_mul(&d, a4, b[2]); - VERIFY_BITS_128(&d, 114); - /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52); - VERIFY_BITS_128(&c, 115); - VERIFY_BITS_128(&d, 62); - /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); - VERIFY_BITS(r[1], 52); - VERIFY_BITS_128(&c, 63); - /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ +#if (-1 & 3) != 3 +#error "This code 
only works on a two's complement system" +#endif - secp256k1_u128_accum_mul(&c, a0, b[2]); - secp256k1_u128_accum_mul(&c, a1, b[1]); - secp256k1_u128_accum_mul(&c, a2, b[0]); - VERIFY_BITS_128(&c, 114); - /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ - secp256k1_u128_accum_mul(&d, a3, b[4]); - secp256k1_u128_accum_mul(&d, a4, b[3]); - VERIFY_BITS_128(&d, 114); - /* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64); - VERIFY_BITS_128(&c, 115); - VERIFY_BITS_128(&d, 50); - /* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ +static FIAT_SECP256K1_DETTMAN_FIAT_INLINE secp256k1_uint128 u128_mul_u64_u64(uint64_t a, uint64_t b) { + secp256k1_uint128 r; + secp256k1_u128_mul(&r, a, b); + return r; +} - r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); - VERIFY_BITS(r[2], 52); - VERIFY_BITS_128(&c, 63); - /* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d)); - secp256k1_u128_accum_u64(&c, t3); - VERIFY_BITS_128(&c, 100); - /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); - VERIFY_BITS(r[3], 52); - VERIFY_BITS_128(&c, 48); - /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[4] = secp256k1_u128_to_u64(&c) + t4; - VERIFY_BITS(r[4], 49); - /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ +static FIAT_SECP256K1_DETTMAN_FIAT_INLINE uint64_t u64_shr_u128(secp256k1_uint128 a, unsigned int n) { + secp256k1_uint128 r = a; + secp256k1_u128_rshift(&r, n); + return secp256k1_u128_to_u64(&r); } -SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { - secp256k1_uint128 c, d; - uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; - int64_t t3, t4, tx, u0; - const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; +static FIAT_SECP256K1_DETTMAN_FIAT_INLINE uint64_t u64_and_u128_u64(secp256k1_uint128 a, uint64_t b) { + return secp256k1_u128_to_u64(&a) & b; +} - VERIFY_BITS(a[0], 56); - VERIFY_BITS(a[1], 56); - VERIFY_BITS(a[2], 56); - VERIFY_BITS(a[3], 56); - VERIFY_BITS(a[4], 52); +static FIAT_SECP256K1_DETTMAN_FIAT_INLINE secp256k1_uint128 u128_add_u128_u128(secp256k1_uint128 a, secp256k1_uint128 b) { + uint64_t bl; + uint64_t bh; + uint64_t rh; + uint64_t rl; + secp256k1_uint128 r = a; - /** [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n. - * px is a shorthand for sum(a[i]*a[x-i], i=0..x). - * Note that [x 0 0 0 0 0] = [x*R]. 
- */ + bl = secp256k1_u128_to_u64(&b); - secp256k1_u128_mul(&d, a0*2, a3); - secp256k1_u128_accum_mul(&d, a1*2, a2); - VERIFY_BITS_128(&d, 114); - /* [d 0 0 0] = [p3 0 0 0] */ - secp256k1_u128_mul(&c, a4, a4); - VERIFY_BITS_128(&c, 112); - /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64); - VERIFY_BITS_128(&d, 115); - VERIFY_BITS_128(&c, 48); - /* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); - VERIFY_BITS(t3, 52); - VERIFY_BITS_128(&d, 63); - /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ + /* adding low b to r*/ + secp256k1_u128_accum_u64(&r, bl); - a4 *= 2; - secp256k1_u128_accum_mul(&d, a0, a4); - secp256k1_u128_accum_mul(&d, a1*2, a3); - secp256k1_u128_accum_mul(&d, a2, a2); - VERIFY_BITS_128(&d, 115); - /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c)); - VERIFY_BITS_128(&d, 116); - /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); - VERIFY_BITS(t4, 52); - VERIFY_BITS_128(&d, 64); - /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - tx = (t4 >> 48); t4 &= (M >> 4); - VERIFY_BITS(tx, 4); - VERIFY_BITS(t4, 48); - /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ + rl = secp256k1_u128_to_u64(&r); + rh = secp256k1_u128_hi_u64(&r); - secp256k1_u128_mul(&c, a0, a0); - VERIFY_BITS_128(&c, 112); - /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ - secp256k1_u128_accum_mul(&d, a1, a4); - secp256k1_u128_accum_mul(&d, a2*2, a3); - VERIFY_BITS_128(&d, 114); - /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); - VERIFY_BITS(u0, 52); - VERIFY_BITS_128(&d, 62); - /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - u0 = (u0 << 4) | tx; - VERIFY_BITS(u0, 56); - /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - secp256k1_u128_accum_mul(&c, u0, R >> 4); - VERIFY_BITS_128(&c, 113); - /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); - VERIFY_BITS(r[0], 52); - VERIFY_BITS_128(&c, 61); - /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ + /* adding high b*/ + bh = secp256k1_u128_hi_u64(&b); + rh += bh; - a0 *= 2; - secp256k1_u128_accum_mul(&c, a0, a1); - VERIFY_BITS_128(&c, 114); - /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ - secp256k1_u128_accum_mul(&d, a2, a4); - secp256k1_u128_accum_mul(&d, a3, a3); - VERIFY_BITS_128(&d, 114); - /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52); - VERIFY_BITS_128(&c, 115); - VERIFY_BITS_128(&d, 62); - /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); - VERIFY_BITS(r[1], 52); - VERIFY_BITS_128(&c, 63); - /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + /* saving all in r*/ + secp256k1_u128_load(&r, rh, rl); - secp256k1_u128_accum_mul(&c, a0, a2); - secp256k1_u128_accum_mul(&c, a1, a1); - VERIFY_BITS_128(&c, 114); - /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ - secp256k1_u128_accum_mul(&d, a3, a4); - VERIFY_BITS_128(&d, 114); - /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - 
secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64); - VERIFY_BITS_128(&c, 115); - VERIFY_BITS_128(&d, 50); - /* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); - VERIFY_BITS(r[2], 52); - VERIFY_BITS_128(&c, 63); - /* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + return r; +} +static FIAT_SECP256K1_DETTMAN_FIAT_INLINE secp256k1_uint128 u128_add_u64_u128(uint64_t a, secp256k1_uint128 b) { + secp256k1_uint128 r = b; + secp256k1_u128_accum_u64(&r, a); + return r; +} +/* + * The function fiat_secp256k1_dettman_mul multiplies two field elements. + * + * Postconditions: + * eval out1 mod 115792089237316195423570985008687907853269984665640564039457584007908834671663 = (eval arg1 * eval arg2) mod 115792089237316195423570985008687907853269984665640564039457584007908834671663 + * + * Input Bounds: + * arg1: [[0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1fffffffffffe]] + * arg2: [[0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1fffffffffffe]] + * Output Bounds: + * out1: [[0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x17fffffffffff]] + */ +static FIAT_SECP256K1_DETTMAN_FIAT_INLINE void fiat_secp256k1_dettman_mul(uint64_t out1[5], const uint64_t arg1[5], const uint64_t arg2[5]) { + fiat_secp256k1_dettman_uint128 x1; + uint64_t x2; + uint64_t x3; + fiat_secp256k1_dettman_uint128 x4; + uint64_t x5; + uint64_t x6; + fiat_secp256k1_dettman_uint128 x7; + uint64_t x8; + uint64_t x9; + fiat_secp256k1_dettman_uint128 x10; + uint64_t x11; + uint64_t x12; + uint64_t x13; + uint64_t x14; + fiat_secp256k1_dettman_uint128 x15; + uint64_t x16; + uint64_t x17; + fiat_secp256k1_dettman_uint128 x18; + uint64_t x19; + uint64_t x20; + fiat_secp256k1_dettman_uint128 x21; + uint64_t x22; + uint64_t x23; + fiat_secp256k1_dettman_uint128 x24; + uint64_t x25; + uint64_t x26; + fiat_secp256k1_dettman_uint128 x27; + uint64_t x28; + uint64_t x29; + fiat_secp256k1_dettman_uint128 x30; + uint64_t x31; + uint64_t x32; + uint64_t x33; + x1 = u128_mul_u64_u64((arg1[4]), (arg2[4])); + x2 = u64_shr_u128(x1, 64); + x3 = u64_and_u128_u64(x1, UINT64_C(0xffffffffffffffff)); + x4 = u128_add_u128_u128(u128_add_u128_u128(u128_mul_u64_u64((arg1[0]), (arg2[3])), u128_add_u128_u128(u128_mul_u64_u64((arg1[1]), (arg2[2])), u128_add_u128_u128(u128_mul_u64_u64((arg1[2]), (arg2[1])), u128_mul_u64_u64((arg1[3]), (arg2[0]))))), u128_mul_u64_u64(x3, UINT64_C(0x1000003d10))); + x5 = u64_shr_u128(x4, 52); + x6 = u64_and_u128_u64(x4, UINT64_C(0xfffffffffffff)); + x7 = u128_add_u128_u128(u128_add_u64_u128(x5, u128_add_u128_u128(u128_mul_u64_u64((arg1[0]), (arg2[4])), u128_add_u128_u128(u128_mul_u64_u64((arg1[1]), (arg2[3])), u128_add_u128_u128(u128_mul_u64_u64((arg1[2]), (arg2[2])), u128_add_u128_u128(u128_mul_u64_u64((arg1[3]), (arg2[1])), u128_mul_u64_u64((arg1[4]), (arg2[0]))))))), u128_mul_u64_u64(x2, UINT64_C(0x1000003d10000))); + x8 = u64_shr_u128(x7, 52); + x9 = u64_and_u128_u64(x7, UINT64_C(0xfffffffffffff)); + x10 = u128_add_u64_u128(x8, u128_add_u128_u128(u128_mul_u64_u64((arg1[1]), (arg2[4])), u128_add_u128_u128(u128_mul_u64_u64((arg1[2]), (arg2[3])), u128_add_u128_u128(u128_mul_u64_u64((arg1[3]), (arg2[2])), u128_mul_u64_u64((arg1[4]), (arg2[1])))))); + x11 = u64_shr_u128(x10, 52); + x12 = 
u64_and_u128_u64(x10, UINT64_C(0xfffffffffffff)); + x13 = (x9 >> 48); + x14 = (x9 & UINT64_C(0xffffffffffff)); + x15 = u128_add_u128_u128(u128_mul_u64_u64((arg1[0]), (arg2[0])), u128_mul_u64_u64((x13 + (x12 << 4)), UINT64_C(0x1000003d1))); + x16 = u64_shr_u128(x15, 52); + x17 = u64_and_u128_u64(x15, UINT64_C(0xfffffffffffff)); + x18 = u128_add_u64_u128(x11, u128_add_u128_u128(u128_mul_u64_u64((arg1[2]), (arg2[4])), u128_add_u128_u128(u128_mul_u64_u64((arg1[3]), (arg2[3])), u128_mul_u64_u64((arg1[4]), (arg2[2]))))); + x19 = u64_shr_u128(x18, 52); + x20 = u64_and_u128_u64(x18, UINT64_C(0xfffffffffffff)); + x21 = u128_add_u128_u128(u128_add_u64_u128(x16, u128_add_u128_u128(u128_mul_u64_u64((arg1[0]), (arg2[1])), u128_mul_u64_u64((arg1[1]), (arg2[0])))), u128_mul_u64_u64(x20, UINT64_C(0x1000003d10))); + x22 = u64_shr_u128(x21, 52); + x23 = u64_and_u128_u64(x21, UINT64_C(0xfffffffffffff)); + x24 = u128_add_u64_u128(x19, u128_add_u128_u128(u128_mul_u64_u64((arg1[3]), (arg2[4])), u128_mul_u64_u64((arg1[4]), (arg2[3])))); + x25 = u64_shr_u128(x24, 64); + x26 = u64_and_u128_u64(x24, UINT64_C(0xffffffffffffffff)); + x27 = u128_add_u128_u128(u128_add_u64_u128(x22, u128_add_u128_u128(u128_mul_u64_u64((arg1[0]), (arg2[2])), u128_add_u128_u128(u128_mul_u64_u64((arg1[1]), (arg2[1])), u128_mul_u64_u64((arg1[2]), (arg2[0]))))), u128_mul_u64_u64(x26, UINT64_C(0x1000003d10))); + x28 = u64_shr_u128(x27, 52); + x29 = u64_and_u128_u64(x27, UINT64_C(0xfffffffffffff)); + x30 = u128_add_u64_u128((x28 + x6), u128_mul_u64_u64(x25, UINT64_C(0x1000003d10000))); + x31 = u64_shr_u128(x30, 52); + x32 = u64_and_u128_u64(x30, UINT64_C(0xfffffffffffff)); + x33 = (x31 + x14); + out1[0] = x17; + out1[1] = x23; + out1[2] = x29; + out1[3] = x32; + out1[4] = x33; +} - secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d)); - secp256k1_u128_accum_u64(&c, t3); - VERIFY_BITS_128(&c, 100); - /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); - VERIFY_BITS(r[3], 52); - VERIFY_BITS_128(&c, 48); - /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[4] = secp256k1_u128_to_u64(&c) + t4; - VERIFY_BITS(r[4], 49); - /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ +/* + * The function fiat_secp256k1_dettman_square squares a field element. 
+ * + * Postconditions: + * eval out1 mod 115792089237316195423570985008687907853269984665640564039457584007908834671663 = (eval arg1 * eval arg1) mod 115792089237316195423570985008687907853269984665640564039457584007908834671663 + * + * Input Bounds: + * arg1: [[0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1fffffffffffe]] + * Output Bounds: + * out1: [[0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x1ffffffffffffe], [0x0 ~> 0x17fffffffffff]] + */ +static FIAT_SECP256K1_DETTMAN_FIAT_INLINE void fiat_secp256k1_dettman_square(uint64_t out1[5], const uint64_t arg1[5]) { + uint64_t x1; + uint64_t x2; + uint64_t x3; + uint64_t x4; + fiat_secp256k1_dettman_uint128 x5; + uint64_t x6; + uint64_t x7; + fiat_secp256k1_dettman_uint128 x8; + uint64_t x9; + uint64_t x10; + fiat_secp256k1_dettman_uint128 x11; + uint64_t x12; + uint64_t x13; + fiat_secp256k1_dettman_uint128 x14; + uint64_t x15; + uint64_t x16; + uint64_t x17; + uint64_t x18; + fiat_secp256k1_dettman_uint128 x19; + uint64_t x20; + uint64_t x21; + fiat_secp256k1_dettman_uint128 x22; + uint64_t x23; + uint64_t x24; + fiat_secp256k1_dettman_uint128 x25; + uint64_t x26; + uint64_t x27; + fiat_secp256k1_dettman_uint128 x28; + uint64_t x29; + uint64_t x30; + fiat_secp256k1_dettman_uint128 x31; + uint64_t x32; + uint64_t x33; + fiat_secp256k1_dettman_uint128 x34; + uint64_t x35; + uint64_t x36; + uint64_t x37; + x1 = ((arg1[3]) * 0x2); + x2 = ((arg1[2]) * 0x2); + x3 = ((arg1[1]) * 0x2); + x4 = ((arg1[0]) * 0x2); + x5 = u128_mul_u64_u64((arg1[4]), (arg1[4])); + x6 = u64_shr_u128(x5, 64); + x7 = u64_and_u128_u64(x5, UINT64_C(0xffffffffffffffff)); + x8 = u128_add_u128_u128(u128_add_u128_u128(u128_mul_u64_u64(x4, (arg1[3])), u128_mul_u64_u64(x3, (arg1[2]))), u128_mul_u64_u64(x7, UINT64_C(0x1000003d10))); + x9 = u64_shr_u128(x8, 52); + x10 = u64_and_u128_u64(x8, UINT64_C(0xfffffffffffff)); + x11 = u128_add_u128_u128(u128_add_u64_u128(x9, u128_add_u128_u128(u128_mul_u64_u64(x4, (arg1[4])), u128_add_u128_u128(u128_mul_u64_u64(x3, (arg1[3])), u128_mul_u64_u64((arg1[2]), (arg1[2]))))), u128_mul_u64_u64(x6, UINT64_C(0x1000003d10000))); + x12 = u64_shr_u128(x11, 52); + x13 = u64_and_u128_u64(x11, UINT64_C(0xfffffffffffff)); + x14 = u128_add_u64_u128(x12, u128_add_u128_u128(u128_mul_u64_u64(x3, (arg1[4])), u128_mul_u64_u64(x2, (arg1[3])))); + x15 = u64_shr_u128(x14, 52); + x16 = u64_and_u128_u64(x14, UINT64_C(0xfffffffffffff)); + x17 = (x13 >> 48); + x18 = (x13 & UINT64_C(0xffffffffffff)); + x19 = u128_add_u128_u128(u128_mul_u64_u64((arg1[0]), (arg1[0])), u128_mul_u64_u64((x17 + (x16 << 4)), UINT64_C(0x1000003d1))); + x20 = u64_shr_u128(x19, 52); + x21 = u64_and_u128_u64(x19, UINT64_C(0xfffffffffffff)); + x22 = u128_add_u64_u128(x15, u128_add_u128_u128(u128_mul_u64_u64(x2, (arg1[4])), u128_mul_u64_u64((arg1[3]), (arg1[3])))); + x23 = u64_shr_u128(x22, 52); + x24 = u64_and_u128_u64(x22, UINT64_C(0xfffffffffffff)); + x25 = u128_add_u128_u128(u128_add_u64_u128(x20, u128_mul_u64_u64(x4, (arg1[1]))), u128_mul_u64_u64(x24, UINT64_C(0x1000003d10))); + x26 = u64_shr_u128(x25, 52); + x27 = u64_and_u128_u64(x25, UINT64_C(0xfffffffffffff)); + x28 = u128_add_u64_u128(x23, u128_mul_u64_u64(x1, (arg1[4]))); + x29 = u64_shr_u128(x28, 64); + x30 = u64_and_u128_u64(x28, UINT64_C(0xffffffffffffffff)); + x31 = u128_add_u128_u128(u128_add_u64_u128(x26, u128_add_u128_u128(u128_mul_u64_u64(x4, (arg1[2])), u128_mul_u64_u64((arg1[1]), (arg1[1])))), 
u128_mul_u64_u64(x30, UINT64_C(0x1000003d10)));
+  x32 = u64_shr_u128(x31, 52);
+  x33 = u64_and_u128_u64(x31, UINT64_C(0xfffffffffffff));
+  x34 = u128_add_u64_u128((x32 + x10), u128_mul_u64_u64(x29, UINT64_C(0x1000003d10000)));
+  x35 = u64_shr_u128(x34, 52);
+  x36 = u64_and_u128_u64(x34, UINT64_C(0xfffffffffffff));
+  x37 = (x35 + x18);
+  out1[0] = x21;
+  out1[1] = x27;
+  out1[2] = x33;
+  out1[3] = x36;
+  out1[4] = x37;
 }
 
 #endif /* SECP256K1_FIELD_INNER5X52_IMPL_H */

From cc741f38d3bf36f7ba2ddb1ead99c6a00dd8e265 Mon Sep 17 00:00:00 2001
From: Joel
Date: Fri, 28 Jul 2023 09:26:40 +0930
Subject: [PATCH 2/3] moved u128+u128 to int128.h

---
 src/field_5x52_int128_impl.h | 19 +------------------
 src/int128.h                 |  5 +++++
 src/int128_native_impl.h     |  4 ++++
 src/int128_struct_impl.h     |  5 +++++
 4 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h
index 56b2e02e42..e288617aa2 100644
--- a/src/field_5x52_int128_impl.h
+++ b/src/field_5x52_int128_impl.h
@@ -57,26 +57,9 @@ static FIAT_SECP256K1_DETTMAN_FIAT_INLINE uint64_t u64_and_u128_u64(secp256k1_ui
 }
 
 static FIAT_SECP256K1_DETTMAN_FIAT_INLINE secp256k1_uint128 u128_add_u128_u128(secp256k1_uint128 a, secp256k1_uint128 b) {
-  uint64_t bl;
-  uint64_t bh;
-  uint64_t rh;
-  uint64_t rl;
   secp256k1_uint128 r = a;
 
-  bl = secp256k1_u128_to_u64(&b);
-
-  /* adding low b to r*/
-  secp256k1_u128_accum_u64(&r, bl);
-
-  rl = secp256k1_u128_to_u64(&r);
-  rh = secp256k1_u128_hi_u64(&r);
-
-  /* adding high b*/
-  bh = secp256k1_u128_hi_u64(&b);
-  rh += bh;
-
-  /* saving all in r*/
-  secp256k1_u128_load(&r, rh, rl);
+  secp256k1_u128_accum(&r, &b);
 
   return r;
 }
diff --git a/src/int128.h b/src/int128.h
index 5355fbfae0..c0b611ecf9 100644
--- a/src/int128.h
+++ b/src/int128.h
@@ -28,6 +28,11 @@ static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint
  */
 static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a);
 
+/* Add an unsigned 128-bit value a to r.
+ * The final result is taken modulo 2^128.
+ */
+static SECP256K1_INLINE void secp256k1_u128_accum(secp256k1_uint128 *r, secp256k1_uint128 *a);
+
 /* Unsigned (logical) right shift.
  * Non-constant time in n.
  */
diff --git a/src/int128_native_impl.h b/src/int128_native_impl.h
index 7f02e1590b..b9f089d43a 100644
--- a/src/int128_native_impl.h
+++ b/src/int128_native_impl.h
@@ -20,6 +20,10 @@ static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint
     *r += a;
 }
 
+static SECP256K1_INLINE void secp256k1_u128_accum(secp256k1_uint128 *r, secp256k1_uint128 *a){
+    *r += *a;
+}
+
 static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) {
     VERIFY_CHECK(n < 128);
     *r >>= n;
diff --git a/src/int128_struct_impl.h b/src/int128_struct_impl.h
index 962a71d13b..e8eaccd95b 100644
--- a/src/int128_struct_impl.h
+++ b/src/int128_struct_impl.h
@@ -71,6 +71,11 @@ static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint
     r->hi += r->lo < a;
 }
 
+static SECP256K1_INLINE void secp256k1_u128_accum(secp256k1_uint128 *r, secp256k1_uint128 *a){
+    r->lo += a->lo;
+    r->hi += (r->lo < a->lo) + a->hi;
+}
+
 /* Unsigned (logical) right shift.
  * Non-constant time in n.
  */
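The struct implementation of secp256k1_u128_accum above relies on the usual unsigned-carry trick: after r->lo += a->lo, the low word has wrapped modulo 2^64 exactly when the new r->lo is smaller than a->lo, and that single carry bit is added into the high word together with a->hi. The following standalone sketch (using a hypothetical two-limb u128 struct and u128_accum helper as stand-ins for the secp256k1 types; it is not part of the patch series) illustrates the same idea and checks one wrap-around case:

#include <assert.h>
#include <stdint.h>

/* Hypothetical stand-ins for secp256k1_uint128 / secp256k1_u128_accum (struct backend). */
typedef struct { uint64_t lo, hi; } u128;

static void u128_accum(u128 *r, const u128 *a) {
    r->lo += a->lo;                   /* wraps modulo 2^64 */
    r->hi += (r->lo < a->lo) + a->hi; /* low word wrapped iff new lo < a->lo */
}

int main(void) {
    u128 r = { UINT64_MAX, 1 };       /* value 2^65 - 1 */
    u128 a = { 1, 2 };                /* value 2^65 + 1 */
    u128_accum(&r, &a);
    assert(r.lo == 0 && r.hi == 4);   /* sum is 2^66 */
    return 0;
}
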
From e3affa143f510e45d1e23f71c0b8c11c030680ac Mon Sep 17 00:00:00 2001
From: Joel
Date: Fri, 28 Jul 2023 09:27:13 +0930
Subject: [PATCH 3/3] chore: reordered functions, beautified comment

---
 src/field_5x52_int128_impl.h | 48 +++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h
index e288617aa2..10095964a2 100644
--- a/src/field_5x52_int128_impl.h
+++ b/src/field_5x52_int128_impl.h
@@ -6,22 +6,6 @@
 #include "int128.h"
 #include "util.h"
 
-/* Autogenerated: 'src/ExtractionOCaml/dettman_multiplication' --inline --static
- * --use-value-barrier secp256k1_dettman 64 5 48 2 '2^256 - 4294968273' mul
- * square */
-/* curve description: secp256k1_dettman */
-/* machine_wordsize = 64 (from "64") */
-/* requested operations: mul, square */
-/* n = 5 (from "5") */
-/* last_limb_width = 48 (from "48") */
-/* last_reduction = 2 (from "2") */
-/* s-c = 2^256 - [(1, 4294968273)] (from "2^256 - 4294968273") */
-/* inbounds_multiplier: None (from "") */
-/* */
-/* Computed values: */
-/* */
-/* */
-
 #if defined(__GNUC__) || defined(__clang__)
 # define FIAT_SECP256K1_DETTMAN_FIAT_EXTENSION __extension__
 # define FIAT_SECP256K1_DETTMAN_FIAT_INLINE __inline__
@@ -30,16 +14,6 @@
 # define FIAT_SECP256K1_DETTMAN_FIAT_INLINE
 #endif
 
-FIAT_SECP256K1_DETTMAN_FIAT_EXTENSION typedef secp256k1_uint128
-    fiat_secp256k1_dettman_uint128;
-
-#define secp256k1_fe_sqr_inner fiat_secp256k1_dettman_square
-#define secp256k1_fe_mul_inner fiat_secp256k1_dettman_mul
-
-#if (-1 & 3) != 3
-#error "This code only works on a two's complement system"
-#endif
-
 static FIAT_SECP256K1_DETTMAN_FIAT_INLINE secp256k1_uint128 u128_mul_u64_u64(uint64_t a, uint64_t b) {
   secp256k1_uint128 r;
   secp256k1_u128_mul(&r, a, b);
@@ -68,6 +42,28 @@ static FIAT_SECP256K1_DETTMAN_FIAT_INLINE secp256k1_uint128 u128_add_u64_u128(ui
   secp256k1_u128_accum_u64(&r, a);
   return r;
 }
+
+/* Autogenerated: 'src/ExtractionOCaml/dettman_multiplication' --inline --static --use-value-barrier secp256k1_dettman 64 5 48 2 '2^256 - 4294968273' mul square
+ * curve description: secp256k1_dettman
+ * machine_wordsize = 64 (from "64")
+ * requested operations: mul, square
+ * n = 5 (from "5")
+ * last_limb_width = 48 (from "48")
+ * last_reduction = 2 (from "2")
+ * s-c = 2^256 - [(1, 4294968273)] (from "2^256 - 4294968273")
+ * inbounds_multiplier: None (from "")
+ */
+
+
+FIAT_SECP256K1_DETTMAN_FIAT_EXTENSION typedef secp256k1_uint128 fiat_secp256k1_dettman_uint128;
+
+#define secp256k1_fe_sqr_inner fiat_secp256k1_dettman_square
+#define secp256k1_fe_mul_inner fiat_secp256k1_dettman_mul
+
+#if (-1 & 3) != 3
+#error "This code only works on a two's complement system"
+#endif
+
 /*
  * The function fiat_secp256k1_dettman_mul multiplies two field elements.
  *