From ad6262705fd3c92470e5c61f54f9672b689706c3 Mon Sep 17 00:00:00 2001
From: Tim Ruffing
Date: Thu, 9 Jan 2020 19:01:33 +0100
Subject: [PATCH] Make 64x64->64 multiplications constant-time with MSVC on 32bit x86

---
 src/field_10x26_impl.h       | 17 ++++++--------
 src/field_5x52_int128_impl.h |  6 -----
 src/scalar_8x32_impl.h       |  9 ++++++++
 src/util.h                   | 45 ++++++++++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index 651500ee8e..35844b9da7 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -454,18 +454,15 @@ void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);
 
 #else
 
-#ifdef VERIFY
-#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
-#else
-#define VERIFY_BITS(x, n) do { } while(0)
-#endif
-
 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
     uint64_t c, d;
     uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
     uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
     const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
 
+    VERIFY_BITS(R0, 14);
+    VERIFY_BITS(R1, 11);
+
     VERIFY_BITS(a[0], 30);
     VERIFY_BITS(a[1], 30);
     VERIFY_BITS(a[2], 30);
@@ -765,14 +762,14 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 
-    d = c * (R0 >> 4) + t0;
+    d = MUL_64X64_63(c, R0 >> 4) + t0;
     VERIFY_BITS(d, 56);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     r[0] = d & M; d >>= 26;
     VERIFY_BITS(r[0], 26);
     VERIFY_BITS(d, 30);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    d += c * (R1 >> 4) + t1;
+    d += MUL_64X64_63(c, R1 >> 4) + t1;
     VERIFY_BITS(d, 53);
     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
@@ -1039,14 +1036,14 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 
-    d = c * (R0 >> 4) + t0;
+    d = MUL_64X64_63(c, R0 >> 4) + t0;
     VERIFY_BITS(d, 56);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     r[0] = d & M; d >>= 26;
     VERIFY_BITS(r[0], 26);
     VERIFY_BITS(d, 30);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    d += c * (R1 >> 4) + t1;
+    d += MUL_64X64_63(c, R1 >> 4) + t1;
     VERIFY_BITS(d, 53);
     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h
index bcbfb92ac2..0f780e8bd3 100644
--- a/src/field_5x52_int128_impl.h
+++ b/src/field_5x52_int128_impl.h
@@ -9,12 +9,6 @@
 
 #include <stdint.h>
 
-#ifdef VERIFY
-#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
-#else
-#define VERIFY_BITS(x, n) do { } while(0)
-#endif
-
 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
     uint128_t c, d;
     uint64_t t3, t4, tx, u0;
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 3c372f34fe..b3b1c662da 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -265,6 +265,9 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
 /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define muladd(a,b) { \
     uint32_t tl, th; \
+    /* VERIFY that the 64x64->64 mul a*b is in fact a 32x32->64 mul for MSVC, see util.h. */ \
+    VERIFY_BITS((uint64_t)a, 32); \
+    VERIFY_BITS((uint64_t)b, 32); \
     { \
         uint64_t t = (uint64_t)a * b; \
         th = t >> 32; /* at most 0xFFFFFFFE */ \
@@ -280,6 +283,9 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
 /** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
 #define muladd_fast(a,b) { \
     uint32_t tl, th; \
+    /* VERIFY that the 64x64->64 mul a*b is in fact a 32x32->64 mul for MSVC, see util.h. */ \
+    VERIFY_BITS((uint64_t)a, 32); \
+    VERIFY_BITS((uint64_t)b, 32); \
     { \
         uint64_t t = (uint64_t)a * b; \
         th = t >> 32; /* at most 0xFFFFFFFE */ \
@@ -294,6 +300,9 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
 /** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define muladd2(a,b) { \
     uint32_t tl, th, th2, tl2; \
+    /* VERIFY that the 64x64->64 mul a*b is in fact a 32x32->64 mul for MSVC, see util.h. */ \
+    VERIFY_BITS((uint64_t)a, 32); \
+    VERIFY_BITS((uint64_t)b, 32); \
     { \
         uint64_t t = (uint64_t)a * b; \
         th = t >> 32; /* at most 0xFFFFFFFE */ \
diff --git a/src/util.h b/src/util.h
index 8289e23e0c..552e2b80a0 100644
--- a/src/util.h
+++ b/src/util.h
@@ -88,6 +88,12 @@ static SECP256K1_INLINE void secp256k1_callback_call(const secp256k1_callback *
 #define VG_CHECK_VERIFY(x,y)
 #endif
 
+#ifdef VERIFY
+#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
+#else
+#define VERIFY_BITS(x, n) do { } while(0)
+#endif
+
 static SECP256K1_INLINE void *checked_malloc(const secp256k1_callback* cb, size_t size) {
     void *ret = malloc(size);
     if (ret == NULL) {
@@ -208,4 +214,43 @@ static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag)
     *r = (int)(r_masked | a_masked);
 }
 
+/* For 32-bit x86 targets, MSVC implements 64x64->64 bit multiplications
+   with a subroutine that is not constant-time: it takes a shortcut when
+   the high 32 bits of both multiplicands are all 0.
+   See https://research.kudelskisecurity.com/2017/01/16/when-constant-time-source-may-not-save-you/
+   and also https://www.bearssl.org/ctmul.html for more information.
+
+   To work around this, we can use the following macro if all we need is
+   a 64x64->63 multiplication. We set the MSB of the right multiplicand
+   before the multiplication and clear the MSB of the product.
+
+   For 64-bit integers a, b where a, b, and a*b can be represented in 63
+   bits, we have
+
+     (a * (b | 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF
+   = (a * (b + 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF
+   = ((a * b) + (a * 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF
+   = ((a * b) + (a << 63)) & 0x7FFFFFFFFFFFFFFF
+   = ((a * b) | (a << 63)) & 0x7FFFFFFFFFFFFFFF
+   = ((a * b) & 0x7FFFFFFFFFFFFFFF) | ((a << 63) & 0x7FFFFFFFFFFFFFFF)
+   = (a * b) | 0
+   = a * b.
+
+   If a*b can be represented in 63 bits but a or b can't, we necessarily
+   have a == 0 or b == 0.
+   Then it's easy to check that
+
+     (0 * (b | 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF = 0, and
+     (a * (0 | 0x8000000000000000)) & 0x7FFFFFFFFFFFFFFF = 0.
+
+   Thus our macro works for any 64x64->63 multiplication.
+
+   A variant of the macro for 16x16->15 bit multiplications has been
+   verified for all acceptable inputs by brute-force computation.
+*/
+#if defined(_MSC_VER) && defined(_M_IX86)
+#define MUL_64X64_63(a, b) (((a) * ((b) | UINT64_C(0x8000000000000000))) & UINT64_C(0x7FFFFFFFFFFFFFFF))
+#else
+#define MUL_64X64_63(a, b) ((a) * (b))
+#endif
+
 #endif /* SECP256K1_UTIL_H */
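
The util.h comment above mentions that a 16x16->15 bit variant of the macro
has been verified for all acceptable inputs by brute-force computation. The
standalone C program below is a minimal sketch of such a check, not code from
the patch; the macro name MUL_16X16_15 and the test harness are hypothetical
illustrations. It enumerates all 2^32 input pairs and verifies every pair
whose exact product fits in 15 bits.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical 16x16->15 analogue of MUL_64X64_63: set the MSB of the
 * right multiplicand, multiply, and clear the MSB of the product. The
 * product is computed in uint32_t, where 0xFFFF * 0xFFFF fits exactly
 * (this also avoids signed-overflow UB from C's integer promotions);
 * masking with 0x7FFF gives the same result as first truncating to 16
 * bits, so this matches the truncating structure of the 64-bit macro. */
#define MUL_16X16_15(a, b) (((uint32_t)(a) * ((uint32_t)(b) | 0x8000u)) & 0x7FFFu)

int main(void) {
    uint32_t a, b;
    for (a = 0; a <= 0xFFFFu; a++) {
        for (b = 0; b <= 0xFFFFu; b++) {
            uint32_t exact = a * b; /* exact product, at most 0xFFFE0001 */
            /* "Acceptable" inputs are those whose true product fits in
             * 15 bits, mirroring the 63-bit condition in util.h. */
            if (exact <= 0x7FFFu && MUL_16X16_15(a, b) != exact) {
                printf("mismatch: a=%lu b=%lu\n", (unsigned long)a, (unsigned long)b);
                return 1;
            }
        }
    }
    printf("all acceptable inputs verified\n");
    return 0;
}

The enumeration covers both cases distinguished in the util.h comment: pairs
where a, b, and a*b all fit in 15 bits, and pairs where the product fits only
because one operand is 0.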