diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index ede31a06ff..82cd957f16 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -681,7 +681,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
 }
 
-static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, const secp256k1_scalar *b) {
+static void secp256k1_scalar_mul_512(uint64_t *l8, const secp256k1_scalar *a, const secp256k1_scalar *b) {
 #ifdef USE_ASM_X86_64
     const uint64_t *pb = b->d;
     __asm__ __volatile__(
@@ -696,7 +696,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     /* (rax,rdx) = a0 * b0 */
     "movq %%r15, %%rax\n"
     "mulq %%r11\n"
-    /* Extract l0 */
+    /* Extract l8[0] */
     "movq %%rax, 0(%%rsi)\n"
     /* (r8,r9,r10) = (rdx) */
     "movq %%rdx, %%r8\n"
@@ -714,7 +714,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r8\n"
     "adcq %%rdx, %%r9\n"
     "adcq $0, %%r10\n"
-    /* Extract l1 */
+    /* Extract l8[1] */
     "movq %%r8, 8(%%rsi)\n"
     "xorq %%r8, %%r8\n"
     /* (r9,r10,r8) += a0 * b2 */
@@ -735,7 +735,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r9\n"
     "adcq %%rdx, %%r10\n"
     "adcq $0, %%r8\n"
-    /* Extract l2 */
+    /* Extract l8[2] */
     "movq %%r9, 16(%%rsi)\n"
     "xorq %%r9, %%r9\n"
     /* (r10,r8,r9) += a0 * b3 */
@@ -764,7 +764,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r10\n"
     "adcq %%rdx, %%r8\n"
     "adcq $0, %%r9\n"
-    /* Extract l3 */
+    /* Extract l8[3] */
     "movq %%r10, 24(%%rsi)\n"
     "xorq %%r10, %%r10\n"
     /* (r8,r9,r10) += a1 * b3 */
@@ -785,7 +785,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r8\n"
     "adcq %%rdx, %%r9\n"
     "adcq $0, %%r10\n"
-    /* Extract l4 */
+    /* Extract l8[4] */
     "movq %%r8, 32(%%rsi)\n"
     "xorq %%r8, %%r8\n"
     /* (r9,r10,r8) += a2 * b3 */
@@ -800,51 +800,54 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r9\n"
     "adcq %%rdx, %%r10\n"
     "adcq $0, %%r8\n"
-    /* Extract l5 */
+    /* Extract l8[5] */
     "movq %%r9, 40(%%rsi)\n"
     /* (r10,r8) += a3 * b3 */
     "movq %%r15, %%rax\n"
     "mulq %%r14\n"
     "addq %%rax, %%r10\n"
     "adcq %%rdx, %%r8\n"
-    /* Extract l6 */
+    /* Extract l8[6] */
     "movq %%r10, 48(%%rsi)\n"
-    /* Extract l7 */
+    /* Extract l8[7] */
    "movq %%r8, 56(%%rsi)\n"
     : "+d"(pb)
-    : "S"(l), "D"(a->d)
+    : "S"(l8), "D"(a->d)
     : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory");
+
+    SECP256K1_CHECKMEM_MSAN_DEFINE(l8, sizeof(*l8) * 8);
+
 #else
     /* 160 bit accumulator. */
     uint64_t c0 = 0, c1 = 0;
     uint32_t c2 = 0;
 
-    /* l[0..7] = a[0..3] * b[0..3]. */
+    /* l8[0..7] = a[0..3] * b[0..3]. */
     muladd_fast(a->d[0], b->d[0]);
-    extract_fast(l[0]);
+    extract_fast(l8[0]);
     muladd(a->d[0], b->d[1]);
     muladd(a->d[1], b->d[0]);
-    extract(l[1]);
+    extract(l8[1]);
     muladd(a->d[0], b->d[2]);
     muladd(a->d[1], b->d[1]);
     muladd(a->d[2], b->d[0]);
-    extract(l[2]);
+    extract(l8[2]);
     muladd(a->d[0], b->d[3]);
     muladd(a->d[1], b->d[2]);
     muladd(a->d[2], b->d[1]);
     muladd(a->d[3], b->d[0]);
-    extract(l[3]);
+    extract(l8[3]);
     muladd(a->d[1], b->d[3]);
     muladd(a->d[2], b->d[2]);
     muladd(a->d[3], b->d[1]);
-    extract(l[4]);
+    extract(l8[4]);
     muladd(a->d[2], b->d[3]);
     muladd(a->d[3], b->d[2]);
-    extract(l[5]);
+    extract(l8[5]);
     muladd_fast(a->d[3], b->d[3]);
-    extract_fast(l[6]);
+    extract_fast(l8[6]);
     VERIFY_CHECK(c1 == 0);
-    l[7] = c0;
+    l8[7] = c0;
 #endif
 }
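
Note on the change: MemorySanitizer cannot observe the stores that the inline asm performs through %rsi, so without help it would flag the output limbs of l8 as uninitialized when the caller later reads them. The added SECP256K1_CHECKMEM_MSAN_DEFINE(l8, sizeof(*l8) * 8) tells MSan that all 64 bytes of the output are defined once the asm block has run. Renaming the parameter from uint64_t l[8] to uint64_t *l8 also keeps the sizeof expression honest: an array-typed parameter decays to a pointer, so sizeof on the parameter itself would yield the pointer size, whereas sizeof(*l8) * 8 spells out the intended 8 * 8 = 64 bytes. As a rough illustration only (the real definition lives in src/checkmem.h and may differ), such a macro can be implemented along these lines, where __msan_unpoison is the MSan interface call that marks a byte range as initialized:

    /* Hypothetical sketch of an MSan-only "define" annotation; not the
     * actual src/checkmem.h definition. */
    #if defined(__has_feature)
    #  if __has_feature(memory_sanitizer)
    #    include <sanitizer/msan_interface.h>
         /* Mark len bytes at p as initialized, since MSan cannot see
          * writes performed inside inline assembly. */
    #    define SECP256K1_CHECKMEM_MSAN_DEFINE(p, len) __msan_unpoison((p), (len))
    #  endif
    #endif
    #ifndef SECP256K1_CHECKMEM_MSAN_DEFINE
       /* Without MSan instrumentation, the annotation compiles away. */
    #  define SECP256K1_CHECKMEM_MSAN_DEFINE(p, len) do {} while (0)
    #endif

The non-asm #else branch needs no such annotation: its writes to l8 are ordinary C assignments, which MSan instruments directly.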