From 3aa67acb7c4787cd212b03cc44146b177347a578 Mon Sep 17 00:00:00 2001
From: Hang Su
Date: Sat, 9 Nov 2024 11:37:31 -0500
Subject: [PATCH 1/6] move out peripheral changes from orion impl branch

---
 arith/gf2/src/gf2x64.rs                       | 301 ++++++++++++++++++
 arith/gf2/src/gf2x8.rs                        |  13 +-
 arith/gf2/src/lib.rs                          |   3 +
 arith/gf2/src/tests.rs                        |  22 +-
 arith/gf2_128/src/gf2_ext128/avx.rs           |  56 +++-
 arith/gf2_128/src/gf2_ext128/neon.rs          |  51 ++-
 arith/gf2_128/src/gf2_ext128x8/avx256.rs      |  14 +-
 arith/gf2_128/src/gf2_ext128x8/avx512.rs      |  16 +-
 arith/gf2_128/src/gf2_ext128x8/neon.rs        |  17 +-
 arith/gf2_128/src/tests.rs                    |   3 +-
 arith/mersenne31/src/m31_ext.rs               |   9 +
 arith/mersenne31/src/m31_ext3x16.rs           |  16 +-
 arith/mersenne31/src/m31x16/m31_avx256.rs     |   5 +-
 arith/mersenne31/src/m31x16/m31_avx512.rs     |   5 +-
 arith/mersenne31/src/m31x16/m31_neon.rs       |   5 +-
 arith/polynomials/src/mle.rs                  |   8 +-
 arith/src/bn254.rs                            |   5 +-
 arith/src/extension_field.rs                  |   6 +-
 arith/src/simd_field.rs                       |   5 +-
 config/src/gkr_config.rs                      |   2 +-
 transcript/src/fiat_shamir_hash.rs            |   6 +-
 transcript/src/fiat_shamir_hash/keccak_256.rs |   2 +-
 transcript/src/transcript.rs                  |  16 +-
 23 files changed, 520 insertions(+), 66 deletions(-)
 create mode 100644 arith/gf2/src/gf2x64.rs

diff --git a/arith/gf2/src/gf2x64.rs b/arith/gf2/src/gf2x64.rs
new file mode 100644
index 00000000..a03b594c
--- /dev/null
+++ b/arith/gf2/src/gf2x64.rs
@@ -0,0 +1,301 @@
+use std::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign};
+
+use arith::{Field, FieldSerde, FieldSerdeResult, SimdField};
+
+use super::GF2;
+
+#[derive(Debug, Clone, Copy, Default, PartialEq)]
+pub struct GF2x64 {
+    pub v: u64,
+}
+
+impl FieldSerde for GF2x64 {
+    const SERIALIZED_SIZE: usize = 8;
+
+    #[inline(always)]
+    fn serialize_into<W: std::io::Write>(&self, mut writer: W) -> FieldSerdeResult<()> {
+        writer.write_all(self.v.to_le_bytes().as_ref())?;
+        Ok(())
+    }
+
+    #[inline(always)]
+    fn deserialize_from<R: std::io::Read>(mut reader: R) -> FieldSerdeResult<Self> {
+        let mut u = [0u8; Self::SERIALIZED_SIZE];
+        reader.read_exact(&mut u)?;
+        Ok(GF2x64 {
+            v: u64::from_le_bytes(u),
+        })
+    }
+
+    #[inline]
+    fn try_deserialize_from_ecc_format<R: std::io::Read>(_reader: R) -> FieldSerdeResult<Self> {
+        unimplemented!("We don't have serialization in ecc for gf2x64")
+    }
+}
+
+impl Field for GF2x64 {
+    const NAME: &'static str = "Galois Field 2 SIMD 64";
+
+    const SIZE: usize = 8;
+
+    const FIELD_SIZE: usize = 1;
+
+    const ZERO: Self = GF2x64 { v: 0 };
+
+    const ONE: Self = GF2x64 { v: !0u64 };
+
+    const INV_2: Self = GF2x64 { v: 0 }; // NOTE: should not be used
+
+    #[inline(always)]
+    fn zero() -> Self {
+        GF2x64::ZERO
+    }
+
+    #[inline(always)]
+    fn one() -> Self {
+        GF2x64::ONE
+    }
+
+    #[inline(always)]
+    fn is_zero(&self) -> bool {
+        self.v == 0
+    }
+
+    #[inline(always)]
+    fn random_unsafe(mut rng: impl rand::RngCore) -> Self {
+        GF2x64 { v: rng.next_u64() }
+    }
+
+    #[inline(always)]
+    fn random_bool(mut rng: impl rand::RngCore) -> Self {
+        GF2x64 { v: rng.next_u64() }
+    }
+
+    #[inline(always)]
+    fn exp(&self, exponent: u128) -> Self {
+        if exponent % 2 == 0 {
+            Self::one()
+        } else {
+            *self
+        }
+    }
+
+    #[inline(always)]
+    fn inv(&self) -> Option<Self> {
+        unimplemented!()
+    }
+
+    #[inline(always)]
+    fn as_u32_unchecked(&self) -> u32 {
+        self.v as u32
+    }
+
+    #[inline(always)]
+    fn from_uniform_bytes(bytes: &[u8; 32]) -> Self {
+        let mut buf = [0u8; 8];
+        buf[..].copy_from_slice(&bytes[..8]);
+        GF2x64 {
+            v: u64::from_le_bytes(buf),
+        }
+    }
+
+    #[inline(always)]
+    fn mul_by_5(&self) -> Self {
+        *self
+    }
+
+    #[inline(always)]
+    fn mul_by_6(&self) -> Self {
+        Self::ZERO
+    }
+}
+
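+// Lane-wise GF(2) arithmetic on the packed bits: multiplication is bitwise
+// AND and addition/subtraction are bitwise XOR (every element is its own
+// additive inverse), which is exactly what the operator impls below compute.
+// For example, with a.v = 0b1100 and b.v = 0b1010:
+//   (a * b).v == 0b1000 and (a + b).v == 0b0110.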
+impl Mul<&GF2x64> for GF2x64 {
+    type Output = GF2x64;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn mul(self, rhs: &GF2x64) -> Self::Output {
+        GF2x64 { v: self.v & rhs.v }
+    }
+}
+
+impl Mul for GF2x64 {
+    type Output = GF2x64;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn mul(self, rhs: GF2x64) -> GF2x64 {
+        GF2x64 { v: self.v & rhs.v }
+    }
+}
+
+impl MulAssign<&GF2x64> for GF2x64 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn mul_assign(&mut self, rhs: &GF2x64) {
+        self.v &= rhs.v;
+    }
+}
+
+impl MulAssign for GF2x64 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn mul_assign(&mut self, rhs: GF2x64) {
+        self.v &= rhs.v;
+    }
+}
+
+impl Sub for GF2x64 {
+    type Output = GF2x64;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn sub(self, rhs: GF2x64) -> GF2x64 {
+        GF2x64 { v: self.v ^ rhs.v }
+    }
+}
+
+impl SubAssign for GF2x64 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn sub_assign(&mut self, rhs: GF2x64) {
+        self.v ^= rhs.v;
+    }
+}
+
+impl Add for GF2x64 {
+    type Output = GF2x64;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn add(self, rhs: GF2x64) -> GF2x64 {
+        GF2x64 { v: self.v ^ rhs.v }
+    }
+}
+
+impl AddAssign for GF2x64 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn add_assign(&mut self, rhs: GF2x64) {
+        self.v ^= rhs.v;
+    }
+}
+
+impl Add<&GF2x64> for GF2x64 {
+    type Output = GF2x64;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn add(self, rhs: &GF2x64) -> GF2x64 {
+        GF2x64 { v: self.v ^ rhs.v }
+    }
+}
+
+impl Sub<&GF2x64> for GF2x64 {
+    type Output = GF2x64;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn sub(self, rhs: &GF2x64) -> GF2x64 {
+        GF2x64 { v: self.v ^ rhs.v }
+    }
+}
+
+impl<T: std::borrow::Borrow<GF2x64>> std::iter::Sum<T> for GF2x64 {
+    fn sum<I: Iterator<Item = T>>(iter: I) -> Self {
+        iter.fold(Self::zero(), |acc, item| acc + item.borrow())
+    }
+}
+
+impl<T: std::borrow::Borrow<GF2x64>> std::iter::Product<T> for GF2x64 {
+    fn product<I: Iterator<Item = T>>(iter: I) -> Self {
+        iter.fold(Self::one(), |acc, item| acc * item.borrow())
+    }
+}
+
+impl Neg for GF2x64 {
+    type Output = GF2x64;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn neg(self) -> GF2x64 {
+        GF2x64 { v: self.v }
+    }
+}
+
+impl AddAssign<&GF2x64> for GF2x64 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn add_assign(&mut self, rhs: &GF2x64) {
+        self.v ^= rhs.v;
+    }
+}
+
+impl SubAssign<&GF2x64> for GF2x64 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn sub_assign(&mut self, rhs: &GF2x64) {
+        self.v ^= rhs.v;
+    }
+}
+
+impl From<u32> for GF2x64 {
+    #[inline(always)]
+    fn from(v: u32) -> Self {
+        assert!(v < 2);
+        if v == 0 {
+            GF2x64 { v: 0 }
+        } else {
+            GF2x64 { v: !0u64 }
+        }
+    }
+}
+
+impl From<GF2> for GF2x64 {
+    #[inline(always)]
+    fn from(v: GF2) -> Self {
+        assert!(v.v < 2);
+        if v.v == 0 {
+            GF2x64 { v: 0 }
+        } else {
+            GF2x64 { v: !0u64 }
+        }
+    }
+}
+
+impl SimdField for GF2x64 {
+    #[inline(always)]
+    fn scale(&self, challenge: &Self::Scalar) -> Self {
+        if challenge.v == 0 {
+            Self::zero()
+        } else {
+            *self
+        }
+    }
+
+    #[inline(always)]
+    fn pack(base_vec: &[Self::Scalar]) -> Self {
+        assert!(base_vec.len() == Self::PACK_SIZE);
+        let mut ret = 0u64;
+        for (i, scalar) in base_vec.iter().enumerate() {
+            ret |= (scalar.v as u64) << (Self::PACK_SIZE - 1 - i);
+        }
+        Self { v: ret }
+    }
+
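+    // Bit order is MSB-first: base_vec[0] lands in bit 63 and base_vec[63]
+    // in bit 0. unpack() below reads the bits back in the same order, so
+    // unpack(pack(bits)) == bits for any 64 GF2 values.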
+    #[inline(always)]
+    fn unpack(&self) -> Vec<Self::Scalar> {
+        let mut ret = vec![];
+        for i in 0..Self::PACK_SIZE {
+            ret.push(Self::Scalar {
+                v: ((self.v >> (Self::PACK_SIZE - 1 - i)) & 1u64) as u8,
+            });
+        }
+        ret
+    }
+
+    type Scalar = crate::GF2;
+
+    const PACK_SIZE: usize = 64;
+}
diff --git a/arith/gf2/src/gf2x8.rs b/arith/gf2/src/gf2x8.rs
index 8bb9e057..56ad8064 100644
--- a/arith/gf2/src/gf2x8.rs
+++ b/arith/gf2/src/gf2x8.rs
@@ -38,7 +38,7 @@ impl FieldSerde for GF2x8 {
 
 impl Field for GF2x8 {
     // still will pack 8 bits into a u8
-    const NAME: &'static str = "Galios Field 2 SIMD";
+    const NAME: &'static str = "Galois Field 2 SIMD 8";
 
     const SIZE: usize = 1;
 
@@ -278,14 +278,9 @@ impl SimdField for GF2x8 {
         }
     }
 
-    #[inline(always)]
-    fn pack_size() -> usize {
-        8
-    }
-
     #[inline(always)]
     fn pack(base_vec: &[Self::Scalar]) -> Self {
-        assert!(base_vec.len() == Self::pack_size());
+        assert!(base_vec.len() == Self::PACK_SIZE);
         let mut ret = 0u8;
         for (i, scalar) in base_vec.iter().enumerate() {
             ret |= scalar.v << (7 - i);
@@ -296,7 +291,7 @@ impl SimdField for GF2x8 {
     #[inline(always)]
     fn unpack(&self) -> Vec<Self::Scalar> {
         let mut ret = vec![];
-        for i in 0..Self::pack_size() {
+        for i in 0..Self::PACK_SIZE {
             ret.push(Self::Scalar {
                 v: (self.v >> (7 - i)) & 1u8,
             });
@@ -305,4 +300,6 @@ impl SimdField for GF2x8 {
     }
 
     type Scalar = crate::GF2;
+
+    const PACK_SIZE: usize = 8;
 }
diff --git a/arith/gf2/src/lib.rs b/arith/gf2/src/lib.rs
index 5a4dcaf0..94edd4a7 100644
--- a/arith/gf2/src/lib.rs
+++ b/arith/gf2/src/lib.rs
@@ -6,5 +6,8 @@ pub use gf2::GF2;
 mod gf2x8;
 pub use gf2x8::GF2x8;
 
+mod gf2x64;
+pub use gf2x64::GF2x64;
+
 #[cfg(test)]
 mod tests;
diff --git a/arith/gf2/src/tests.rs b/arith/gf2/src/tests.rs
index aec115b3..f3364700 100644
--- a/arith/gf2/src/tests.rs
+++ b/arith/gf2/src/tests.rs
@@ -1,9 +1,11 @@
 use ark_std::test_rng;
 use std::io::Cursor;
 
-use arith::{random_field_tests, random_inversion_tests, random_simd_field_tests, FieldSerde};
+use arith::{
+    random_field_tests, random_inversion_tests, random_simd_field_tests, Field, FieldSerde,
+};
 
-use crate::{GF2x8, GF2};
+use crate::{GF2x64, GF2x8, GF2};
 
 #[test]
 fn test_field() {
@@ -17,16 +19,24 @@ fn test_field() {
 fn test_simd_field() {
     random_field_tests::<GF2x8>("Vectorized GF2".to_string());
     random_simd_field_tests::<GF2x8>("Vectorized GF2".to_string());
+
+    random_field_tests::<GF2x64>("Vectorized GF2 len 64".to_string());
+    random_simd_field_tests::<GF2x64>("Vectorized GF2 len 64".to_string());
 }
 
-#[test]
-fn test_custom_serde_vectorize_gf2() {
-    let a = GF2x8::from(0);
+fn custom_serde_vectorize_gf2<F: Field + FieldSerde>() {
+    let a = F::from(0);
     let mut buffer = vec![];
     assert!(a.serialize_into(&mut buffer).is_ok());
     let mut cursor = Cursor::new(buffer);
-    let b = GF2x8::deserialize_from(&mut cursor);
+    let b = F::deserialize_from(&mut cursor);
     assert!(b.is_ok());
     let b = b.unwrap();
     assert_eq!(a, b);
 }
+
+#[test]
+fn test_custom_serde_vectorize_gf2() {
+    custom_serde_vectorize_gf2::<GF2x8>();
+    custom_serde_vectorize_gf2::<GF2x64>()
+}
diff --git a/arith/gf2_128/src/gf2_ext128/avx.rs b/arith/gf2_128/src/gf2_ext128/avx.rs
index 10089cb2..97403998 100644
--- a/arith/gf2_128/src/gf2_ext128/avx.rs
+++ b/arith/gf2_128/src/gf2_ext128/avx.rs
@@ -5,9 +5,8 @@ use std::{
     ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
 };
 
-use arith::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult};
-
-use gf2::GF2;
+use arith::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, SimdField};
+use gf2::{GF2x64, GF2};
 
 #[derive(Debug, Clone, Copy)]
 pub struct AVXGF2_128 {
@@ -21,7 +20,9 @@ impl FieldSerde for AVXGF2_128 {
 
     #[inline(always)]
     fn serialize_into<W: std::io::Write>(&self, mut writer: W) -> FieldSerdeResult<()> {
-        unsafe { writer.write_all(transmute::<__m128i, [u8; 16]>(self.v).as_ref())? };
+        unsafe {
+            writer.write_all(transmute::<__m128i, [u8; Self::SERIALIZED_SIZE]>(self.v).as_ref())?
+        };
         Ok(())
     }
 
@@ -208,6 +209,15 @@ impl ExtensionField for AVXGF2_128 {
     }
 }
 
+impl Mul<GF2> for AVXGF2_128 {
+    type Output = AVXGF2_128;
+
+    #[inline(always)]
+    fn mul(self, rhs: GF2) -> Self::Output {
+        self.mul_by_base_field(&rhs)
+    }
+}
+
 impl From<GF2> for AVXGF2_128 {
     #[inline(always)]
     fn from(v: GF2) -> Self {
@@ -318,3 +328,41 @@ fn mul_internal(a: &AVXGF2_128, b: &AVXGF2_128) -> AVXGF2_128 {
         v: unsafe { gfmul(a.v, b.v) },
     }
 }
+
+impl SimdField for AVXGF2_128 {
+    type Scalar = GF2;
+
+    const PACK_SIZE: usize = 128;
+
+    #[inline(always)]
+    fn scale(&self, challenge: &Self::Scalar) -> Self {
+        if challenge.v == 0 {
+            Self::ZERO
+        } else {
+            *self
+        }
+    }
+
+    #[inline(always)]
+    fn pack(base_vec: &[Self::Scalar]) -> Self {
+        assert_eq!(base_vec.len(), Self::PACK_SIZE);
+        let mut packed_to_gf2x64 = [GF2x64::ZERO; Self::PACK_SIZE / GF2x64::PACK_SIZE];
+        packed_to_gf2x64
+            .iter_mut()
+            .zip(base_vec.chunks(GF2x64::PACK_SIZE))
+            .for_each(|(gf2x64, pack)| *gf2x64 = GF2x64::pack(pack));
+
+        unsafe { transmute(packed_to_gf2x64) }
+    }
+
+    #[inline(always)]
+    fn unpack(&self) -> Vec<Self::Scalar> {
+        let packed_to_gf2x64: [GF2x64; Self::PACK_SIZE / GF2x64::PACK_SIZE] =
+            unsafe { transmute(*self) };
+
+        packed_to_gf2x64
+            .iter()
+            .flat_map(|packed| packed.unpack())
+            .collect()
+    }
+}
diff --git a/arith/gf2_128/src/gf2_ext128/neon.rs b/arith/gf2_128/src/gf2_ext128/neon.rs
index 9e1f97db..792e7ad3 100644
--- a/arith/gf2_128/src/gf2_ext128/neon.rs
+++ b/arith/gf2_128/src/gf2_ext128/neon.rs
@@ -2,8 +2,8 @@ use std::iter::{Product, Sum};
 use std::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign};
 use std::{arch::aarch64::*, mem::transmute};
 
-use arith::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult};
-use gf2::GF2;
+use arith::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, SimdField};
+use gf2::{GF2x64, GF2};
 
 #[derive(Clone, Copy, Debug)]
 pub struct NeonGF2_128 {
@@ -203,6 +203,15 @@ impl ExtensionField for NeonGF2_128 {
     }
 }
 
+impl Mul<GF2> for NeonGF2_128 {
+    type Output = NeonGF2_128;
+
+    #[inline]
+    fn mul(self, rhs: GF2) -> Self::Output {
+        self.mul_by_base_field(&rhs)
+    }
+}
+
 impl From<GF2> for NeonGF2_128 {
     #[inline(always)]
     fn from(v: GF2) -> Self {
@@ -394,3 +403,41 @@ pub(crate) fn mul_by_x_internal(a: &uint32x4_t) -> uint32x4_t {
         vreinterpretq_u32_u64(res)
     }
 }
+
+impl SimdField for NeonGF2_128 {
+    type Scalar = GF2;
+
+    const PACK_SIZE: usize = 128;
+
+    #[inline(always)]
+    fn scale(&self, challenge: &Self::Scalar) -> Self {
+        if challenge.v == 0 {
+            Self::ZERO
+        } else {
+            *self
+        }
+    }
+
+    #[inline(always)]
+    fn pack(base_vec: &[Self::Scalar]) -> Self {
+        assert_eq!(base_vec.len(), Self::PACK_SIZE);
+        let mut packed_to_gf2x64 = [GF2x64::ZERO; Self::PACK_SIZE / GF2x64::PACK_SIZE];
+        packed_to_gf2x64
+            .iter_mut()
+            .zip(base_vec.chunks(GF2x64::PACK_SIZE))
+            .for_each(|(gf2x64, pack)| *gf2x64 = GF2x64::pack(pack));
+
+        unsafe { transmute(packed_to_gf2x64) }
+    }
+
+    #[inline(always)]
+    fn unpack(&self) -> Vec<Self::Scalar> {
+        let packed_to_gf2x64: [GF2x64; Self::PACK_SIZE / GF2x64::PACK_SIZE] =
+            unsafe { transmute(*self) };
+
+        packed_to_gf2x64
+            .iter()
+            .flat_map(|packed| packed.unpack())
+            .collect()
+    }
+}
diff --git a/arith/gf2_128/src/gf2_ext128x8/avx256.rs b/arith/gf2_128/src/gf2_ext128x8/avx256.rs
index 9ef2b183..f923fb54 100644
--- a/arith/gf2_128/src/gf2_ext128x8/avx256.rs
+++ b/arith/gf2_128/src/gf2_ext128x8/avx256.rs
@@ -473,10 +473,7 @@ impl SimdField for AVX256GF2_128x8 {
     }
 
     type Scalar = GF2_128;
-    #[inline(always)]
-    fn pack_size() -> usize {
-        8
-    }
+    const PACK_SIZE: usize = 8;
 
     fn pack(base_vec: &[Self::Scalar]) -> Self {
         assert!(base_vec.len() == 8);
@@ -680,6 +677,15 @@ impl ExtensionField for AVX256GF2_128x8 {
     }
 }
 
+impl Mul<GF2x8> for AVX256GF2_128x8 {
+    type Output = AVX256GF2_128x8;
+
+    #[inline]
+    fn mul(self, rhs: GF2x8) -> Self::Output {
+        self.mul_by_base_field(&rhs)
+    }
+}
+
 impl From<GF2x8> for AVX256GF2_128x8 {
     #[inline(always)]
     fn from(v: GF2x8) -> Self {
diff --git a/arith/gf2_128/src/gf2_ext128x8/avx512.rs b/arith/gf2_128/src/gf2_ext128x8/avx512.rs
index b41f98b6..37f89b2c 100644
--- a/arith/gf2_128/src/gf2_ext128x8/avx512.rs
+++ b/arith/gf2_128/src/gf2_ext128x8/avx512.rs
@@ -448,14 +448,11 @@ impl SimdField for AVX512GF2_128x8 {
     }
 
     type Scalar = GF2_128;
-    #[inline(always)]
-    fn pack_size() -> usize {
-        8
-    }
+    const PACK_SIZE: usize = 8;
 
     #[inline(always)]
     fn pack(base_vec: &[Self::Scalar]) -> Self {
-        assert!(base_vec.len() == 8);
+        assert_eq!(base_vec.len(), Self::PACK_SIZE);
         let base_vec_array: [Self::Scalar; 8] = base_vec.try_into().unwrap();
         unsafe { transmute(base_vec_array) }
     }
@@ -715,6 +712,15 @@ impl Mul<GF2_128> for AVX512GF2_128x8 {
     }
 }
 
+impl Mul<GF2x8> for AVX512GF2_128x8 {
+    type Output = AVX512GF2_128x8;
+
+    #[inline(always)]
+    fn mul(self, rhs: GF2x8) -> Self::Output {
+        self.mul_by_base_field(&rhs)
+    }
+}
+
 impl Add for AVX512GF2_128x8 {
     type Output = AVX512GF2_128x8;
     #[inline(always)]
diff --git a/arith/gf2_128/src/gf2_ext128x8/neon.rs b/arith/gf2_128/src/gf2_ext128x8/neon.rs
index 04ba6909..cf49c66a 100644
--- a/arith/gf2_128/src/gf2_ext128x8/neon.rs
+++ b/arith/gf2_128/src/gf2_ext128x8/neon.rs
@@ -75,7 +75,7 @@ impl Field for NeonGF2_128x8 {
 
     const SIZE: usize = 16 * 8;
 
-    const FIELD_SIZE: usize = 128 * 8; // in bits
+    const FIELD_SIZE: usize = 128; // in bits
 
     const ZERO: Self = NeonGF2_128x8 {
         v: [unsafe { transmute::<[u32; 4], uint32x4_t>([0, 0, 0, 0]) }; 8],
@@ -200,10 +200,8 @@ impl SimdField for NeonGF2_128x8 {
             ],
         }
     }
-    #[inline(always)]
-    fn pack_size() -> usize {
-        8
-    }
+
+    const PACK_SIZE: usize = 8;
 
     #[inline(always)]
     fn pack(base_vec: &[Self::Scalar]) -> Self {
@@ -392,6 +390,15 @@ impl From<GF2x8> for NeonGF2_128x8 {
     }
 }
 
+impl Mul<GF2x8> for NeonGF2_128x8 {
+    type Output = NeonGF2_128x8;
+
+    #[inline]
+    fn mul(self, rhs: GF2x8) -> Self::Output {
+        self.mul_by_base_field(&rhs)
+    }
+}
+
 impl Mul<GF2_128> for NeonGF2_128x8 {
     type Output = NeonGF2_128x8;
 
diff --git a/arith/gf2_128/src/tests.rs b/arith/gf2_128/src/tests.rs
index 43be77ff..653a7604 100644
--- a/arith/gf2_128/src/tests.rs
+++ b/arith/gf2_128/src/tests.rs
@@ -13,7 +13,8 @@ use crate::{GF2_128x8, GF2_128};
 
 #[test]
 fn test_simd_field() {
-    random_simd_field_tests::<GF2_128x8>("Simd GF2 Ext128".to_string());
+    random_simd_field_tests::<GF2_128>("Simd for GF2 over GF2Ext128".to_string());
+    random_simd_field_tests::<GF2_128x8>("Simd for GF2Ext128 over GF2Ext128x8".to_string());
 }
 
 #[test]
diff --git a/arith/mersenne31/src/m31_ext.rs b/arith/mersenne31/src/m31_ext.rs
index 0e95ae5e..15b38397 100644
--- a/arith/mersenne31/src/m31_ext.rs
+++ b/arith/mersenne31/src/m31_ext.rs
@@ -207,6 +207,15 @@ impl ExtensionField for M31Ext3 {
     }
 }
 
+impl Mul<M31> for M31Ext3 {
+    type Output = M31Ext3;
+
+    #[inline(always)]
+    fn mul(self, rhs: M31) -> Self::Output {
+        self.mul_by_base_field(&rhs)
+    }
+}
+
 impl Add<M31> for M31Ext3 {
     type Output = M31Ext3;
 
diff --git a/arith/mersenne31/src/m31_ext3x16.rs b/arith/mersenne31/src/m31_ext3x16.rs
index 8de30cfc..85bebd4f 100644
--- a/arith/mersenne31/src/m31_ext3x16.rs
+++ b/arith/mersenne31/src/m31_ext3x16.rs
@@ -57,14 +57,11 @@ impl SimdField for M31Ext3x16 {
         *self * *challenge
     }
 
-    #[inline(always)]
-    fn pack_size() -> usize {
-        M31x16::pack_size()
-    }
+    const PACK_SIZE: usize = M31x16::PACK_SIZE;
 
     #[inline(always)]
     fn pack(base_vec: &[Self::Scalar]) -> Self {
-        assert!(base_vec.len() == Self::pack_size());
+        assert!(base_vec.len() == Self::PACK_SIZE);
         let mut v0s = vec![];
         let mut v1s = vec![];
         let mut v2s = vec![];
@@ -137,6 +134,15 @@ impl ExtensionField for M31Ext3x16 {
     }
 }
 
+impl Mul<M31x16> for M31Ext3x16 {
+    type Output = M31Ext3x16;
+
+    #[inline]
+    fn mul(self, rhs: M31x16) -> Self::Output {
+        self.mul_by_base_field(&rhs)
+    }
+}
+
 impl From<M31Ext3> for M31Ext3x16 {
     #[inline(always)]
     fn from(x: M31Ext3) -> Self {
diff --git a/arith/mersenne31/src/m31x16/m31_avx256.rs b/arith/mersenne31/src/m31x16/m31_avx256.rs
index 537e3911..a8bfedc5 100644
--- a/arith/mersenne31/src/m31x16/m31_avx256.rs
+++ b/arith/mersenne31/src/m31x16/m31_avx256.rs
@@ -279,10 +279,7 @@ impl SimdField for AVXM31 {
         *self * *challenge
     }
 
-    #[inline(always)]
-    fn pack_size() -> usize {
-        M31_PACK_SIZE
-    }
+    const PACK_SIZE: usize = M31_PACK_SIZE;
 
     fn pack(base_vec: &[Self::Scalar]) -> Self {
         assert_eq!(base_vec.len(), M31_PACK_SIZE);
diff --git a/arith/mersenne31/src/m31x16/m31_avx512.rs b/arith/mersenne31/src/m31x16/m31_avx512.rs
index bba45482..c2204a23 100644
--- a/arith/mersenne31/src/m31x16/m31_avx512.rs
+++ b/arith/mersenne31/src/m31x16/m31_avx512.rs
@@ -234,10 +234,7 @@ impl SimdField for AVXM31 {
         *self * *challenge
     }
 
-    #[inline(always)]
-    fn pack_size() -> usize {
-        M31_PACK_SIZE
-    }
+    const PACK_SIZE: usize = M31_PACK_SIZE;
 
     #[inline(always)]
     fn pack(base_vec: &[Self::Scalar]) -> Self {
diff --git a/arith/mersenne31/src/m31x16/m31_neon.rs b/arith/mersenne31/src/m31x16/m31_neon.rs
index 9e0b046e..a61a7841 100644
--- a/arith/mersenne31/src/m31x16/m31_neon.rs
+++ b/arith/mersenne31/src/m31x16/m31_neon.rs
@@ -301,10 +301,7 @@ impl SimdField for NeonM31 {
         *self * packed_challenge
     }
 
-    #[inline(always)]
-    fn pack_size() -> usize {
-        M31_PACK_SIZE
-    }
+    const PACK_SIZE: usize = M31_PACK_SIZE;
 
     #[inline(always)]
     fn pack(base_vec: &[Self::Scalar]) -> Self {
diff --git a/arith/polynomials/src/mle.rs b/arith/polynomials/src/mle.rs
index 02da940d..e53fcd73 100644
--- a/arith/polynomials/src/mle.rs
+++ b/arith/polynomials/src/mle.rs
@@ -9,8 +9,14 @@ pub struct MultiLinearPoly<F: Field> {
 }
 
 impl<F: Field> MultiLinearPoly<F> {
-    /// Sample a random polynomials.
+    #[inline]
+    pub fn new(evals: Vec<F>) -> Self {
+        assert!(evals.len().is_power_of_two());
+        Self { coeffs: evals }
+    }
+
+    /// Sample a random polynomial.
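+    /// The polynomial is represented by its 2^nv evaluations, each drawn
+    /// independently via F::random_unsafe.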
     #[inline]
     pub fn random(nv: usize, mut rng: impl RngCore) -> Self {
         let coeff = (0..1 << nv).map(|_| F::random_unsafe(&mut rng)).collect();
diff --git a/arith/src/bn254.rs b/arith/src/bn254.rs
index 3a36a2a3..2a7f8c9b 100644
--- a/arith/src/bn254.rs
+++ b/arith/src/bn254.rs
@@ -141,10 +141,7 @@ impl SimdField for Fr {
         vec![*self]
     }
 
-    #[inline(always)]
-    fn pack_size() -> usize {
-        1
-    }
+    const PACK_SIZE: usize = 1;
 }
 
 impl FieldSerde for Fr {
diff --git a/arith/src/extension_field.rs b/arith/src/extension_field.rs
index 5f9fe869..c1ab27d7 100644
--- a/arith/src/extension_field.rs
+++ b/arith/src/extension_field.rs
@@ -1,10 +1,14 @@
+use std::ops::Mul;
+
 use crate::{Field, FieldSerde};
 
 /// Configurations for Extension Field over
 /// - either the Binomial polynomial x^DEGREE - W
 /// - or the AES polynomial x^128 + x^7 + x^2 + x + 1
 //
-pub trait ExtensionField: From<Self::BaseField> + Field + FieldSerde {
+pub trait ExtensionField:
+    Mul<Self::BaseField, Output = Self> + From<Self::BaseField> + Field + FieldSerde
+{
     /// Degree of the Extension
     const DEGREE: usize;
 
diff --git a/arith/src/simd_field.rs b/arith/src/simd_field.rs
index e7042a7d..e1d697d5 100644
--- a/arith/src/simd_field.rs
+++ b/arith/src/simd_field.rs
@@ -5,6 +5,9 @@ pub trait SimdField: From<Self::Scalar> + Field + FieldSerde {
     /// Field for the challenge. Can be self.
     type Scalar: Field + FieldSerde + Send;
 
+    /// Pack size (width) for the SIMD instruction
+    const PACK_SIZE: usize;
+
     /// scale self with the challenge
     fn scale(&self, challenge: &Self::Scalar) -> Self;
 
@@ -13,6 +16,4 @@ pub trait SimdField: From<Self::Scalar> + Field + FieldSerde {
 
     /// unpack into a vector.
     fn unpack(&self) -> Vec<Self::Scalar>;
-
-    fn pack_size() -> usize;
 }
diff --git a/config/src/gkr_config.rs b/config/src/gkr_config.rs
index 6bac5a3e..5930df89 100644
--- a/config/src/gkr_config.rs
+++ b/config/src/gkr_config.rs
@@ -101,7 +101,7 @@ pub trait GKRConfig: Default + Debug + Clone + Send + Sync + 'static {
 
     /// The pack size for the simd circuit field, e.g., 16 for M31x16
     fn get_field_pack_size() -> usize {
-        Self::SimdCircuitField::pack_size()
+        Self::SimdCircuitField::PACK_SIZE
     }
 
     /// Evaluate the circuit values at the challenge
diff --git a/transcript/src/fiat_shamir_hash.rs b/transcript/src/fiat_shamir_hash.rs
index 268d1e35..208bdd56 100644
--- a/transcript/src/fiat_shamir_hash.rs
+++ b/transcript/src/fiat_shamir_hash.rs
@@ -1,3 +1,5 @@
+use std::fmt::Debug;
+
 use arith::{Field, FieldSerde};
 
 pub mod sha2_256;
@@ -9,7 +11,7 @@ pub use keccak_256::*;
 pub mod mimc;
 pub use mimc::*;
 
-pub trait FiatShamirBytesHash {
+pub trait FiatShamirBytesHash: Clone + Debug {
     /// The size of the hash output in bytes.
     const DIGEST_SIZE: usize;
 
@@ -23,7 +25,7 @@ pub trait FiatShamirBytesHash {
     fn hash_inplace(buffer: &mut [u8]);
 }
 
-pub trait FiatShamirFieldHash {
+pub trait FiatShamirFieldHash: Clone + Debug {
     /// Create a new hash instance.
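+    /// Hash instances are Clone + Debug, so a transcript holding one can
+    /// itself derive Clone + Debug.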
     fn new() -> Self;
diff --git a/transcript/src/fiat_shamir_hash/keccak_256.rs b/transcript/src/fiat_shamir_hash/keccak_256.rs
index 21645eca..6db6bcd0 100644
--- a/transcript/src/fiat_shamir_hash/keccak_256.rs
+++ b/transcript/src/fiat_shamir_hash/keccak_256.rs
@@ -2,7 +2,7 @@ use tiny_keccak::{Hasher, Sha3};
 
 use super::FiatShamirBytesHash;
 
-#[derive(Clone, Default)]
+#[derive(Clone, Default, Debug)]
 pub struct Keccak256hasher {}
 
 impl FiatShamirBytesHash for Keccak256hasher {
diff --git a/transcript/src/transcript.rs b/transcript/src/transcript.rs
index 05da7d76..221c10a7 100644
--- a/transcript/src/transcript.rs
+++ b/transcript/src/transcript.rs
@@ -1,4 +1,4 @@
-use std::marker::PhantomData;
+use std::{fmt::Debug, marker::PhantomData};
 
 use arith::{Field, FieldSerde};
 
@@ -7,7 +7,7 @@ use crate::{
     Proof,
 };
 
-pub trait Transcript<F: Field + FieldSerde> {
+pub trait Transcript<F: Field + FieldSerde>: Clone + Debug {
     /// Create a new transcript.
     fn new() -> Self;
 
@@ -24,6 +24,18 @@ pub trait Transcript<F: Field + FieldSerde> {
     /// Use this function when you need some randomness other than the native field
     fn generate_challenge_u8_slice(&mut self, n_bytes: usize) -> Vec<u8>;
 
+    /// Generate a list of positions that we want to open the polynomial at.
+    #[inline]
+    fn generate_challenge_index_vector(&mut self, num_queries: usize) -> Vec<usize> {
+        let mut challenges = Vec::with_capacity(num_queries);
+        let mut buf = [0u8; 8];
+        for _ in 0..num_queries {
+            buf.copy_from_slice(self.generate_challenge_u8_slice(8).as_slice());
+            challenges.push(usize::from_le_bytes(buf));
+        }
+        challenges
+    }
+
     /// Generate a challenge vector.
     #[inline]
     fn generate_challenge_field_elements(&mut self, n: usize) -> Vec<F> {

From 5b5e645909c2619613d7e2ff7d5ed3a27787e425 Mon Sep 17 00:00:00 2001
From: Hang Su
Date: Mon, 11 Nov 2024 15:23:04 -0500
Subject: [PATCH 2/6] working on moving simd stuff from gf2_128 to gf2x128 -
 avx done, on neon - food first

---
 arith/gf2/src/gf2x128.rs      |   9 +
 arith/gf2/src/gf2x128/avx.rs  | 365 ++++++++++++++++++++++++++++++++++
 arith/gf2/src/gf2x128/neon.rs |  44 ++++
 arith/gf2/src/lib.rs          |   3 +
 4 files changed, 421 insertions(+)
 create mode 100644 arith/gf2/src/gf2x128.rs
 create mode 100644 arith/gf2/src/gf2x128/avx.rs
 create mode 100644 arith/gf2/src/gf2x128/neon.rs

diff --git a/arith/gf2/src/gf2x128.rs b/arith/gf2/src/gf2x128.rs
new file mode 100644
index 00000000..4a150bab
--- /dev/null
+++ b/arith/gf2/src/gf2x128.rs
@@ -0,0 +1,9 @@
+#[cfg(target_arch = "x86_64")]
+mod avx;
+#[cfg(target_arch = "x86_64")]
+pub type GF2x128 = avx::AVXGF2x128;
+
+#[cfg(target_arch = "aarch64")]
+mod neon;
+#[cfg(target_arch = "aarch64")]
+pub type GF2x128 = neon::NeonGF2x128;
diff --git a/arith/gf2/src/gf2x128/avx.rs b/arith/gf2/src/gf2x128/avx.rs
new file mode 100644
index 00000000..c3d06ef3
--- /dev/null
+++ b/arith/gf2/src/gf2x128/avx.rs
@@ -0,0 +1,365 @@
+use std::{
+    arch::x86_64::*,
+    mem::{transmute, zeroed},
+    ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
+};
+
+use arith::{Field, FieldSerde, FieldSerdeResult, SimdField};
+
+use crate::{GF2x64, GF2};
+
+#[derive(Debug, Clone, Copy)]
+pub struct AVXGF2x128 {
+    pub v: __m128i,
+}
+
+impl FieldSerde for AVXGF2x128 {
+    const SERIALIZED_SIZE: usize = 16;
+
+    #[inline(always)]
+    fn serialize_into<W: std::io::Write>(&self, mut writer: W) -> FieldSerdeResult<()> {
+        unsafe {
+            writer.write_all(transmute::<__m128i, [u8; Self::SERIALIZED_SIZE]>(self.v).as_ref())?
+        };
+        Ok(())
+    }
+
+    #[inline(always)]
+    fn deserialize_from<R: std::io::Read>(mut reader: R) -> FieldSerdeResult<Self> {
+        let mut u = [0u8; Self::SERIALIZED_SIZE];
+        reader.read_exact(&mut u)?;
+        unsafe {
+            Ok(AVXGF2x128 {
+                v: transmute::<[u8; Self::SERIALIZED_SIZE], __m128i>(u),
+            })
+        }
+    }
+
+    #[inline(always)]
+    fn try_deserialize_from_ecc_format<R: std::io::Read>(mut reader: R) -> FieldSerdeResult<Self> {
+        let mut u = [0u8; 32];
+        reader.read_exact(&mut u)?;
+        Ok(unsafe {
+            AVXGF2x128 {
+                v: transmute::<[u8; 16], __m128i>(u[..16].try_into().unwrap()),
+            }
+        })
+    }
+}
+
+impl Field for AVXGF2x128 {
+    const NAME: &'static str = "Galois Field 2 SIMD 128";
+
+    const SIZE: usize = 16;
+
+    const FIELD_SIZE: usize = 1; // in bits
+
+    const ZERO: Self = AVXGF2x128 {
+        v: unsafe { zeroed() },
+    };
+
+    const ONE: Self = AVXGF2x128 {
+        v: unsafe { transmute([!0u64, !0u64]) },
+    };
+
+    const INV_2: Self = AVXGF2x128 {
+        v: unsafe { zeroed() },
+    }; // should not be used
+
+    #[inline(always)]
+    fn zero() -> Self {
+        AVXGF2x128 {
+            v: unsafe { zeroed() },
+        }
+    }
+
+    #[inline(always)]
+    fn one() -> Self {
+        AVXGF2x128 {
+            v: unsafe { transmute([!0u64, !0u64]) },
+        }
+    }
+
+    #[inline(always)]
+    fn is_zero(&self) -> bool {
+        unsafe { transmute::<__m128i, [u8; 16]>(self.v) == [0; 16] }
+    }
+
+    #[inline(always)]
+    fn random_unsafe(mut rng: impl rand::RngCore) -> Self {
+        let mut u = [0u8; 16];
+        rng.fill_bytes(&mut u);
+        unsafe {
+            AVXGF2x128 {
+                v: *(u.as_ptr() as *const __m128i),
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn random_bool(mut rng: impl rand::RngCore) -> Self {
+        let mut u = [0u8; 16];
+        rng.fill_bytes(&mut u);
+        unsafe {
+            AVXGF2x128 {
+                v: *(u.as_ptr() as *const __m128i),
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn exp(&self, exponent: u128) -> Self {
+        if exponent % 2 == 0 {
+            AVXGF2x128::ONE
+        } else {
+            *self
+        }
+    }
+
+    #[inline(always)]
+    fn inv(&self) -> Option<Self> {
+        unimplemented!()
+    }
+
+    #[inline(always)]
+    fn as_u32_unchecked(&self) -> u32 {
+        unimplemented!("u32 for GF2x128 doesn't make sense")
+    }
+
+    #[inline(always)]
+    fn from_uniform_bytes(bytes: &[u8; 32]) -> Self {
+        unsafe {
+            AVXGF2x128 {
+                v: transmute::<[u8; 16], __m128i>(bytes[..16].try_into().unwrap()),
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn mul_by_5(&self) -> Self {
+        *self
+    }
+
+    #[inline(always)]
+    fn mul_by_6(&self) -> Self {
+        Self::ZERO
+    }
+}
+
+impl Default for AVXGF2x128 {
+    #[inline(always)]
+    fn default() -> Self {
+        Self::ZERO
+    }
+}
+
+impl PartialEq for AVXGF2x128 {
+    #[inline(always)]
+    fn eq(&self, other: &Self) -> bool {
+        unsafe { _mm_test_all_ones(_mm_cmpeq_epi8(self.v, other.v)) == 1 }
+    }
+}
+
+impl Mul<&AVXGF2x128> for AVXGF2x128 {
+    type Output = AVXGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn mul(self, rhs: &AVXGF2x128) -> AVXGF2x128 {
+        AVXGF2x128 {
+            v: unsafe { _mm_and_si128(self.v, rhs.v) },
+        }
+    }
+}
+
+impl Mul for AVXGF2x128 {
+    type Output = AVXGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn mul(self, rhs: AVXGF2x128) -> AVXGF2x128 {
+        AVXGF2x128 {
+            v: unsafe { _mm_and_si128(self.v, rhs.v) },
+        }
+    }
+}
+
+impl MulAssign<&AVXGF2x128> for AVXGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn mul_assign(&mut self, rhs: &AVXGF2x128) {
+        self.v = unsafe { _mm_and_si128(self.v, rhs.v) };
+    }
+}
+
+impl MulAssign for AVXGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn mul_assign(&mut self, rhs: AVXGF2x128) {
+        self.v = unsafe { _mm_and_si128(self.v, rhs.v) };
+    }
+}
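+
+// 128-lane GF(2) arithmetic maps directly onto SSE bitwise intrinsics:
+// multiplication is a single _mm_and_si128 and addition/subtraction a
+// single _mm_xor_si128, as the remaining operator impls below show.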
+impl Sub for AVXGF2x128 {
+    type Output = AVXGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn sub(self, rhs: AVXGF2x128) -> AVXGF2x128 {
+        AVXGF2x128 {
+            v: unsafe { _mm_xor_si128(self.v, rhs.v) },
+        }
+    }
+}
+
+impl SubAssign for AVXGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn sub_assign(&mut self, rhs: AVXGF2x128) {
+        self.v = unsafe { _mm_xor_si128(self.v, rhs.v) };
+    }
+}
+
+impl Add for AVXGF2x128 {
+    type Output = AVXGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn add(self, rhs: AVXGF2x128) -> AVXGF2x128 {
+        AVXGF2x128 {
+            v: unsafe { _mm_xor_si128(self.v, rhs.v) },
+        }
+    }
+}
+
+impl AddAssign for AVXGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn add_assign(&mut self, rhs: AVXGF2x128) {
+        self.v = unsafe { _mm_xor_si128(self.v, rhs.v) };
+    }
+}
+
+impl Add<&AVXGF2x128> for AVXGF2x128 {
+    type Output = AVXGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn add(self, rhs: &AVXGF2x128) -> AVXGF2x128 {
+        AVXGF2x128 {
+            v: unsafe { _mm_xor_si128(self.v, rhs.v) },
+        }
+    }
+}
+
+impl AddAssign<&AVXGF2x128> for AVXGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn add_assign(&mut self, rhs: &AVXGF2x128) {
+        self.v = unsafe { _mm_xor_si128(self.v, rhs.v) };
+    }
+}
+
+impl Sub<&AVXGF2x128> for AVXGF2x128 {
+    type Output = AVXGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn sub(self, rhs: &AVXGF2x128) -> AVXGF2x128 {
+        AVXGF2x128 {
+            v: unsafe { _mm_xor_si128(self.v, rhs.v) },
+        }
+    }
+}
+
+impl SubAssign<&AVXGF2x128> for AVXGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn sub_assign(&mut self, rhs: &AVXGF2x128) {
+        self.v = unsafe { _mm_xor_si128(self.v, rhs.v) };
+    }
+}
+
+impl<T: std::borrow::Borrow<AVXGF2x128>> std::iter::Sum<T> for AVXGF2x128 {
+    fn sum<I: Iterator<Item = T>>(iter: I) -> Self {
+        iter.fold(Self::zero(), |acc, item| acc + item.borrow())
+    }
+}
+
+impl<T: std::borrow::Borrow<AVXGF2x128>> std::iter::Product<T> for AVXGF2x128 {
+    fn product<I: Iterator<Item = T>>(iter: I) -> Self {
+        iter.fold(Self::one(), |acc, item| acc * item.borrow())
+    }
+}
+
+impl Neg for AVXGF2x128 {
+    type Output = AVXGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn neg(self) -> AVXGF2x128 {
+        AVXGF2x128 { v: self.v }
+    }
+}
+
+impl From<u32> for AVXGF2x128 {
+    #[inline(always)]
+    fn from(v: u32) -> Self {
+        assert!(v < 2);
+        if v == 0 {
+            AVXGF2x128::ZERO
+        } else {
+            AVXGF2x128::ONE
+        }
+    }
+}
+
+impl From<GF2> for AVXGF2x128 {
+    #[inline(always)]
+    fn from(v: GF2) -> Self {
+        assert!(v.v < 2);
+        if v.v == 0 {
+            AVXGF2x128::ZERO
+        } else {
+            AVXGF2x128::ONE
+        }
+    }
+}
+
+impl SimdField for AVXGF2x128 {
+    type Scalar = GF2;
+
+    const PACK_SIZE: usize = 128;
+
+    #[inline(always)]
+    fn scale(&self, challenge: &Self::Scalar) -> Self {
+        if challenge.v == 0 {
+            Self::ZERO
+        } else {
+            *self
+        }
+    }
+
+    #[inline(always)]
+    fn pack(base_vec: &[Self::Scalar]) -> Self {
+        assert_eq!(base_vec.len(), Self::PACK_SIZE);
+        let mut packed_to_gf2x64 = [GF2x64::ZERO; Self::PACK_SIZE / GF2x64::PACK_SIZE];
+        packed_to_gf2x64
+            .iter_mut()
+            .zip(base_vec.chunks(GF2x64::PACK_SIZE))
+            .for_each(|(gf2x64, pack)| *gf2x64 = GF2x64::pack(pack));
+
+        unsafe { transmute(packed_to_gf2x64) }
+    }
+
+    #[inline(always)]
+    fn unpack(&self) -> Vec<Self::Scalar> {
+        let packed_to_gf2x64: [GF2x64; Self::PACK_SIZE / GF2x64::PACK_SIZE] =
+            unsafe { transmute(*self) };
+
+        packed_to_gf2x64
+            .iter()
+            .flat_map(|packed| packed.unpack())
+            .collect()
+    }
+}
diff --git a/arith/gf2/src/gf2x128/neon.rs b/arith/gf2/src/gf2x128/neon.rs
new file mode 100644
index 00000000..494463ff
--- /dev/null
+++ b/arith/gf2/src/gf2x128/neon.rs
@@ -0,0 +1,44 @@
+#[derive(Clone, Copy, Debug)]
+pub struct NeonGF2x128 {
+    pub(crate) v: uint32x4_t,
+}
+
+impl FieldSerde for NeonGF2_128 {
+    const SERIALIZED_SIZE: usize = 16;
+
+    #[inline(always)]
+    fn serialize_into<W: std::io::Write>(&self, mut writer: W) -> FieldSerdeResult<()> {
+        unsafe { writer.write_all(transmute::<uint32x4_t, [u8; 16]>(self.v).as_ref())? };
+        Ok(())
+    }
+
+    #[inline(always)]
+    fn deserialize_from<R: std::io::Read>(mut reader: R) -> FieldSerdeResult<Self> {
+        let mut u = [0u8; 16];
+        reader.read_exact(&mut u)?;
+        unsafe {
+            Ok(NeonGF2_128 {
+                v: transmute::<[u8; 16], uint32x4_t>(u),
+            })
+        }
+    }
+
+    #[inline]
+    fn try_deserialize_from_ecc_format<R: std::io::Read>(mut reader: R) -> FieldSerdeResult<Self>
+    where
+        Self: Sized,
+    {
+        let mut u = [0u8; 32];
+        reader.read_exact(&mut u)?;
+        Ok(unsafe {
+            NeonGF2_128 {
+                v: transmute::<[u8; 16], uint32x4_t>(u[..16].try_into().unwrap()),
+            }
+        })
+    }
+}
+
+// TODO: FieldSerde
+
+// TODO: Field
+
+// TODO: SimdField
diff --git a/arith/gf2/src/lib.rs b/arith/gf2/src/lib.rs
index 94edd4a7..46de4995 100644
--- a/arith/gf2/src/lib.rs
+++ b/arith/gf2/src/lib.rs
@@ -9,5 +9,8 @@ pub use gf2x8::GF2x8;
 mod gf2x64;
 pub use gf2x64::GF2x64;
 
+// mod gf2x128;
+// pub use gf2x128::GF2x128;
+
 #[cfg(test)]
 mod tests;

From 10e2afc6aa01477bb70880b4b5e62db9ed0e6945 Mon Sep 17 00:00:00 2001
From: Hang Su
Date: Mon, 11 Nov 2024 15:59:59 -0500
Subject: [PATCH 3/6] neon impl wrap up

---
 arith/gf2/src/gf2x128/neon.rs | 326 +++++++++++++++++++++++++++++++++-
 arith/gf2/src/lib.rs          |   4 +-
 2 files changed, 323 insertions(+), 7 deletions(-)

diff --git a/arith/gf2/src/gf2x128/neon.rs b/arith/gf2/src/gf2x128/neon.rs
index 494463ff..19403405 100644
--- a/arith/gf2/src/gf2x128/neon.rs
+++ b/arith/gf2/src/gf2x128/neon.rs
@@ -1,9 +1,19 @@
+use std::{
+    arch::aarch64::*,
+    mem::{transmute, zeroed},
+    ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
+};
+
+use arith::{Field, FieldSerde, FieldSerdeResult, SimdField};
+
+use crate::{GF2x64, GF2};
+
 #[derive(Clone, Copy, Debug)]
 pub struct NeonGF2x128 {
     pub(crate) v: uint32x4_t,
 }
 
-impl FieldSerde for NeonGF2_128 {
+impl FieldSerde for NeonGF2x128 {
     const SERIALIZED_SIZE: usize = 16;
 
     #[inline(always)]
@@ -17,7 +27,7 @@ impl FieldSerde for NeonGF2x128 {
         let mut u = [0u8; 16];
         reader.read_exact(&mut u)?;
         unsafe {
-            Ok(NeonGF2_128 {
+            Ok(NeonGF2x128 {
                 v: transmute::<[u8; 16], uint32x4_t>(u),
             })
         }
@@ -31,14 +41,320 @@ impl FieldSerde for NeonGF2x128 {
         let mut u = [0u8; 32];
         reader.read_exact(&mut u)?;
         Ok(unsafe {
-            NeonGF2_128 {
+            NeonGF2x128 {
                 v: transmute::<[u8; 16], uint32x4_t>(u[..16].try_into().unwrap()),
             }
         })
     }
 }
-// TODO: FieldSerde
 
-// TODO: Field
+impl Field for NeonGF2x128 {
+    const NAME: &'static str = "Galois Field 2 SIMD 128";
+
+    const SIZE: usize = 128 / 8;
+
+    const FIELD_SIZE: usize = 128; // in bits
+
+    const ZERO: Self = NeonGF2x128 {
+        v: unsafe { zeroed() },
+    };
+
+    const ONE: Self = NeonGF2x128 {
+        v: unsafe { transmute::<[u64; 2], uint32x4_t>([!0u64, !0u64]) },
+    };
+
+    const INV_2: Self = NeonGF2x128 {
+        v: unsafe { zeroed() },
+    }; // should not be used
+
+    #[inline(always)]
+    fn zero() -> Self {
+        NeonGF2x128 {
+            v: unsafe { zeroed() },
+        }
+    }
+
+    #[inline(always)]
+    fn one() -> Self {
+        NeonGF2x128 {
+            v: unsafe { transmute::<[u64; 2], uint32x4_t>([!0u64, !0u64]) },
+        }
+    }
+
+    #[inline(always)]
+    fn is_zero(&self) -> bool {
+        unsafe { transmute::<uint32x4_t, [u8; 16]>(self.v) == [0; 16] }
+    }
+
+    #[inline(always)]
+    fn random_unsafe(mut rng: impl rand::RngCore) -> Self {
+        let mut u = [0u8; 16];
+        rng.fill_bytes(&mut u);
+        unsafe {
+            NeonGF2x128 {
+                v: *(u.as_ptr() as *const uint32x4_t),
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn random_bool(mut rng: impl rand::RngCore) -> Self {
+        let mut u = [0u8; 16];
+        rng.fill_bytes(&mut u);
+        unsafe {
+            NeonGF2x128 {
+                v: *(u.as_ptr() as *const uint32x4_t),
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn exp(&self, exponent: u128) -> Self {
+        if exponent % 2 == 0 {
+            NeonGF2x128::ONE
+        } else {
+            *self
+        }
+    }
+
+    #[inline(always)]
+    fn inv(&self) -> Option<Self> {
+        unimplemented!()
+    }
+
+    #[inline(always)]
+    fn as_u32_unchecked(&self) -> u32 {
+        unimplemented!("u32 for GF2x128 doesn't make sense")
+    }
+
+    #[inline(always)]
+    fn from_uniform_bytes(bytes: &[u8; 32]) -> Self {
+        unsafe {
+            NeonGF2x128 {
+                v: transmute::<[u8; 16], uint32x4_t>(bytes[..16].try_into().unwrap()),
+            }
+        }
+    }
+}
+
+impl Default for NeonGF2x128 {
+    #[inline(always)]
+    fn default() -> Self {
+        Self::ZERO
+    }
+}
+
+impl PartialEq for NeonGF2x128 {
+    #[inline(always)]
+    fn eq(&self, other: &Self) -> bool {
+        unsafe {
+            transmute::<uint32x4_t, [u8; 16]>(self.v) == transmute::<uint32x4_t, [u8; 16]>(other.v)
+        }
+    }
+}
+
+impl Mul<&NeonGF2x128> for NeonGF2x128 {
+    type Output = NeonGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn mul(self, rhs: &NeonGF2x128) -> NeonGF2x128 {
+        NeonGF2x128 {
+            v: unsafe { vandq_u32(self.v, rhs.v) },
+        }
+    }
+}
+
+impl Mul for NeonGF2x128 {
+    type Output = NeonGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn mul(self, rhs: NeonGF2x128) -> NeonGF2x128 {
+        NeonGF2x128 {
+            v: unsafe { vandq_u32(self.v, rhs.v) },
+        }
+    }
+}
+
+impl MulAssign<&NeonGF2x128> for NeonGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn mul_assign(&mut self, rhs: &NeonGF2x128) {
+        self.v = unsafe { vandq_u32(self.v, rhs.v) };
+    }
+}
+
+impl MulAssign for NeonGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn mul_assign(&mut self, rhs: NeonGF2x128) {
+        self.v = unsafe { vandq_u32(self.v, rhs.v) };
+    }
+}
+
+impl Sub for NeonGF2x128 {
+    type Output = NeonGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn sub(self, rhs: NeonGF2x128) -> NeonGF2x128 {
+        NeonGF2x128 {
+            v: unsafe { veorq_u32(self.v, rhs.v) },
+        }
+    }
+}
+
+impl SubAssign for NeonGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn sub_assign(&mut self, rhs: NeonGF2x128) {
+        self.v = unsafe { veorq_u32(self.v, rhs.v) };
+    }
+}
+
+impl Add for NeonGF2x128 {
+    type Output = NeonGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn add(self, rhs: NeonGF2x128) -> NeonGF2x128 {
+        NeonGF2x128 {
+            v: unsafe { veorq_u32(self.v, rhs.v) },
+        }
+    }
+}
+
+impl AddAssign for NeonGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn add_assign(&mut self, rhs: NeonGF2x128) {
+        self.v = unsafe { veorq_u32(self.v, rhs.v) };
+    }
+}
+
+impl Add<&NeonGF2x128> for NeonGF2x128 {
+    type Output = NeonGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn add(self, rhs: &NeonGF2x128) -> NeonGF2x128 {
+        NeonGF2x128 {
+            v: unsafe { veorq_u32(self.v, rhs.v) },
+        }
+    }
+}
+
+impl AddAssign<&NeonGF2x128> for NeonGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn add_assign(&mut self, rhs: &NeonGF2x128) {
+        self.v = unsafe { veorq_u32(self.v, rhs.v) };
+    }
+}
+
+impl Sub<&NeonGF2x128> for NeonGF2x128 {
+    type Output = NeonGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn sub(self, rhs: &NeonGF2x128) -> NeonGF2x128 {
+        NeonGF2x128 {
+            v: unsafe { veorq_u32(self.v, rhs.v) },
+        }
+    }
+}
+
+impl SubAssign<&NeonGF2x128> for NeonGF2x128 {
+    #[inline(always)]
+    #[allow(clippy::suspicious_op_assign_impl)]
+    fn sub_assign(&mut self, rhs: &NeonGF2x128) {
+        self.v = unsafe { veorq_u32(self.v, rhs.v) };
+    }
+}
+
+impl<T: std::borrow::Borrow<NeonGF2x128>> std::iter::Sum<T> for NeonGF2x128 {
+    fn sum<I: Iterator<Item = T>>(iter: I) -> Self {
+        iter.fold(Self::zero(), |acc, item| acc + item.borrow())
+    }
+}
+
+impl<T: std::borrow::Borrow<NeonGF2x128>> std::iter::Product<T> for NeonGF2x128 {
+    fn product<I: Iterator<Item = T>>(iter: I) -> Self {
+        iter.fold(Self::one(), |acc, item| acc * item.borrow())
+    }
+}
+
+impl Neg for NeonGF2x128 {
+    type Output = NeonGF2x128;
+
+    #[inline(always)]
+    #[allow(clippy::suspicious_arithmetic_impl)]
+    fn neg(self) -> NeonGF2x128 {
+        NeonGF2x128 { v: self.v }
+    }
+}
+
+impl From<u32> for NeonGF2x128 {
+    #[inline(always)]
+    fn from(v: u32) -> Self {
+        assert!(v < 2);
+        if v == 0 {
+            NeonGF2x128::ZERO
+        } else {
+            NeonGF2x128::ONE
+        }
+    }
+}
+
+impl From<GF2> for NeonGF2x128 {
+    #[inline(always)]
+    fn from(v: GF2) -> Self {
+        assert!(v.v < 2);
+        if v.v == 0 {
+            NeonGF2x128::ZERO
+        } else {
+            NeonGF2x128::ONE
+        }
+    }
+}
+
 // TODO: SimdField
+
+impl SimdField for NeonGF2x128 {
+    type Scalar = GF2;
+
+    const PACK_SIZE: usize = 128;
+
+    #[inline(always)]
+    fn scale(&self, challenge: &Self::Scalar) -> Self {
+        if challenge.v == 0 {
+            Self::ZERO
+        } else {
+            *self
+        }
+    }
+
+    #[inline(always)]
+    fn pack(base_vec: &[Self::Scalar]) -> Self {
+        assert_eq!(base_vec.len(), Self::PACK_SIZE);
+        let mut packed_to_gf2x64 = [GF2x64::ZERO; Self::PACK_SIZE / GF2x64::PACK_SIZE];
+        packed_to_gf2x64
+            .iter_mut()
+            .zip(base_vec.chunks(GF2x64::PACK_SIZE))
+            .for_each(|(gf2x64, pack)| *gf2x64 = GF2x64::pack(pack));
+
+        unsafe { transmute(packed_to_gf2x64) }
+    }
+
+    #[inline(always)]
+    fn unpack(&self) -> Vec<Self::Scalar> {
+        let packed_to_gf2x64: [GF2x64; Self::PACK_SIZE / GF2x64::PACK_SIZE] =
+            unsafe { transmute(*self) };
+
+        packed_to_gf2x64
+            .iter()
+            .flat_map(|packed| packed.unpack())
+            .collect()
+    }
+}
diff --git a/arith/gf2/src/lib.rs b/arith/gf2/src/lib.rs
index 46de4995..9c422663 100644
--- a/arith/gf2/src/lib.rs
+++ b/arith/gf2/src/lib.rs
@@ -9,5 +9,8 @@ pub use gf2x8::GF2x8;
 mod gf2x64;
 pub use gf2x64::GF2x64;
 
-// mod gf2x128;
-// pub use gf2x128::GF2x128;
+mod gf2x128;
+pub use gf2x128::GF2x128;
 
 #[cfg(test)]
 mod tests;

From 4fd26abc44b17fa828b34d4bccf3a2eba86029d4 Mon Sep 17 00:00:00 2001
From: Hang Su
Date: Mon, 11 Nov 2024 16:04:39 -0500
Subject: [PATCH 4/6] remove simdfield impl in gf2_128

---
 arith/gf2/src/gf2x128/avx.rs         |  4 +--
 arith/gf2/src/gf2x128/neon.rs        |  2 --
 arith/gf2/src/tests.rs               |  8 ++++--
 arith/gf2_128/src/gf2_ext128/avx.rs  | 42 ++--------------------------
 arith/gf2_128/src/gf2_ext128/neon.rs | 42 ++--------------------------
 arith/gf2_128/src/tests.rs           |  3 +-
 6 files changed, 13 insertions(+), 88 deletions(-)

diff --git a/arith/gf2/src/gf2x128/avx.rs b/arith/gf2/src/gf2x128/avx.rs
index c3d06ef3..566c7532 100644
--- a/arith/gf2/src/gf2x128/avx.rs
+++ b/arith/gf2/src/gf2x128/avx.rs
@@ -59,7 +59,7 @@ impl Field for AVXGF2x128 {
     };
 
     const ONE: Self = AVXGF2x128 {
-        v: unsafe { transmute([!0u64, !0u64]) },
+        v: unsafe { transmute::<[u64; 2], __m128i>([!0u64, !0u64]) },
     };
 
     const INV_2: Self = AVXGF2x128 {
@@ -76,7 +76,7 @@ impl Field for AVXGF2x128 {
     #[inline(always)]
     fn one() -> Self {
         AVXGF2x128 {
-            v: unsafe { transmute([!0u64, !0u64]) },
+            v: unsafe { transmute::<[u64; 2], __m128i>([!0u64, !0u64]) },
         }
     }
 
diff --git a/arith/gf2/src/gf2x128/neon.rs b/arith/gf2/src/gf2x128/neon.rs
index 19403405..c3f7b3be 100644
--- a/arith/gf2/src/gf2x128/neon.rs
+++ b/arith/gf2/src/gf2x128/neon.rs
@@ -319,8 +319,6 @@ impl From<GF2> for NeonGF2x128 {
     }
 }
 
-// TODO: SimdField
-
 impl SimdField for NeonGF2x128 {
     type Scalar = GF2;
 
diff --git a/arith/gf2/src/tests.rs b/arith/gf2/src/tests.rs
index f3364700..b6700b24 100644
--- a/arith/gf2/src/tests.rs
+++ b/arith/gf2/src/tests.rs
@@ -5,7 +5,7 @@ use arith::{
     random_field_tests, random_inversion_tests, random_simd_field_tests, Field, FieldSerde,
 };
 
-use crate::{GF2x64, GF2x8, GF2};
+use crate::{GF2x128, GF2x64, GF2x8, GF2};
 
 #[test]
 fn test_field() {
@@ -22,6 +22,9 @@ fn test_simd_field() {
 
     random_field_tests::<GF2x64>("Vectorized GF2 len 64".to_string());
     random_simd_field_tests::<GF2x64>("Vectorized GF2 len 64".to_string());
+
+    random_field_tests::<GF2x128>("Vectorized GF2 len 128".to_string());
+    random_simd_field_tests::<GF2x128>("Vectorized GF2 len 128".to_string());
 }
 
 fn custom_serde_vectorize_gf2<F: Field + FieldSerde>() {
@@ -38,5 +41,6 @@ fn custom_serde_vectorize_gf2<F: Field + FieldSerde>() {
 #[test]
 fn test_custom_serde_vectorize_gf2() {
     custom_serde_vectorize_gf2::<GF2x8>();
-    custom_serde_vectorize_gf2::<GF2x64>()
+    custom_serde_vectorize_gf2::<GF2x64>();
+    custom_serde_vectorize_gf2::<GF2x128>()
 }
diff --git a/arith/gf2_128/src/gf2_ext128/avx.rs b/arith/gf2_128/src/gf2_ext128/avx.rs
index 97403998..03990aa2 100644
--- a/arith/gf2_128/src/gf2_ext128/avx.rs
+++ b/arith/gf2_128/src/gf2_ext128/avx.rs
@@ -5,8 +5,8 @@ use std::{
     ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
 };
 
-use arith::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, SimdField};
-use gf2::{GF2x64, GF2};
+use arith::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult};
+use gf2::GF2;
 
 #[derive(Debug, Clone, Copy)]
 pub struct AVXGF2_128 {
@@ -328,41 +328,3 @@ fn mul_internal(a: &AVXGF2_128, b: &AVXGF2_128) -> AVXGF2_128 {
         v: unsafe { gfmul(a.v, b.v) },
     }
 }
-
-impl SimdField for AVXGF2_128 {
-    type Scalar = GF2;
-
-    const PACK_SIZE: usize = 128;
-
-    #[inline(always)]
-    fn scale(&self, challenge: &Self::Scalar) -> Self {
-        if challenge.v == 0 {
-            Self::ZERO
-        } else {
-            *self
-        }
-    }
-
-    #[inline(always)]
-    fn pack(base_vec: &[Self::Scalar]) -> Self {
-        assert_eq!(base_vec.len(), Self::PACK_SIZE);
-        let mut packed_to_gf2x64 = [GF2x64::ZERO; Self::PACK_SIZE / GF2x64::PACK_SIZE];
-        packed_to_gf2x64
-            .iter_mut()
-            .zip(base_vec.chunks(GF2x64::PACK_SIZE))
-            .for_each(|(gf2x64, pack)| *gf2x64 = GF2x64::pack(pack));
-
-        unsafe { transmute(packed_to_gf2x64) }
-    }
-
-    #[inline(always)]
-    fn unpack(&self) -> Vec<Self::Scalar> {
-        let packed_to_gf2x64: [GF2x64; Self::PACK_SIZE / GF2x64::PACK_SIZE] =
-            unsafe { transmute(*self) };
-
-        packed_to_gf2x64
-            .iter()
-            .flat_map(|packed| packed.unpack())
-            .collect()
-    }
-}
diff --git a/arith/gf2_128/src/gf2_ext128/neon.rs b/arith/gf2_128/src/gf2_ext128/neon.rs
index 792e7ad3..ea1b528b 100644
--- a/arith/gf2_128/src/gf2_ext128/neon.rs
+++ b/arith/gf2_128/src/gf2_ext128/neon.rs
@@ -2,8 +2,8 @@ use std::iter::{Product, Sum};
 use std::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign};
 use std::{arch::aarch64::*, mem::transmute};
 
-use arith::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, SimdField};
-use gf2::{GF2x64, GF2};
+use arith::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult};
+use gf2::GF2;
 
 #[derive(Clone, Copy, Debug)]
 pub struct NeonGF2_128 {
@@ -403,41 +403,3 @@ pub(crate) fn mul_by_x_internal(a: &uint32x4_t) -> uint32x4_t {
         vreinterpretq_u32_u64(res)
     }
 }
-
-impl SimdField for NeonGF2_128 {
-    type Scalar = GF2;
-
-    const PACK_SIZE: usize = 128;
-
-    #[inline(always)]
-    fn scale(&self, challenge: &Self::Scalar) -> Self {
-        if challenge.v == 0 {
-            Self::ZERO
-        } else {
-            *self
-        }
-    }
-
-    #[inline(always)]
-    fn pack(base_vec: &[Self::Scalar]) -> Self {
-        assert_eq!(base_vec.len(), Self::PACK_SIZE);
-        let mut packed_to_gf2x64 = [GF2x64::ZERO; Self::PACK_SIZE / GF2x64::PACK_SIZE];
-        packed_to_gf2x64
-            .iter_mut()
-            .zip(base_vec.chunks(GF2x64::PACK_SIZE))
-            .for_each(|(gf2x64, pack)| *gf2x64 = GF2x64::pack(pack));
-
-        unsafe { transmute(packed_to_gf2x64) }
-    }
-
-    #[inline(always)]
-    fn unpack(&self) -> Vec<Self::Scalar> {
-        let packed_to_gf2x64: [GF2x64; Self::PACK_SIZE / GF2x64::PACK_SIZE] =
-            unsafe { transmute(*self) };
-
-        packed_to_gf2x64
-            .iter()
-            .flat_map(|packed| packed.unpack())
-            .collect()
-    }
-}
diff --git a/arith/gf2_128/src/tests.rs b/arith/gf2_128/src/tests.rs
index 653a7604..43be77ff 100644
--- a/arith/gf2_128/src/tests.rs
+++ b/arith/gf2_128/src/tests.rs
@@ -13,8 +13,7 @@ use crate::{GF2_128x8, GF2_128};
 
 #[test]
 fn test_simd_field() {
-    random_simd_field_tests::<GF2_128>("Simd for GF2 over GF2Ext128".to_string());
-    random_simd_field_tests::<GF2_128x8>("Simd for GF2Ext128 over GF2Ext128x8".to_string());
+    random_simd_field_tests::<GF2_128x8>("Simd GF2 Ext128".to_string());
 }
 
 #[test]

From 33e9d8cc26ea048eb1be0e88ebd67d1345615ad4 Mon Sep 17 00:00:00 2001
From: Hang Su
Date: Mon, 11 Nov 2024 16:45:46 -0500
Subject: [PATCH 5/6] minor cleanups, fix exp impl

---
 arith/gf2/src/gf2x128/avx.rs  | 19 ++++---------------
 arith/gf2/src/gf2x128/neon.rs |  9 ++++-----
 arith/gf2/src/gf2x64.rs       |  7 +++----
 arith/gf2/src/gf2x8.rs        |  7 +++----
 4 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/arith/gf2/src/gf2x128/avx.rs b/arith/gf2/src/gf2x128/avx.rs
index 566c7532..64ad962d 100644
--- a/arith/gf2/src/gf2x128/avx.rs
+++ b/arith/gf2/src/gf2x128/avx.rs
@@ -50,7 +50,7 @@ impl Field for AVXGF2x128 {
     const NAME: &'static str = "Galois Field 2 SIMD 128";
 
-    const SIZE: usize = 16;
+    const SIZE: usize = 128 / 8;
 
     const FIELD_SIZE: usize = 1; // in bits
 
@@ -109,11 +109,10 @@ impl Field for AVXGF2x128 {
 
     #[inline(always)]
     fn exp(&self, exponent: u128) -> Self {
-        if exponent % 2 == 0 {
-            AVXGF2x128::ONE
-        } else {
-            *self
+        if exponent == 0 {
+            return Self::one();
         }
+        *self
     }
 
     #[inline(always)]
@@ -134,16 +133,6 @@ impl Field for AVXGF2x128 {
             }
         }
     }
-
-    #[inline(always)]
-    fn mul_by_5(&self) -> Self {
-        *self
-    }
-
-    #[inline(always)]
-    fn mul_by_6(&self) -> Self {
-        Self::ZERO
-    }
 }
diff --git a/arith/gf2/src/gf2x128/neon.rs b/arith/gf2/src/gf2x128/neon.rs
index c3f7b3be..7a015410 100644
--- a/arith/gf2/src/gf2x128/neon.rs
+++ b/arith/gf2/src/gf2x128/neon.rs
@@ -53,7 +53,7 @@ impl Field for NeonGF2x128 {
 
     const SIZE: usize = 128 / 8;
 
-    const FIELD_SIZE: usize = 128; // in bits
+    const FIELD_SIZE: usize = 1; // in bits
 
     const ZERO: Self = NeonGF2x128 {
         v: unsafe { zeroed() },
@@ -110,11 +110,10 @@ impl Field for NeonGF2x128 {
 
     #[inline(always)]
     fn exp(&self, exponent: u128) -> Self {
-        if exponent % 2 == 0 {
-            NeonGF2x128::ONE
-        } else {
-            *self
+        if exponent == 0 {
+            return Self::one();
         }
+        *self
     }
 
     #[inline(always)]
diff --git a/arith/gf2/src/gf2x64.rs b/arith/gf2/src/gf2x64.rs
index a03b594c..681ab92f 100644
--- a/arith/gf2/src/gf2x64.rs
+++ b/arith/gf2/src/gf2x64.rs
@@ -73,11 +73,10 @@ impl Field for GF2x64 {
 
     #[inline(always)]
     fn exp(&self, exponent: u128) -> Self {
-        if exponent % 2 == 0 {
-            Self::one()
-        } else {
-            *self
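+        // In GF(2), x * x = x for every element, so x^k = x for all k >= 1
+        // and only x^0 = 1; branching on exponent parity was incorrect.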
+        if exponent == 0 {
+            return Self::one();
         }
+        *self
     }
 
     #[inline(always)]
diff --git a/arith/gf2/src/gf2x8.rs b/arith/gf2/src/gf2x8.rs
index 56ad8064..6dcda1d3 100644
--- a/arith/gf2/src/gf2x8.rs
+++ b/arith/gf2/src/gf2x8.rs
@@ -81,11 +81,10 @@ impl Field for GF2x8 {
 
     #[inline(always)]
     fn exp(&self, exponent: u128) -> Self {
-        if exponent % 2 == 0 {
-            Self::one()
-        } else {
-            *self
+        if exponent == 0 {
+            return Self::one();
         }
+        *self
     }
 
     #[inline(always)]

From 32895d0040383013734756c86197495b818a785a Mon Sep 17 00:00:00 2001
From: Hang Su
Date: Mon, 11 Nov 2024 17:02:00 -0500
Subject: [PATCH 6/6] random message fixing

---
 gkr/src/main.rs     | 2 +-
 gkr/src/main_mpi.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gkr/src/main.rs b/gkr/src/main.rs
index a79f5b28..64a6cc32 100644
--- a/gkr/src/main.rs
+++ b/gkr/src/main.rs
@@ -168,7 +168,7 @@ fn run_benchmark(args: &Args, config: Config) {
         })
         .collect::<Vec<_>>();
 
-    println!("We are now calculating average throughput, please wait for 1 minutes");
+    println!("We are now calculating average throughput, please wait for 5 seconds");
     for i in 0..args.repeats {
         thread::sleep(std::time::Duration::from_secs(5));
         let stop_time = std::time::Instant::now();
diff --git a/gkr/src/main_mpi.rs b/gkr/src/main_mpi.rs
index b0302a2d..57730eb1 100644
--- a/gkr/src/main_mpi.rs
+++ b/gkr/src/main_mpi.rs
@@ -125,7 +125,7 @@ fn run_benchmark(args: &Args, config: Config) {
 
     const N_PROOF: usize = 1000;
 
-    println!("We are now calculating average throughput, please wait for 1 minutes");
+    println!("We are now calculating average throughput, please wait until {N_PROOF} proofs are computed");
     for i in 0..args.repeats {
         config.mpi_config.barrier(); // wait until everyone is here
         let start_time = std::time::Instant::now();