plonky3 deps added (#4)

* dependencies added * cargo fmt
pluto · May 1, 2024 · 7a7a0db · 7a7a0db
1 parent 307e995
commit 7a7a0db
Show file tree

Hide file tree

Showing 22 changed files with 2,072 additions and 10 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,11 +1,8 @@
-[package]
-authors    =["Pluto Authors"]
-description="""ronkathon"""
-edition    ="2021"
-license    ="Apache2.0 OR MIT"
-name       ="ronkathon"
-repository ="https://github.com/thor314/ronkathon"
-version    ="0.1.0"
+[workspace]
+resolver = "2"
 
-[dependencies]
-anyhow   ="1.0"
+members = [
+    "ronkathon",
+    "field",
+    "util"
+]
diff --git a/field/Cargo.toml b/field/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "p3-field"
+version = "0.1.0"
+edition = "2021"
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+p3-util = { path = "../util" }
+num-bigint = { version = "0.4.3", default-features = false }
+num-traits = { version = "0.2.18", default-features = false }
+
+itertools = "0.12.0"
+rand = "0.8.5"
+serde = { version = "1.0", default-features = false, features = ["derive"] }
diff --git a/field/src/array.rs b/field/src/array.rs
@@ -0,0 +1,148 @@
+use core::{
+  array,
+  iter::{Product, Sum},
+  ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
+};
+
+use crate::{AbstractField, Field};
+
+/// A fixed-size array of `N` field elements of type `F`.
+///
+/// The inner array is public, so individual lanes can be read or written through `.0`.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct FieldArray<F: Field, const N: usize>(pub [F; N]);
+
+impl<F: Field, const N: usize> Default for FieldArray<F, N> {
+  /// The default value is whatever `Self::zero()` returns.
+  fn default() -> Self {
+    Self::zero()
+  }
+}
+
+impl<F: Field, const N: usize> From<F> for FieldArray<F, N> {
+  /// Broadcasts a single element into every lane.
+  fn from(val: F) -> Self {
+    Self([val; N])
+  }
+}
+
+impl<F: Field, const N: usize> From<[F; N]> for FieldArray<F, N> {
+  /// Wraps an existing array without touching its contents.
+  fn from(arr: [F; N]) -> Self {
+    FieldArray(arr)
+  }
+}
+
+impl<F: Field, const N: usize> AbstractField for FieldArray<F, N> {
+  type F = F;
+
+  // Every constructor below builds the requested value once in `F` and copies it into all
+  // `N` lanes.
+
+  fn zero() -> Self { Self([F::zero(); N]) }
+
+  fn one() -> Self { Self([F::one(); N]) }
+
+  fn two() -> Self { Self([F::two(); N]) }
+
+  fn neg_one() -> Self { Self([F::neg_one(); N]) }
+
+  #[inline]
+  fn from_f(f: Self::F) -> Self { Self([f; N]) }
+
+  fn from_bool(b: bool) -> Self { Self([F::from_bool(b); N]) }
+
+  fn from_canonical_u8(n: u8) -> Self { Self([F::from_canonical_u8(n); N]) }
+
+  fn from_canonical_u16(n: u16) -> Self { Self([F::from_canonical_u16(n); N]) }
+
+  fn from_canonical_u32(n: u32) -> Self { Self([F::from_canonical_u32(n); N]) }
+
+  fn from_canonical_u64(n: u64) -> Self { Self([F::from_canonical_u64(n); N]) }
+
+  fn from_canonical_usize(n: usize) -> Self { Self([F::from_canonical_usize(n); N]) }
+
+  fn from_wrapped_u32(n: u32) -> Self { Self([F::from_wrapped_u32(n); N]) }
+
+  fn from_wrapped_u64(n: u64) -> Self { Self([F::from_wrapped_u64(n); N]) }
+
+  fn generator() -> Self { Self([F::generator(); N]) }
+}
+
+impl<F: Field, const N: usize> Add for FieldArray<F, N> {
+  type Output = Self;
+
+  /// Lane-wise sum of two arrays.
+  #[inline]
+  fn add(self, rhs: Self) -> Self::Output {
+    Self(array::from_fn(|lane| self.0[lane] + rhs.0[lane]))
+  }
+}
+
+impl<F: Field, const N: usize> Add<F> for FieldArray<F, N> {
+  type Output = Self;
+
+  /// Adds the scalar `rhs` to every lane.
+  #[inline]
+  fn add(self, rhs: F) -> Self::Output {
+    let Self(lanes) = self;
+    Self(lanes.map(|lane| lane + rhs))
+  }
+}
+
+impl<F: Field, const N: usize> AddAssign for FieldArray<F, N> {
+  /// In-place lane-wise sum.
+  #[inline]
+  fn add_assign(&mut self, rhs: Self) {
+    for (lhs, addend) in self.0.iter_mut().zip(rhs.0) {
+      *lhs += addend;
+    }
+  }
+}
+
+impl<F: Field, const N: usize> AddAssign<F> for FieldArray<F, N> {
+  /// Adds the scalar `rhs` to every lane in place.
+  #[inline]
+  fn add_assign(&mut self, rhs: F) {
+    for lane in &mut self.0 {
+      *lane += rhs;
+    }
+  }
+}
+
+impl<F: Field, const N: usize> Sub for FieldArray<F, N> {
+  type Output = Self;
+
+  /// Lane-wise difference of two arrays.
+  #[inline]
+  fn sub(self, rhs: Self) -> Self::Output {
+    Self(array::from_fn(|lane| self.0[lane] - rhs.0[lane]))
+  }
+}
+
+impl<F: Field, const N: usize> Sub<F> for FieldArray<F, N> {
+  type Output = Self;
+
+  /// Subtracts the scalar `rhs` from every lane.
+  #[inline]
+  fn sub(self, rhs: F) -> Self::Output {
+    let Self(lanes) = self;
+    Self(lanes.map(|lane| lane - rhs))
+  }
+}
+
+impl<F: Field, const N: usize> SubAssign for FieldArray<F, N> {
+  /// In-place lane-wise difference.
+  #[inline]
+  fn sub_assign(&mut self, rhs: Self) {
+    for (lhs, subtrahend) in self.0.iter_mut().zip(rhs.0) {
+      *lhs -= subtrahend;
+    }
+  }
+}
+
+impl<F: Field, const N: usize> SubAssign<F> for FieldArray<F, N> {
+  /// Subtracts the scalar `rhs` from every lane in place.
+  #[inline]
+  fn sub_assign(&mut self, rhs: F) {
+    for lane in &mut self.0 {
+      *lane -= rhs;
+    }
+  }
+}
+
+impl<F: Field, const N: usize> Neg for FieldArray<F, N> {
+  type Output = Self;
+
+  /// Negates every lane.
+  #[inline]
+  fn neg(self) -> Self::Output {
+    let Self(lanes) = self;
+    Self(lanes.map(|lane| -lane))
+  }
+}
+
+impl<F: Field, const N: usize> Mul for FieldArray<F, N> {
+  type Output = Self;
+
+  /// Lane-wise product of two arrays.
+  #[inline]
+  fn mul(self, rhs: Self) -> Self::Output {
+    Self(array::from_fn(|lane| self.0[lane] * rhs.0[lane]))
+  }
+}
+
+impl<F: Field, const N: usize> Mul<F> for FieldArray<F, N> {
+  type Output = Self;
+
+  /// Scales every lane by the scalar `rhs`.
+  #[inline]
+  fn mul(self, rhs: F) -> Self::Output {
+    let Self(lanes) = self;
+    Self(lanes.map(|lane| lane * rhs))
+  }
+}
+
+impl<F: Field, const N: usize> MulAssign for FieldArray<F, N> {
+  /// In-place lane-wise product.
+  #[inline]
+  fn mul_assign(&mut self, rhs: Self) {
+    for (lhs, factor) in self.0.iter_mut().zip(rhs.0) {
+      *lhs *= factor;
+    }
+  }
+}
+
+impl<F: Field, const N: usize> MulAssign<F> for FieldArray<F, N> {
+  /// Scales every lane by the scalar `rhs` in place.
+  #[inline]
+  fn mul_assign(&mut self, rhs: F) {
+    for lane in &mut self.0 {
+      *lane *= rhs;
+    }
+  }
+}
+
+impl<F: Field, const N: usize> Sum for FieldArray<F, N> {
+  /// Adds up all arrays lane-wise; an empty iterator yields `Self::zero()`.
+  #[inline]
+  fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
+    match iter.reduce(|acc, item| acc + item) {
+      Some(total) => total,
+      None => Self::zero(),
+    }
+  }
+}
+
+impl<F: Field, const N: usize> Product for FieldArray<F, N> {
+  /// Multiplies all arrays lane-wise; an empty iterator yields `Self::one()`.
+  #[inline]
+  fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
+    match iter.reduce(|acc, item| acc * item) {
+      Some(total) => total,
+      None => Self::one(),
+    }
+  }
+}
diff --git a/field/src/batch_inverse.rs b/field/src/batch_inverse.rs
@@ -0,0 +1,93 @@
+use alloc::{vec, vec::Vec};
+
+use crate::field::Field;
+
+/// Computes the multiplicative inverse of every element of `x` in one batch.
+///
+/// This is Montgomery's trick. At a high level, we invert the product of the given field
+/// elements, then derive the individual inverses from that via multiplication.
+///
+/// The usual Montgomery trick involves calculating an array of cumulative products,
+/// resulting in a long dependency chain. To increase instruction-level parallelism, we
+/// compute WIDTH separate cumulative product arrays that only meet at the end.
+///
+/// Returns a vector `inv` of the same length as `x` with `inv[i] * x[i] == F::one()`. At most
+/// one call to `Field::inverse` is made, regardless of the length of `x`.
+///
+/// # Panics
+/// Might panic if asserts or unwraps uncover a bug.
+/// NOTE(review): presumably also panics (or misbehaves) when any element of `x` is zero, as
+/// a product containing it is passed to `Field::inverse` — confirm against that method.
+pub fn batch_multiplicative_inverse<F: Field>(x: &[F]) -> Vec<F> {
+  // Higher WIDTH increases instruction-level parallelism, but too high a value will cause us
+  // to run out of registers.
+  const WIDTH: usize = 4;
+  // JN note: WIDTH is 4. The code is specialized to this value and will need
+  // modification if it is changed. I tried to make it more generic, but Rust's const
+  // generics are not yet good enough.
+
+  // Handle the inputs shorter than WIDTH separately. The code below is repetitive but
+  // concise, and the branches should be very predictable.
+  let n = x.len();
+  if n == 0 {
+    return Vec::new();
+  } else if n == 1 {
+    return vec![x[0].inverse()];
+  } else if n == 2 {
+    let x01 = x[0] * x[1];
+    let x01inv = x01.inverse();
+    return vec![x01inv * x[1], x01inv * x[0]];
+  } else if n == 3 {
+    let x01 = x[0] * x[1];
+    let x012 = x01 * x[2];
+    let x012inv = x012.inverse();
+    let x01inv = x012inv * x[2];
+    return vec![x01inv * x[1], x01inv * x[0], x012inv * x01];
+  }
+  debug_assert!(n >= WIDTH);
+
+  // Buf is reused for a few things to save allocations.
+  // Fill buf with cumulative product of x, only taking every 4th value. Concretely, buf will
+  // be [
+  //   x[0], x[1], x[2], x[3],
+  //   x[0] * x[4], x[1] * x[5], x[2] * x[6], x[3] * x[7],
+  //   x[0] * x[4] * x[8], x[1] * x[5] * x[9], x[2] * x[6] * x[10], x[3] * x[7] * x[11],
+  //   ...
+  // ].
+  // If n is not a multiple of WIDTH, the result is truncated from the end. For example,
+  // for n == 5, we get [x[0], x[1], x[2], x[3], x[0] * x[4]].
+  let mut buf: Vec<F> = Vec::with_capacity(n);
+  // cumul_prod holds the last WIDTH elements of buf. This is redundant, but it's how we
+  // convince LLVM to keep the values in the registers.
+  let mut cumul_prod: [F; WIDTH] = x[..WIDTH].try_into().unwrap();
+  buf.extend(cumul_prod);
+  // `i` counts from 0 at x[WIDTH], so `i % WIDTH` is also the chain index of the element's
+  // position in x.
+  for (i, &xi) in x[WIDTH..].iter().enumerate() {
+    cumul_prod[i % WIDTH] *= xi;
+    buf.push(cumul_prod[i % WIDTH]);
+  }
+  debug_assert_eq!(buf.len(), n);
+
+  let mut a_inv = {
+    // This is where the four dependency chains meet.
+    // Invert the four final cumulative products with a single field inversion;
+    // a_inv[j] ends up as the inverse of cumul_prod[j].
+    let c01 = cumul_prod[0] * cumul_prod[1];
+    let c23 = cumul_prod[2] * cumul_prod[3];
+    let c0123 = c01 * c23;
+    let c0123inv = c0123.inverse();
+    let c01inv = c0123inv * c23;
+    let c23inv = c0123inv * c01;
+    [c01inv * cumul_prod[1], c01inv * cumul_prod[0], c23inv * cumul_prod[3], c23inv * cumul_prod[2]]
+  };
+
+  for i in (WIDTH..n).rev() {
+    // buf[i - WIDTH] has not been written to by this loop, so it equals
+    // x[i % WIDTH] * x[i % WIDTH + WIDTH] * ... * x[i - WIDTH].
+    buf[i] = buf[i - WIDTH] * a_inv[i % WIDTH];
+    // buf[i] now holds the inverse of x[i].
+    // Multiplying by x[i] strips it from the chain's running inverse.
+    a_inv[i % WIDTH] *= x[i];
+  }
+  // After the loop above, a_inv[j] is the inverse of x[j] alone.
+  for i in (0..WIDTH).rev() {
+    buf[i] = a_inv[i];
+  }
+
+  for (&bi, &xi) in buf.iter().zip(x) {
+    // Sanity check only.
+    debug_assert_eq!(bi * xi, F::one());
+  }
+
+  buf
+}
diff --git a/field/src/exponentiation.rs b/field/src/exponentiation.rs
@@ -0,0 +1,122 @@
+use crate::AbstractField;
+
+/// Raises `val` to `power` with the binary square-and-multiply method, scanning the bits of
+/// `power` from least to most significant.
+///
+/// Returns `AF::one()` when `power` is zero.
+pub fn exp_u64_by_squaring<AF: AbstractField>(val: AF, power: u64) -> AF {
+  let mut base = val;
+  let mut acc = AF::one();
+
+  for shift in 0..bits_u64(power) {
+    let bit_is_set = (power >> shift) & 1 == 1;
+    if bit_is_set {
+      acc *= base.clone();
+    }
+    // Advance `base` to `val^(2^(shift + 1))` for the next bit.
+    base = base.square();
+  }
+  acc
+}
+
+/// Number of bits needed to represent `n`, i.e. the position of its highest set bit plus one
+/// (zero for `n == 0`).
+const fn bits_u64(n: u64) -> usize { (u64::BITS - n.leading_zeros()) as usize }
+
+/// Computes `val^1717986917` with a fixed addition chain.
+///
+/// Each local is named after the binary form of the exponent it holds, e.g. `p101` is
+/// `val^5`.
+pub fn exp_1717986917<AF: AbstractField>(val: AF) -> AF {
+  // Note that 5 * 1717986917 = 4*(2^31 - 2) + 1 = 1 mod (p - 1).
+  // Thus as a^{p - 1} = 1 for all nonzero a \in F_p, (a^{1717986917})^5 = a.
+  // Note the binary expansion: 1717986917 = 1100110011001100110011001100101_2
+  // This uses 30 Squares + 7 Multiplications => 37 Operations total.
+  // Suspect it's possible to improve this with enough effort. For example 1717986918 takes only 4
+  // Multiplications.
+  let p1 = val;
+  let p10 = p1.square();
+  let p11 = p10.clone() * p1;
+  let p101 = p10 * p11.clone();
+  let p110000 = p11.exp_power_of_2(4);
+  let p110011 = p110000 * p11.clone();
+  let p11001100000000 = p110011.exp_power_of_2(8);
+  let p11001100110011 = p11001100000000.clone() * p110011;
+  let p1100110000000000000000 = p11001100000000.exp_power_of_2(8);
+  let p1100110011001100110011 = p1100110000000000000000 * p11001100110011;
+  let p11001100110011001100110000 = p1100110011001100110011.exp_power_of_2(4);
+  let p11001100110011001100110011 = p11001100110011001100110000 * p11;
+  let p1100110011001100110011001100000 = p11001100110011001100110011.exp_power_of_2(5);
+  p1100110011001100110011001100000 * p101
+}
+
+/// Computes `val^1420470955` with a fixed addition chain.
+///
+/// Each local is named after the binary form of the exponent it holds, e.g. `p101` is
+/// `val^5`.
+pub fn exp_1420470955<AF: AbstractField>(val: AF) -> AF {
+  // Note that 3 * 1420470955 = 2*(2^31 - 2^24) + 1 = 1 mod (p - 1).
+  // Thus as a^{p - 1} = 1 for all nonzero a \in F_p, (a^{1420470955})^3 = a.
+  // Note the binary expansion: 1420470955 = 1010100101010101010101010101011_2
+  // This uses 29 Squares + 7 Multiplications => 36 Operations total.
+  // Suspect it's possible to improve this with enough effort.
+  let p1 = val;
+  let p100 = p1.exp_power_of_2(2);
+  let p101 = p100.clone() * p1.clone();
+  let p10000 = p100.exp_power_of_2(2);
+  let p10101 = p10000 * p101;
+  // `exp_power_of_2` only borrows its receiver, so `p10101` stays usable without a clone.
+  let p10101000000 = p10101.exp_power_of_2(6);
+  let p10101010101 = p10101000000.clone() * p10101.clone();
+  let p101010010101 = p10101000000 * p10101010101.clone();
+  let p101010010101000000000000 = p101010010101.exp_power_of_2(12);
+  let p101010010101010101010101 = p101010010101000000000000 * p10101010101;
+  let p101010010101010101010101000000 = p101010010101010101010101.exp_power_of_2(6);
+  let p101010010101010101010101010101 = p101010010101010101010101000000 * p10101;
+  let p1010100101010101010101010101010 = p101010010101010101010101010101.square();
+  // Last use of `p1`: move it instead of cloning.
+  p1010100101010101010101010101010 * p1
+}
+
+/// Computes `val^1725656503` with a fixed addition chain.
+///
+/// Each local is named after the binary form of the exponent it holds, e.g. `p111` is
+/// `val^7`.
+pub fn exp_1725656503<AF: AbstractField>(val: AF) -> AF {
+  // Note that 7 * 1725656503 = 6*(2^31 - 2^27) + 1 = 1 mod (p - 1).
+  // Thus as a^{p - 1} = 1 for all nonzero a \in F_p, (a^{1725656503})^7 = a.
+  // Note the binary expansion: 1725656503 = 1100110110110110110110110110111_2
+  // This uses 29 Squares + 8 Multiplications => 37 Operations total.
+  // Suspect it's possible to improve this with enough effort.
+  let p1 = val;
+  let p10 = p1.square();
+  let p11 = p10 * p1.clone();
+  let p110 = p11.square();
+  let p111 = p110.clone() * p1;
+  let p11000 = p110.exp_power_of_2(2);
+  let p11011 = p11000.clone() * p11;
+  let p11000000 = p11000.exp_power_of_2(3);
+  let p11011011 = p11000000.clone() * p11011;
+  let p110011011 = p11011011.clone() * p11000000;
+  let p110011011000000000 = p110011011.exp_power_of_2(9);
+  let p110011011011011011 = p110011011000000000 * p11011011.clone();
+  let p110011011011011011000000000 = p110011011011011011.exp_power_of_2(9);
+  let p110011011011011011011011011 = p110011011011011011000000000 * p11011011;
+  let p1100110110110110110110110110000 = p110011011011011011011011011.exp_power_of_2(4);
+  p1100110110110110110110110110000 * p111
+}
+
+/// Computes `val^10540996611094048183` with a fixed addition chain.
+///
+/// Each local is named after the binary form of the exponent it holds, e.g. `p111` is
+/// `val^7`.
+pub fn exp_10540996611094048183<AF: AbstractField>(val: AF) -> AF {
+  // Note that 7 * 10540996611094048183 = 4*(2^64 - 2^32) + 1 = 1 mod (p - 1).
+  // Thus as a^{p - 1} = 1 for all nonzero a \in F_p, (a^{10540996611094048183})^7 = a.
+  // Note the binary expansion: 10540996611094048183 =
+  // 1001001001001001001001001001000110110110110110110110110110110111_2
+  // This uses 63 Squares + 8 Multiplications => 71 Operations total.
+  // Suspect it's possible to improve this a little with enough effort.
+  let p1 = val;
+  let p10 = p1.square();
+  // Last use of `p1`: move it instead of cloning.
+  let p11 = p10.clone() * p1;
+  let p100 = p10.square();
+  let p111 = p100.clone() * p11.clone();
+  let p100000000000000000000000000000000 = p100.exp_power_of_2(30);
+  let p100000000000000000000000000000011 = p100000000000000000000000000000000 * p11;
+  let p100000000000000000000000000000011000 = p100000000000000000000000000000011.exp_power_of_2(3);
+  let p100100000000000000000000000000011011 =
+    p100000000000000000000000000000011000 * p100000000000000000000000000000011;
+  let p100100000000000000000000000000011011000000 =
+    p100100000000000000000000000000011011.exp_power_of_2(6);
+  let p100100100100000000000000000000011011011011 =
+    p100100000000000000000000000000011011000000 * p100100000000000000000000000000011011.clone();
+  let p100100100100000000000000000000011011011011000000000000 =
+    p100100100100000000000000000000011011011011.exp_power_of_2(12);
+  let p100100100100100100100100000000011011011011011011011011 =
+    p100100100100000000000000000000011011011011000000000000
+      * p100100100100000000000000000000011011011011;
+  let p100100100100100100100100000000011011011011011011011011000000 =
+    p100100100100100100100100000000011011011011011011011011.exp_power_of_2(6);
+  let p100100100100100100100100100100011011011011011011011011011011 =
+    p100100100100100100100100000000011011011011011011011011000000
+      * p100100000000000000000000000000011011;
+  let p1001001001001001001001001001000110110110110110110110110110110000 =
+    p100100100100100100100100100100011011011011011011011011011011.exp_power_of_2(4);
+
+  p1001001001001001001001001001000110110110110110110110110110110000 * p111
+}