From 16540900bf84a4569aea8388b5f898bc0abdaf4e Mon Sep 17 00:00:00 2001
From: Giacomo Fenzi <giacomofenzi@outlook.com>
Date: Mon, 9 Sep 2024 17:06:18 +0200
Subject: [PATCH] Add support for different strategies for PoW.  (#17)

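In some contexts, you want to use different strategies for PoW.
For example, in an EVM context you would like to use Keccak hashes; in a
recursion context, Poseidon; and so on.
This change introduces the `PowStrategy` trait so that the hash function
used for the proof of work can be chosen by the caller.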
---
 src/plugins/{pow.rs => pow/blake3.rs} | 168 +++++++++-----------------
 src/plugins/pow/keccak.rs             |  52 ++++++++
 src/plugins/pow/mod.rs                | 112 +++++++++++++++++
 3 files changed, 220 insertions(+), 112 deletions(-)
 rename src/plugins/{pow.rs => pow/blake3.rs} (68%)
 create mode 100644 src/plugins/pow/keccak.rs
 create mode 100644 src/plugins/pow/mod.rs

diff --git a/src/plugins/pow.rs b/src/plugins/pow/blake3.rs
similarity index 68%
rename from src/plugins/pow.rs
rename to src/plugins/pow/blake3.rs
index 117b1e0..7e2f975 100644
--- a/src/plugins/pow.rs
+++ b/src/plugins/pow/blake3.rs
@@ -1,7 +1,5 @@
-use crate::{
-    Arthur, ByteChallenges, ByteIOPattern, ByteReader, ByteWriter, IOPattern, Merlin, ProofError,
-    ProofResult,
-};
+use super::PowStrategy;
+
 use {
     blake3::{
         guts::BLOCK_LEN,
@@ -14,64 +12,8 @@ use {
 #[cfg(feature = "parallel")]
 use rayon::broadcast;
 
-/// [`IOPattern`] for proof-of-work challenges.
-pub trait PoWIOPattern {
-    /// Adds a [`PoWChal`] to the [`IOPattern`].
-    ///
-    /// In order to squeeze a proof-of-work challenge, we extract a 32-byte challenge using
-    /// the byte interface, and then we find a 16-byte nonce that satisfies the proof-of-work.
-    /// The nonce a 64-bit integer encoded as an unsigned integer and written in big-endian and added
-    /// to the protocol transcript as the nonce for the proof-of-work.
-    ///
-    /// The number of bits used for the proof of work are **not** encoded within the [`IOPattern`].
-    /// It is up to the implementor to change the domain separator or the label in order to reflect changes in the proof
-    /// in order to preserve simulation extractability.
-    fn challenge_pow(self, label: &str) -> Self;
-}
-
-impl PoWIOPattern for IOPattern {
-    fn challenge_pow(self, label: &str) -> Self {
-        // 16 bytes challenge and 16 bytes nonce (that will be written)
-        self.challenge_bytes(32, label).add_bytes(8, "pow-nonce")
-    }
-}
-
-pub trait PoWChallenge {
-    /// Extension trait for generating a proof-of-work challenge.
-    fn challenge_pow(&mut self, bits: f64) -> ProofResult<()>;
-}
-
-impl PoWChallenge for Merlin
-where
-    Merlin: ByteWriter,
-{
-    fn challenge_pow(&mut self, bits: f64) -> ProofResult<()> {
-        let challenge = self.challenge_bytes()?;
-        let nonce = Pow::new(challenge, bits)
-            .solve()
-            .ok_or(ProofError::InvalidProof)?;
-        self.add_bytes(&nonce.to_be_bytes())?;
-        Ok(())
-    }
-}
-
-impl<'a> PoWChallenge for Arthur<'a>
-where
-    Arthur<'a>: ByteReader,
-{
-    fn challenge_pow(&mut self, bits: f64) -> ProofResult<()> {
-        let challenge = self.challenge_bytes()?;
-        let nonce = u64::from_be_bytes(self.next_bytes()?);
-        if Pow::new(challenge, bits).check(nonce) {
-            Ok(())
-        } else {
-            Err(ProofError::InvalidProof)
-        }
-    }
-}
-
 #[derive(Clone, Copy)]
-struct Pow {
+pub struct Blake3PoW {
     challenge: [u8; 32],
     threshold: u64,
     platform: Platform,
@@ -79,18 +21,7 @@ struct Pow {
     outputs: [u8; OUT_LEN * MAX_SIMD_DEGREE],
 }
 
-impl Pow {
-    /// Default Blake3 initialization vector. Copied here because it is not publicly exported.
-    const BLAKE3_IV: [u32; 8] = [
-        0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB,
-        0x5BE0CD19,
-    ];
-    const BLAKE3_FLAGS: u8 = 0x0B; // CHUNK_START | CHUNK_END | ROOT
-
-    /// Creates a new proof-of-work challenge.
-    /// The `challenge` is a 32-byte array that represents the challenge.
-    /// The `bits` is the binary logarithm of the expected amount of work.
-    /// When `bits` is large (i.e. close to 64), a valid solution may not be found.
+impl PowStrategy for Blake3PoW {
     fn new(challenge: [u8; 32], bits: f64) -> Self {
         assert_eq!(BLOCK_LEN, 64);
         assert_eq!(OUT_LEN, 32);
@@ -111,7 +42,6 @@ impl Pow {
         }
     }
 
-    /// Check if the `nonce` satisfies the challenge.
     /// This deliberately uses the high level interface to guarantee
     /// compatibility with standard Blake3.
     fn check(&mut self, nonce: u64) -> bool {
@@ -128,41 +58,6 @@ impl Pow {
         result < self.threshold
     }
 
-    /// Find the minimal nonce that satisfies the challenge (if any) in a
-    /// length `MAX_SIMD_DEGREE` sequence of nonces starting from `nonce`.
-    fn check_many(&mut self, nonce: u64) -> Option<u64> {
-        for (i, input) in self.inputs.chunks_exact_mut(BLOCK_LEN).enumerate() {
-            input[32..40].copy_from_slice(&(nonce + i as u64).to_le_bytes())
-        }
-        // `hash_many` requires an array of references. We need to construct this fresh
-        // each call as we cannot store the references and mutate the array.
-        let inputs: [&[u8; BLOCK_LEN]; MAX_SIMD_DEGREE] = std::array::from_fn(|i| {
-            self.inputs[(i * BLOCK_LEN)..((i + 1) * BLOCK_LEN)]
-                .try_into()
-                .unwrap()
-        });
-        let counter = 0;
-        let flags_start = 0;
-        let flags_end = 0;
-        self.platform.hash_many::<BLOCK_LEN>(
-            &inputs,
-            &Self::BLAKE3_IV,
-            counter,
-            IncrementCounter::No,
-            Self::BLAKE3_FLAGS,
-            flags_start,
-            flags_end,
-            &mut self.outputs,
-        );
-        for (i, input) in self.outputs.chunks_exact_mut(OUT_LEN).enumerate() {
-            let result = u64::from_le_bytes(input[..8].try_into().unwrap());
-            if result < self.threshold {
-                return Some(nonce + i as u64);
-            }
-        }
-        None
-    }
-
     /// Finds the minimal `nonce` that satisfies the challenge.
     #[cfg(not(feature = "parallel"))]
     fn solve(&mut self) -> Option<u64> {
@@ -202,8 +97,57 @@ impl Pow {
     }
 }
 
+impl Blake3PoW {
+    /// Default Blake3 initialization vector. Copied here because it is not publicly exported.
+    const BLAKE3_IV: [u32; 8] = [
+        0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB,
+        0x5BE0CD19,
+    ];
+    const BLAKE3_FLAGS: u8 = 0x0B; // CHUNK_START | CHUNK_END | ROOT
+
+    /// Writes consecutive nonces starting at `nonce` (little-endian, bytes 32..40 of each block),
+    /// hashes all `MAX_SIMD_DEGREE` blocks in one call and returns the minimal valid nonce, if any.
+    fn check_many(&mut self, nonce: u64) -> Option<u64> {
+        for (i, input) in self.inputs.chunks_exact_mut(BLOCK_LEN).enumerate() {
+            input[32..40].copy_from_slice(&(nonce + i as u64).to_le_bytes())
+        }
+        // `hash_many` takes an array of references to the blocks. It is rebuilt on every call
+        // because the references cannot be stored while `self.inputs` is still being mutated.
+        let inputs: [&[u8; BLOCK_LEN]; MAX_SIMD_DEGREE] = std::array::from_fn(|i| {
+            self.inputs[(i * BLOCK_LEN)..((i + 1) * BLOCK_LEN)]
+                .try_into()
+                .unwrap() // cannot fail: the range above is exactly BLOCK_LEN bytes long
+        });
+        let counter = 0;
+        let flags_start = 0; // NOTE(review): presumably no extra flags, BLAKE3_FLAGS has them all
+        let flags_end = 0;
+        self.platform.hash_many::<BLOCK_LEN>(
+            &inputs,
+            &Self::BLAKE3_IV,
+            counter,
+            IncrementCounter::No, // every block is hashed with the same counter value
+            Self::BLAKE3_FLAGS,
+            flags_start,
+            flags_end,
+            &mut self.outputs,
+        );
+        for (i, input) in self.outputs.chunks_exact_mut(OUT_LEN).enumerate() {
+            let result = u64::from_le_bytes(input[..8].try_into().unwrap());
+            if result < self.threshold {
+                return Some(nonce + i as u64); // first hit is the minimal nonce
+            }
+        }
+        None
+    }
+}
+
 #[test]
-fn test_pow() {
+fn test_pow_blake3() {
+    use crate::{
+        plugins::pow::{PoWChallenge, PoWIOPattern},
+        ByteIOPattern, ByteReader, ByteWriter, IOPattern,
+    };
+
     const BITS: f64 = 10.0;
 
     let iopattern = IOPattern::new("the proof of work lottery 🎰")
@@ -212,10 +156,10 @@ fn test_pow() {
 
     let mut prover = iopattern.to_merlin();
     prover.add_bytes(b"\0").expect("Invalid IOPattern");
-    prover.challenge_pow(BITS).unwrap();
+    prover.challenge_pow::<Blake3PoW>(BITS).unwrap();
 
     let mut verifier = iopattern.to_arthur(prover.transcript());
     let byte = verifier.next_bytes::<1>().unwrap();
     assert_eq!(&byte, b"\0");
-    verifier.challenge_pow(BITS).unwrap();
+    verifier.challenge_pow::<Blake3PoW>(BITS).unwrap();
 }
diff --git a/src/plugins/pow/keccak.rs b/src/plugins/pow/keccak.rs
new file mode 100644
index 0000000..bab67e2
--- /dev/null
+++ b/src/plugins/pow/keccak.rs
@@ -0,0 +1,52 @@
+use super::PowStrategy;
+
+#[derive(Clone, Copy)]
+pub struct KeccakPoW {
+    challenge: [u64; 4], // the 32-byte challenge reinterpreted as four 64-bit lanes
+    threshold: u64, // a nonce is accepted when the first output lane is below this value
+    state: [u64; 25], // scratch permutation state, fully rewritten by every `check`
+}
+
+impl PowStrategy for KeccakPoW {
+    /// Stores the challenge as four 64-bit lanes and sets the threshold to `ceil(2^(64 - bits))`.
+    fn new(challenge: [u8; 32], bits: f64) -> Self {
+        Self {
+            challenge: bytemuck::cast(challenge),
+            threshold: (64.0 - bits).exp2().ceil() as u64,
+            state: [0; 25],
+        }
+    }
+
+    /// Permutes the state `challenge || nonce || 0..` and accepts if lane 0 is below the threshold.
+    fn check(&mut self, nonce: u64) -> bool {
+        // Lanes 0..4 carry the challenge, lane 4 the nonce, every remaining lane is zeroed.
+        let (head, tail) = self.state.split_at_mut(4);
+        head.copy_from_slice(&self.challenge);
+        tail[0] = nonce;
+        tail[1..].fill(0);
+        keccak::f1600(&mut self.state);
+        self.state[0] < self.threshold
+    }
+}
+
+#[test]
+fn test_pow_keccak() {
+    use crate::{
+        plugins::pow::{PoWChallenge, PoWIOPattern},
+        ByteIOPattern, ByteReader, ByteWriter, IOPattern,
+    };
+
+    const DIFFICULTY: f64 = 10.0;
+    let pattern = IOPattern::new("the proof of work lottery 🎰")
+        .add_bytes(1, "something")
+        .challenge_pow("rolling dices");
+
+    // Prover: absorb one byte, then search a nonce for the squeezed challenge.
+    let mut merlin = pattern.to_merlin();
+    merlin.add_bytes(b"\0").expect("Invalid IOPattern");
+    merlin.challenge_pow::<KeccakPoW>(DIFFICULTY).unwrap();
+
+    // Verifier: replay the transcript and check the nonce at the same difficulty.
+    let mut arthur = pattern.to_arthur(merlin.transcript());
+    assert_eq!(&arthur.next_bytes::<1>().unwrap(), b"\0");
+    arthur.challenge_pow::<KeccakPoW>(DIFFICULTY).unwrap();
+}
diff --git a/src/plugins/pow/mod.rs b/src/plugins/pow/mod.rs
new file mode 100644
index 0000000..f72d0fe
--- /dev/null
+++ b/src/plugins/pow/mod.rs
@@ -0,0 +1,112 @@
+pub mod blake3; // public so that callers can name `Blake3PoW` in `challenge_pow::<S>`
+pub mod keccak; // public so that callers can name `KeccakPoW` in `challenge_pow::<S>`
+
+use crate::{
+    Arthur, ByteChallenges, ByteIOPattern, ByteReader, ByteWriter, IOPattern, Merlin, ProofError,
+    ProofResult,
+};
+/// Extension of [`IOPattern`] for declaring proof-of-work challenges.
+pub trait PoWIOPattern {
+    /// Adds a proof-of-work challenge to the [`IOPattern`].
+    ///
+    /// In order to squeeze a proof-of-work challenge, we extract a 32-byte challenge using
+    /// the byte interface, and then we find an 8-byte nonce that satisfies the proof-of-work.
+    /// The nonce is an unsigned 64-bit integer, written in big-endian and added
+    /// to the protocol transcript as the nonce for the proof-of-work.
+    ///
+    /// The number of bits used for the proof of work is **not** encoded within the [`IOPattern`].
+    /// It is up to the implementor to change the domain separator or the label to reflect
+    /// changes in the proof, in order to preserve simulation extractability.
+    fn challenge_pow(self, label: &str) -> Self;
+}
+
+impl PoWIOPattern for IOPattern {
+    fn challenge_pow(self, label: &str) -> Self {
+        // 32 bytes of challenge (squeezed), then 8 bytes of nonce (that will be written)
+        self.challenge_bytes(32, label).add_bytes(8, "pow-nonce")
+    }
+}
+
+pub trait PoWChallenge {
+    /// Extension method that runs a proof-of-work challenge of `bits` bits using strategy `S`.
+    fn challenge_pow<S: PowStrategy>(&mut self, bits: f64) -> ProofResult<()>;
+}
+
+impl PoWChallenge for Merlin
+where
+    Merlin: ByteWriter,
+{
+    /// Prover side: squeezes the challenge, searches a nonce with `S`, writes it big-endian.
+    fn challenge_pow<S: PowStrategy>(&mut self, bits: f64) -> ProofResult<()> {
+        let seed = self.challenge_bytes()?;
+        let mut pow = S::new(seed, bits);
+        let solution = pow.solve().ok_or(ProofError::InvalidProof)?;
+        self.add_bytes(&solution.to_be_bytes())?;
+        Ok(())
+    }
+}
+
+impl<'a> PoWChallenge for Arthur<'a>
+where
+    Arthur<'a>: ByteReader,
+{
+    /// Verifier side: squeezes the challenge, reads the big-endian nonce, checks it with `S`.
+    fn challenge_pow<S: PowStrategy>(&mut self, bits: f64) -> ProofResult<()> {
+        let seed = self.challenge_bytes()?;
+        let candidate = u64::from_be_bytes(self.next_bytes()?);
+        S::new(seed, bits)
+            .check(candidate)
+            .then_some(())
+            .ok_or(ProofError::InvalidProof)
+    }
+}
+
+/// A proof-of-work function over a 32-byte challenge and `u64` nonces.
+pub trait PowStrategy: Clone + Sync {
+    /// Creates a new proof-of-work challenge.
+    /// The `challenge` is a 32-byte array that represents the challenge.
+    /// The `bits` is the binary logarithm of the expected amount of work.
+    /// When `bits` is large (i.e. close to 64), a valid solution may not be found.
+    fn new(challenge: [u8; 32], bits: f64) -> Self;
+
+    /// Check if the `nonce` satisfies the challenge.
+    fn check(&mut self, nonce: u64) -> bool;
+
+    /// Finds the minimal `nonce` that satisfies the challenge.
+    #[cfg(not(feature = "parallel"))]
+    fn solve(&mut self) -> Option<u64> {
+        // Inclusive range: yields `None` once `u64::MAX` has failed, instead of overflowing.
+        (0..=u64::MAX).find(|&nonce| self.check(nonce))
+    }
+
+    /// Finds the minimal `nonce` that satisfies the challenge, splitting the work across all
+    /// available threads; atomics keep the result the unique deterministic lowest nonce.
+    #[cfg(feature = "parallel")]
+    fn solve(&mut self) -> Option<u64> {
+        use rayon::broadcast;
+        use std::sync::atomic::{AtomicU64, Ordering};
+
+        let global_min = AtomicU64::new(u64::MAX);
+        let _ = broadcast(|ctx| {
+            let mut worker = self.clone();
+            let nonces = (ctx.index() as u64..).step_by(ctx.num_threads());
+            for nonce in nonces {
+                // Use relaxed ordering to eventually get notified of another thread's solution.
+                // (Propagation delay should be in the order of tens of nanoseconds.)
+                if nonce >= global_min.load(Ordering::Relaxed) {
+                    break;
+                }
+                if worker.check(nonce) {
+                    // We found a solution, store it in the global_min.
+                    // Use fetch_min to solve race condition with simultaneous solutions.
+                    global_min.fetch_min(nonce, Ordering::SeqCst);
+                    break;
+                }
+            }
+        });
+        match global_min.load(Ordering::SeqCst) {
+            u64::MAX => self.check(u64::MAX).then_some(u64::MAX),
+            nonce => Some(nonce),
+        }
+    }
+}