EspressoSystems · alxiong · Jun 14, 2023 · Jun 15, 2023 · Jun 15, 2023 · Jun 15, 2023
@@ -7,6 +7,8 @@ Cargo.lock
 *.org
 .pre-commit-config.yaml
 .vscode
+**/*flamegraph*
+**/*.coz
 
 # Test coverage (grcov)
 default.profraw

@@ -15,3 +15,8 @@ rust-version = "1.64.0"
 homepage = "https://github.com/EspressoSystems/jellyfish"
 documentation = "https://jellyfish.docs.espressosys.com"
 repository = "https://github.com/EspressoSystems/jellyfish"
+
+# optional, for coz profiler.
+# see <https://github.com/plasma-umass/coz/tree/master/rust>
+[profile.release]
+debug = 1
@@ -137,10 +137,35 @@ where N is the number of threads you want to use (N = 1 for single-thread).
 
 A sample benchmark result is available under [`bench.md`](./bench.md).
 
-## Git Hooks
+### Profiling
 
-The pre-commit hooks are installed via the nix shell. To run them on all files use
+You can use `cargo flamegraph` (already installed in the nix-shell) as follows (see the [flamegraph documentation](https://github.com/flamegraph-rs/flamegraph#examples) for more examples):
 
+``` bash
+# --root is necessary for Mac users
+cargo flamegraph --root --bench=plonk-benches --features test-srs
+
+# output to a specific file, targeting wasm
+cargo flamegraph --root -o path/to/wasm-flamegraph.svg --bench=plonk-benches --no-default-features --features test-srs
+
+# profile a specific test
+cargo flamegraph --root --unit-test -p jf-primitives -- pcs::univariate_kzg::tests::end_to_end_test
 ```
-pre-commit run --all-files
+
+You can also perform _causal profiling_ using [coz](https://github.com/plasma-umass/coz); note that coz is only available on Linux systems.
+
+``` bash
+# build the bench or example or binary that you want to profile
+cargo build --bench reed-solomon-coz --features profiling --release
+
+# you can find the binary inside ./target/<mode>/deps/<name>-<hash>
+coz run --- ./target/release/deps/reed_solomon_coz-db5107103a0e378c
+
+# plot your result
+coz plot
+
+# alternatively, view your profile.coz on https://plasma-umass.org/coz/
 ```
+
+As an example, see `./primitives/src/reed_solomon_code/mod.rs::reed_solomon_erasure_decode()` for sample usage of `coz` annotations for latency profiling, and `./primitives/benches/reed_solomon_coz.rs` for the benchmark code.
+You can also conduct throughput profiling; read more [here](https://github.com/plasma-umass/coz/tree/master/rust).
@@ -7,6 +7,13 @@
 {
   description = "Jellyfish dev env";
 
+  nixConfig = {
+    extra-substituters = [ "https://espresso-systems-private.cachix.org" ];
+    extra-trusted-public-keys = [
+      "espresso-systems-private.cachix.org-1:LHYk03zKQCeZ4dvg3NctyCq88e44oBZVug5LpYKjPRI="
+    ];
+  };
+
   inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
   inputs.flake-utils.url = "github:numtide/flake-utils"; # for dedup
 
@@ -18,22 +25,20 @@
   inputs.pre-commit-hooks.url = "github:cachix/pre-commit-hooks.nix";
   inputs.pre-commit-hooks.inputs.nixpkgs.follows = "nixpkgs";
 
-  outputs = { self, nixpkgs, flake-utils, flake-compat, rust-overlay, pre-commit-hooks, ... }:
+  outputs = { self, nixpkgs, flake-utils, flake-compat, rust-overlay
+    , pre-commit-hooks, ... }:
     flake-utils.lib.eachDefaultSystem (system:
       let
-        overlays = [ 
-          (import rust-overlay)
-        ];
+        overlays = [ (import rust-overlay) ];
         pkgs = import nixpkgs { inherit system overlays; };
-        nightlyToolchain = pkgs.rust-bin.selectLatestNightlyWith
-          (toolchain: toolchain.minimal.override { extensions = [ "rustfmt" ]; });
+        nightlyToolchain = pkgs.rust-bin.selectLatestNightlyWith (toolchain:
+          toolchain.minimal.override { extensions = [ "rustfmt" ]; });
 
         stableToolchain = pkgs.rust-bin.stable.latest.minimal.override {
           extensions = [ "clippy" "llvm-tools-preview" "rust-src" ];
-          targets = ["wasm32-unknown-unknown"];
+          targets = [ "wasm32-unknown-unknown" ];
         };
-      in with pkgs;
-      {
+      in with pkgs; {
         check = {
           pre-commit-check = pre-commit-hooks.lib.${system}.run {
             src = ./.;
@@ -76,10 +81,13 @@
             stableToolchain
             nightlyToolchain
             cargo-sort
+            cargo-flamegraph
             clang-tools_15
             clangStdenv
             llvm_15
-          ] ++ lib.optionals stdenv.isDarwin [ darwin.apple_sdk.frameworks.Security ];
+          ] ++ lib.optionals stdenv.isDarwin
+            [ darwin.apple_sdk.frameworks.Security ]
+            ++ lib.optionals stdenv.isLinux [ coz ];
 
           shellHook = ''
             export RUST_BACKTRACE=full
@@ -98,9 +106,8 @@
             # by default choose u64_backend
             export RUSTFLAGS='--cfg curve25519_dalek_backend="u64"'
           ''
-          # install pre-commit hooks
-          + self.check.${system}.pre-commit-check.shellHook;
+            # install pre-commit hooks
+            + self.check.${system}.pre-commit-check.shellHook;
         };
-      }
-    );
+      });
 }
@@ -8,6 +8,7 @@
 //     RAYON_NUM_THREADS=N cargo bench
 // where N is the number of threads you want to use (N = 1 for single-thread).
 
+#![cfg(feature = "test-srs")]
 use ark_bls12_377::{Bls12_377, Fr as Fr377};
 use ark_bls12_381::{Bls12_381, Fr as Fr381};
 use ark_bn254::{Bn254, Fr as Fr254};
@@ -51,7 +52,8 @@ macro_rules! plonk_prove_bench {
         let cs = gen_circuit_for_bench::<$bench_field>($num_gates, $bench_plonk_type).unwrap();
 
         let max_degree = $num_gates + 2;
-        let srs = PlonkKzgSnark::<$bench_curve>::universal_setup(max_degree, rng).unwrap();
+        let srs =
+            PlonkKzgSnark::<$bench_curve>::universal_setup_for_testing(max_degree, rng).unwrap();
 
         let (pk, _) = PlonkKzgSnark::<$bench_curve>::preprocess(&srs, &cs).unwrap();
 
@@ -90,7 +92,8 @@ macro_rules! plonk_verify_bench {
         let cs = gen_circuit_for_bench::<$bench_field>($num_gates, $bench_plonk_type).unwrap();
 
         let max_degree = $num_gates + 2;
-        let srs = PlonkKzgSnark::<$bench_curve>::universal_setup(max_degree, rng).unwrap();
+        let srs =
+            PlonkKzgSnark::<$bench_curve>::universal_setup_for_testing(max_degree, rng).unwrap();
 
         let (pk, vk) = PlonkKzgSnark::<$bench_curve>::preprocess(&srs, &cs).unwrap();
 
@@ -132,7 +135,8 @@ macro_rules! plonk_batch_verify_bench {
         let cs = gen_circuit_for_bench::<$bench_field>(1024, $bench_plonk_type).unwrap();
 
         let max_degree = 1026;
-        let srs = PlonkKzgSnark::<$bench_curve>::universal_setup(max_degree, rng).unwrap();
+        let srs =
+            PlonkKzgSnark::<$bench_curve>::universal_setup_for_testing(max_degree, rng).unwrap();
 
         let (pk, vk) = PlonkKzgSnark::<$bench_curve>::preprocess(&srs, &cs).unwrap();
 

@@ -28,6 +28,7 @@ chacha20poly1305 = { version = "0.10.1", default-features = false, features = [
         "alloc",
         "rand_core",
 ] }
+coz = { version = "0.1", optional = true }
 crypto_kx = { version = "=0.2.0-pre.0", features = ["serde"] }
 derivative = { version = "2", features = ["use_core"] }
 digest = { version = "0.10.1", default-features = false, features = ["alloc"] }
@@ -63,6 +64,7 @@ ark-ed-on-bls12-377 = "0.4.0"
 ark-ed-on-bls12-381-bandersnatch = "0.4.0"
 ark-ed-on-bn254 = "0.4.0"
 bincode = "1.3"
+coz = "0.1"
 criterion = "0.4.0"
 sha2 = { version = "0.10.1" }
 
@@ -79,6 +81,11 @@ name = "reed-solomon"
 path = "benches/reed_solomon.rs"
 harness = false
 
+[[bench]]
+name = "reed-solomon-coz"
+path = "benches/reed_solomon_coz.rs"
+harness = false
+
 [features]
 default = ["parallel"]
 std = [
@@ -98,7 +105,7 @@ std = [
         "jf-utils/std",
         "jf-relation/std",
 ]
-print-trace = ["ark-std/print-trace"]
+profiling = ["ark-std/print-trace", "dep:coz"]
 parallel = [
         "ark-ff/parallel",
         "ark-ec/parallel",

@@ -0,0 +1,30 @@
+//! Benchmark code for reed_solomon implementation for coz profiler.
+use ark_bn254::Fr as Fr254;
+use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
+use jf_primitives::reed_solomon_code::reed_solomon_erasure_decode;
+
+const N: usize = 2048;
+const N_HALF: usize = 1024;
+// run it many times so coz will be triggered enough times
+// see: <https://github.com/plasma-umass/coz/issues/158#issuecomment-708507510>
+const ITERATIONS: usize = 100;
+
+fn main() {
+    // Per-thread coz initialization, done before any of the profiled work.
+    coz::thread_init();
+
+    let fft_domain = GeneralEvaluationDomain::<Fr254>::new(N).unwrap();
+    let message = vec![Fr254::from(1u64); N_HALF];
+
+    // Encode: FFT the message over the domain, and list the domain's elements
+    // to serve as the evaluation points paired with each share.
+    let codeword = fft_domain.fft(&message);
+    let points: Vec<_> = fft_domain.elements().collect();
+
+    // Decode ITERATIONS times, each time from the first N_HALF
+    // (evaluation point, share) pairs.
+    (0..ITERATIONS).for_each(|_| {
+        let shares = points.iter().zip(&codeword).take(N_HALF);
+        reed_solomon_erasure_decode::<Fr254, _, _, _>(shares, N_HALF).unwrap();
+    });
+}
@@ -11,6 +11,8 @@ use ark_ff::{FftField, Field};
 use ark_poly::{EvaluationDomain, Radix2EvaluationDomain};
 use ark_std::{format, string::ToString, vec, vec::Vec};
 use core::borrow::Borrow;
+#[cfg(all(debug_assertions, target_os = "linux", feature = "profiling"))]
+use coz;
 
 /// Erasure-encode `data` into `data.len() + parity_size` shares.
 ///
@@ -97,6 +99,9 @@ where
         .clone()
         .map(|share| *share.borrow().0.borrow())
         .collect::<Vec<_>>();
+
+    #[cfg(all(debug_assertions, target_os = "linux", feature = "profiling"))]
+    coz::begin!("computing l(X)");
     // Calculating l(x) = \prod (x - x_i)
     let mut l = vec![F::zero(); data_size + 1];
     l[0] = F::one();
@@ -107,6 +112,12 @@ where
         }
         l[0] = -x[i - 1] * l[0];
     }
+
+    #[cfg(all(debug_assertions, target_os = "linux", feature = "profiling"))]
+    {
+        coz::end!("computing l(X)");
+        coz::begin!("computing barycentric weight w_i");
+    }
     // Calculate the barycentric weight w_i
     let w = (0..data_size)
         .map(|i| {
@@ -126,6 +137,11 @@ where
             Ok(ret)
         })
         .collect::<Result<Vec<_>, _>>()?;
+    #[cfg(all(debug_assertions, target_os = "linux", feature = "profiling"))]
+    {
+        coz::end!("computing barycentric weight w_i");
+        coz::begin!("computing f(X)");
+    }
     // Calculate f(x) = \sum_i l_i(x)
     let mut f = vec![F::zero(); data_size];
     // for i in 0..shares.len() {
@@ -140,6 +156,10 @@ where
             f[j] += weight * li[j];
         }
     }
+    #[cfg(all(debug_assertions, target_os = "linux", feature = "profiling"))]
+    {
+        coz::end!("computing f(X)");
+    }
     Ok(f)
 }