msm benchmark

ingonyama-zk · Dec 12, 2023 · 4b47cf0 · 4b47cf0
1 parent b4efc90
commit 4b47cf0
Show file tree

Hide file tree

Showing 7 changed files with 394 additions and 0 deletions.
diff --git a/benchmarks/msm/.devcontainer/Dockerfile b/benchmarks/msm/.devcontainer/Dockerfile
@@ -0,0 +1,42 @@
+# Make sure NVIDIA Container Toolkit is installed on your host
+
+# Use the specified base image
+FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
+
+# Update and install dependencies.
+# DEBIAN_FRONTEND=noninteractive is set for this command only, so that a
+# package configuration prompt can never stall the image build.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    nsight-systems-12.2 \
+    cmake \
+    protobuf-compiler \
+    curl \
+    build-essential \
+    git \
+    libboost-all-dev \
+    jq \
+    postgresql-client \
+    python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+
+# apt-get install cuda-nsight-systems-12-2
+
+# Clone Icicle from a GitHub repository
+RUN git clone https://github.com/ingonyama-zk/icicle.git  /icicle
+
+# Benchmarking in C++
+RUN git clone https://github.com/google/benchmark.git /opt/benchmark \
+    && cd /opt/benchmark \
+    && cmake -E make_directory "build" \
+    && cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -S . -B "build" \
+    && cmake --build "build" --config Release \
+    && cmake --build "build" --config Release --target install
+
+
+# Set the working directory in the container
+WORKDIR /icicle-benchmark
+# COPY . .
+# RUN mkdir -p build && \
+#     cmake -S . -B build && \
+#     cmake --build build
+
+# Specify the default command for the container
+CMD ["/bin/bash"]
diff --git a/benchmarks/msm/.devcontainer/devcontainer.json b/benchmarks/msm/.devcontainer/devcontainer.json
@@ -0,0 +1,25 @@
+{
+    "name": "Icicle Benchmarks - msm",
+    "build": {
+        "dockerfile": "Dockerfile"
+    },
+    "workspaceMount": "source=${localWorkspaceFolder}/.,target=/icicle-benchmark,type=bind",
+    "workspaceFolder": "/icicle-benchmark",
+    "runArgs": [
+        "--gpus",
+        "all"
+    ],
+    "postCreateCommand": [
+        "nvidia-smi"
+	],
+	"customizations": {
+		"vscode": {
+			"extensions": [
+				"ms-vscode.cmake-tools",
+				"ms-python.python",
+				"ms-azuretools.vscode-docker",
+				"ms-vscode.cpptools-extension-pack"
+			]
+		}
+	}
+}
diff --git a/benchmarks/msm/CMakeLists.txt b/benchmarks/msm/CMakeLists.txt
@@ -0,0 +1,29 @@
+cmake_minimum_required(VERSION 3.18)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CUDA_STANDARD 17)
+set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
+if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
+    set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH}) # before 3.24: take the target from CUDA_ARCH, which is not set in this file (presumably passed via -DCUDA_ARCH=... - confirm)
+else()
+    set(CMAKE_CUDA_ARCHITECTURES native) # the "native" value is only accepted by CMake 3.24+
+endif ()
+project(zk-benchmarks LANGUAGES CUDA CXX)
+
+#set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
+#set(CMAKE_CUDA_FLAGS_RELEASE "")
+#set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
+
+include_directories("/icicle") # where the dev container's Dockerfile clones Icicle
+include_directories("/opt/benchmark/include") # headers of the Google Benchmark checkout built by the dev container's Dockerfile
+
+add_executable(
+  benchmark
+  benchmark.cu
+)
+
+find_library(BENCHMARK_LIBRARY benchmark PATHS /usr/local/lib)
+find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ ) # NVML is looked up in the CUDA toolkit's stubs directory
+target_link_libraries(benchmark ${BENCHMARK_LIBRARY} ${NVML_LIBRARY})
+set_target_properties(benchmark PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
diff --git a/benchmarks/msm/README.md b/benchmarks/msm/README.md
@@ -0,0 +1,18 @@
+# Icicle benchmark: Multi Scalar Multiplication
+
+The benchmark measures the runtime of [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
+
+
+## Best-Practices
+
+We recommend running the benchmarks in [ZK-containers](../ZK-containers.md) to save time and mental energy.
+
+## Run benchmark
+
+Inside the container, run:
+
+```sh
+./compile.sh
+./run.sh
+```
+
diff --git a/benchmarks/msm/benchmark.cu b/benchmarks/msm/benchmark.cu
@@ -0,0 +1,137 @@
+#define CURVE_BN254     1
+#define CURVE_BLS12_381 2
+#define CURVE_BLS12_377 3
+
+#define CURVE CURVE_BLS12_377
+
+#include <stdio.h>
+#include <iostream>
+#include <string>
+#include <cuda_runtime.h>
+#include <nvml.h>
+#include <benchmark/benchmark.h>
+#include "icicle/primitives/field.cuh"
+#include "icicle/utils/storage.cuh"
+#include "icicle/primitives/projective.cuh"
+
+#include "icicle/appUtils/msm/msm.cu"
+
+// Select the curve configuration header, its namespace and the display name
+// reported in the benchmark context, according to the CURVE macro.
+#if CURVE == CURVE_BN254
+
+#include "icicle/curves/bn254/curve_config.cuh"
+using namespace BN254;
+const std::string curve = "BN254";
+
+#elif CURVE == CURVE_BLS12_381
+
+#include "icicle/curves/bls12_381/curve_config.cuh"
+using namespace BLS12_381;
+const std::string curve = "BLS12-381";
+
+#elif CURVE == CURVE_BLS12_377
+
+#include "icicle/curves/bls12_377/curve_config.cuh"
+using namespace BLS12_377;
+const std::string curve = "BLS12-377";
+
+#else
+
+// Fail here with a clear message rather than later with undefined-type errors.
+#error "CURVE must be one of CURVE_BN254, CURVE_BLS12_381 or CURVE_BLS12_377"
+
+#endif
+
+const unsigned max_msm_size = 1<<22;  // element count used by main() to size the host and device input buffers
+unsigned bucket_factor = 12;  // forwarded to large_msm() and reported as the "coefficient_C" context entry
+
+// on-host data: input buffers allocated and filled in main()
+scalar_t* scalars;
+affine_t* points;
+projective_t result;  // NOTE(review): not referenced anywhere in the visible code - confirm it is still needed
+
+// on-device data: device copies of the inputs plus the single-element output passed to large_msm()
+scalar_t* scalars_d;
+affine_t* points_d;
+projective_t* result_d;
+
+nvmlDevice_t device;  // NVML handle for GPU index 0; obtained in main(), queried in BM_msm()
+cudaStream_t stream;  // created in main() and passed to large_msm()
+
+// Benchmarks one on-device MSM whose size is given by state.range(0).
+//
+// Every timed iteration calls large_msm() on the device buffers prepared by
+// main() and then waits for the device to finish, so the measured time covers
+// the complete GPU execution. If CUDA reports an error, the benchmark is
+// marked as failed instead of reporting a meaningless timing.
+//
+// After the timed loop the GPU power draw and temperature are sampled through
+// NVML and attached as the "PowerUsage" and "Temperature" counters. A counter
+// is only published when its NVML query succeeds.
+static void BM_msm(benchmark::State& state) {
+  const uint32_t msm_size = state.range(0);
+  for (auto _ : state) {
+    large_msm<scalar_t, projective_t, affine_t>(scalars_d, points_d, msm_size, result_d, true, false, bucket_factor, stream);
+    // Errors from asynchronous work surface at the synchronization point;
+    // launch errors are picked up via cudaGetLastError().
+    cudaError_t cuda_status = cudaDeviceSynchronize();
+    if (cuda_status == cudaSuccess) cuda_status = cudaGetLastError();
+    if (cuda_status != cudaSuccess) {
+      state.SkipWithError(cudaGetErrorString(cuda_status));
+      break;  // a ranged-for loop must be left explicitly after SkipWithError
+    }
+  }
+  // The outputs are initialized and only used on NVML_SUCCESS, so a failed
+  // query can never publish an indeterminate value.
+  unsigned int power = 0;
+  if (nvmlDeviceGetPowerUsage(device, &power) == NVML_SUCCESS) {
+    // NVML reports milliwatts; integer division yields whole watts without
+    // floating-point rounding.
+    state.counters["PowerUsage"] = int(power / 1000);
+  }
+  unsigned int temperature = 0;
+  if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature) == NVML_SUCCESS) {
+    state.counters["Temperature"] = int(temperature);
+  }
+}
+
+// Register BM_msm for every power-of-two size from 2^10 up to max_msm_size
+// (2^10, 2^11, ..., 2^22 for the current value), each run for at least 30 s.
+// The upper bound is tied to max_msm_size so that the sweep can never exceed
+// the buffers that main() sizes with the same constant.
+BENCHMARK(BM_msm)->MinTime(30.)
+  ->RangeMultiplier(2)
+  ->Range(1<<10, max_msm_size);
+
+// Entry point: prepares the MSM inputs on host and device, runs the
+// registered benchmarks and releases the resources again.
+//
+// Returns 0 on success, and 1 on unrecognized command-line arguments or when
+// a host allocation or a CUDA call needed for the setup fails. An NVML setup
+// failure is reported but is not fatal.
+int main(int argc, char** argv) {
+  // Parse the benchmark flags first so that a bad command line fails fast,
+  // before any input data has been generated.
+  ::benchmark::Initialize(&argc, argv);
+  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
+
+  // Reports a failed CUDA call on stderr; returns true when the call succeeded.
+  auto cuda_ok = [](cudaError_t status, const char* what) {
+    if (status == cudaSuccess) return true;
+    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
+    return false;
+  };
+
+  if (!cuda_ok(cudaDeviceReset(), "cudaDeviceReset")) return 1;
+  cudaDeviceProp deviceProperties;
+  const int deviceId = 0;
+  if (!cuda_ok(cudaGetDeviceProperties(&deviceProperties, deviceId), "cudaGetDeviceProperties")) return 1;
+  const std::string gpu_name = deviceProperties.name;
+  std::cout << gpu_name << std::endl;
+  const int gpu_clock_mhz = deviceProperties.clockRate / 1000;  // clockRate is reported in kHz
+
+  // NVML is only needed for the power/temperature counters in BM_msm.
+  const nvmlReturn_t nvml_init_status = nvmlInit();
+  if (nvml_init_status != NVML_SUCCESS) {
+    std::cerr << "nvmlInit failed: " << nvmlErrorString(nvml_init_status) << std::endl;
+  } else {
+    const nvmlReturn_t handle_status = nvmlDeviceGetHandleByIndex(0, &device);  // for GPU 0
+    if (handle_status != NVML_SUCCESS) {
+      std::cerr << "nvmlDeviceGetHandleByIndex failed: " << nvmlErrorString(handle_status) << std::endl;
+    }
+  }
+
+  std::cout << "Setting host data" << std::endl;
+
+  const size_t scalars_bytes = sizeof(scalar_t) * max_msm_size;
+  const size_t points_bytes = sizeof(affine_t) * max_msm_size;
+  scalars = (scalar_t*)malloc(scalars_bytes);
+  points = (affine_t*)malloc(points_bytes);
+  if (scalars == nullptr || points == nullptr) {
+    std::cerr << "Host allocation failed" << std::endl;
+    free(scalars);
+    free(points);
+    return 1;
+  }
+  // Only the first 10 points are drawn at random; every later point copies
+  // the one 10 positions earlier. All scalars are random.
+  for (unsigned i = 0; i < max_msm_size; i++) {
+    points[i] = (i < 10) ? projective_t::to_affine(projective_t::rand_host()) : points[i - 10];
+    scalars[i] = scalar_t::rand_host();
+  }
+
+  std::cout << "Moving data to device" << std::endl;
+
+  // On a setup failure the process exits right away; outstanding allocations
+  // are reclaimed at process exit.
+  if (!cuda_ok(cudaMalloc(&scalars_d, scalars_bytes), "cudaMalloc(scalars_d)")) return 1;
+  if (!cuda_ok(cudaMalloc(&points_d, points_bytes), "cudaMalloc(points_d)")) return 1;
+  if (!cuda_ok(cudaMalloc(&result_d, sizeof(projective_t)), "cudaMalloc(result_d)")) return 1;
+  if (!cuda_ok(cudaMemcpy(scalars_d, scalars, scalars_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(scalars)")) return 1;
+  if (!cuda_ok(cudaMemcpy(points_d, points, points_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(points)")) return 1;
+
+  std::cout << "Running benchmark" << std::endl;
+
+  if (!cuda_ok(cudaStreamCreate(&stream), "cudaStreamCreate")) return 1;
+
+  // Run all benchmarks
+  ::benchmark::AddCustomContext("team", "Ingonyama");
+  ::benchmark::AddCustomContext("project", "Icicle");
+  ::benchmark::AddCustomContext("runs_on", gpu_name);
+  ::benchmark::AddCustomContext("frequency_MHz", std::to_string(gpu_clock_mhz));
+  ::benchmark::AddCustomContext("uses", curve);
+  ::benchmark::AddCustomContext("comment", "on-device API");
+  ::benchmark::AddCustomContext("coefficient_C", std::to_string(bucket_factor));
+  ::benchmark::RunSpecifiedBenchmarks();
+
+  // Release everything that was acquired above.
+  cudaStreamDestroy(stream);
+  cudaFree(scalars_d);
+  cudaFree(points_d);
+  cudaFree(result_d);
+  free(scalars);
+  free(points);
+  if (nvml_init_status == NVML_SUCCESS) nvmlShutdown();
+  return 0;
+}
diff --git a/benchmarks/msm/compile.sh b/benchmarks/msm/compile.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Abort as soon as any command fails
+set -o errexit
+
+# Always start from an empty build tree, then configure and build into it
+build_dir=build
+rm -rf "${build_dir}"
+mkdir -p "${build_dir}"
+cmake -S . -B "${build_dir}"
+cmake --build "${build_dir}"
+
+