Skip to content

Commit

Permalink
msm benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
svpolonsky committed Dec 12, 2023
1 parent b4efc90 commit 4b47cf0
Show file tree
Hide file tree
Showing 7 changed files with 394 additions and 0 deletions.
42 changes: 42 additions & 0 deletions benchmarks/msm/.devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Development container for the Icicle MSM benchmark.
# Make sure NVIDIA Container Toolkit is installed on your host

# Use the specified base image
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04

# Update and install dependencies.
# DEBIAN_FRONTEND=noninteractive prevents packages that ask configuration
# questions (e.g. tzdata) from stalling an unattended `docker build`.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    nsight-systems-12.2 \
    cmake \
    protobuf-compiler \
    curl \
    build-essential \
    git \
    libboost-all-dev \
    jq \
    postgresql-client \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# apt-get install cuda-nsight-systems-12-2

# Clone Icicle from a GitHub repository.
# NOTE(review): this tracks the default branch, so rebuilding the image may
# pick up an Icicle revision with a different source layout - consider pinning.
RUN git clone https://github.com/ingonyama-zk/icicle.git /icicle

# Benchmarking in C++: build Google Benchmark in Release mode and install it
RUN git clone https://github.com/google/benchmark.git /opt/benchmark \
    && cd /opt/benchmark \
    && cmake -E make_directory "build" \
    && cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -S . -B "build" \
    && cmake --build "build" --config Release \
    && cmake --build "build" --config Release --target install


# Set the working directory in the container
WORKDIR /icicle-benchmark
# COPY . .
# RUN mkdir -p build && \
# cmake -S . -B build && \
# cmake --build build

# Specify the default command for the container
CMD ["/bin/bash"]
25 changes: 25 additions & 0 deletions benchmarks/msm/.devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"name": "Icicle Benchmarks - msm",
"build": {
"dockerfile": "Dockerfile"
},
"workspaceMount": "source=${localWorkspaceFolder}/.,target=/icicle-benchmark,type=bind",
"workspaceFolder": "/icicle-benchmark",
"runArgs": [
"--gpus",
"all"
],
"postCreateCommand": [
"nvidia-smi"
],
"customizations": {
"vscode": {
"extensions": [
"ms-vscode.cmake-tools",
"ms-python.python",
"ms-azuretools.vscode-docker",
"ms-vscode.cpptools-extension-pack"
]
}
}
}
29 changes: 29 additions & 0 deletions benchmarks/msm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
cmake_minimum_required(VERSION 3.18)

# Require C++17 for both host and device code.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# Choose the GPU architecture(s) before project() enables the CUDA language.
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
  # "native" is not available before 3.24: honour -DCUDA_ARCH=<arch> when it is
  # given, otherwise leave the variable alone so CMake falls back to its default.
  if (DEFINED CUDA_ARCH)
    set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
  endif ()
else()
  set(CMAKE_CUDA_ARCHITECTURES native) # build for the GPU(s) present on the build machine
endif ()
project(zk-benchmarks LANGUAGES CUDA CXX)

#set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
#set(CMAKE_CUDA_FLAGS_RELEASE "")
#set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")

add_executable(
  benchmark
  benchmark.cu
)

# Header locations: the Icicle checkout and the Google Benchmark sources.
target_include_directories(benchmark PRIVATE "/icicle" "/opt/benchmark/include")

# Fail at configure time with a clear message if a link dependency is missing.
find_library(BENCHMARK_LIBRARY benchmark PATHS /usr/local/lib REQUIRED)
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ REQUIRED)
target_link_libraries(benchmark PRIVATE ${BENCHMARK_LIBRARY} ${NVML_LIBRARY})
set_target_properties(benchmark PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

18 changes: 18 additions & 0 deletions benchmarks/msm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Icicle benchmark: Multi Scalar Multiplication

The benchmark measures the runtime of [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).


## Best-Practices

We recommend running the benchmarks in [ZK-containers](../ZK-containers.md) to save time and mental energy.

## Run benchmark

Inside the container, run:

```sh
./compile.sh
./run.sh
```

137 changes: 137 additions & 0 deletions benchmarks/msm/benchmark.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// Numeric identifiers for the supported curves. CURVE is compared against
// them by the preprocessor conditionals further down in this file.
#define CURVE_BN254 1
#define CURVE_BLS12_381 2
#define CURVE_BLS12_377 3

// Curve used for this build: set to one of the identifiers above.
#define CURVE CURVE_BLS12_377

#include <stdio.h>
#include <iostream>
#include <string>
#include <cuda_runtime.h>
#include <nvml.h>
#include <benchmark/benchmark.h>
#include "icicle/primitives/field.cuh"
#include "icicle/utils/storage.cuh"
#include "icicle/primitives/projective.cuh"

#include "icicle/appUtils/msm/msm.cu"

// Pull in the configuration of the curve selected by CURVE, open its
// namespace, and record a human-readable curve name that main() reports
// in the benchmark context.
// NOTE(review): the curve namespaces presumably provide scalar_t, affine_t
// and projective_t used below - confirm against the curve_config.cuh headers.
#if CURVE == CURVE_BN254

#include "icicle/curves/bn254/curve_config.cuh"
using namespace BN254;
const std::string curve = "BN254";

#elif CURVE == CURVE_BLS12_381

#include "icicle/curves/bls12_381/curve_config.cuh"
using namespace BLS12_381;
const std::string curve = "BLS12-381";

#elif CURVE == CURVE_BLS12_377

#include "icicle/curves/bls12_377/curve_config.cuh"
using namespace BLS12_377;
const std::string curve = "BLS12-377";

#else

// Without this branch an unknown (or undefined) CURVE would only surface
// later as a cascade of "undeclared identifier" errors.
#error "Unsupported CURVE: expected CURVE_BN254, CURVE_BLS12_381 or CURVE_BLS12_377"

#endif

// Number of elements allocated for the scalar and point buffers in main().
const unsigned max_msm_size = 1<<22;
// Passed to large_msm() on every call and reported by main() as "coefficient_C".
unsigned bucket_factor = 12;

// on-host data (allocated and filled in main())
scalar_t* scalars;
affine_t* points;
// NOTE(review): not referenced by any other code visible in this file.
projective_t result;

// on-device data (allocated in main(), consumed by BM_msm())
scalar_t* scalars_d;
affine_t* points_d;
projective_t* result_d;

// NVML handle used by BM_msm() to sample power usage and temperature.
nvmlDevice_t device;
// CUDA stream handed to every large_msm() call.
cudaStream_t stream;

/**
 * Google Benchmark body: times large_msm() on the first `state.range(0)`
 * elements of the device buffers prepared by main().
 *
 * Each timed iteration launches the MSM and then waits for the device to go
 * idle, so the measured time includes completion of the GPU work. After the
 * loop the GPU power draw and temperature are sampled through NVML and
 * attached as the "PowerUsage" and "Temperature" counters. A counter is
 * omitted when its NVML query fails.
 *
 * @param state Benchmark state; range(0) is the MSM size in elements.
 */
static void BM_msm(benchmark::State& state) {
  // The device buffers hold max_msm_size elements; refuse sizes that would read past them.
  if (state.range(0) < 0 || state.range(0) > static_cast<int64_t>(max_msm_size)) {
    state.SkipWithError("requested MSM size does not fit the allocated buffers");
    return;
  }
  const uint32_t msm_size = state.range(0);

  for (auto _ : state) {
    // NOTE(review): the two boolean arguments presumably select the on-device
    // API and disable the big-triangle variant - confirm against msm.cu.
    large_msm<scalar_t, projective_t, affine_t>(scalars_d, points_d, msm_size, result_d, true, false, bucket_factor, stream);
    const cudaError_t sync_status = cudaDeviceSynchronize();
    if (sync_status != cudaSuccess) {
      // A failed run must not be reported as a valid timing.
      state.SkipWithError(cudaGetErrorString(sync_status));
      return;
    }
  }

  unsigned int power = 0;
  if (nvmlDeviceGetPowerUsage(device, &power) == NVML_SUCCESS) {
    // NVML reports milliwatts; the counter is stored in whole watts.
    state.counters["PowerUsage"] = int(1.0e-3 * power);
  }
  unsigned int temperature = 0;
  if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature) == NVML_SUCCESS) {
    state.counters["Temperature"] = int(temperature);
  }
}

// Register BM_msm for every power-of-two size from 2^10 up to the capacity of
// the preallocated buffers (max_msm_size), running each size for at least 30 s.
// Tying the upper bound to max_msm_size keeps the benchmarked sizes and the
// buffer allocation in main() from drifting apart.
BENCHMARK(BM_msm)
  ->MinTime(30.)
  ->RangeMultiplier(2)
  ->Range(1 << 10, max_msm_size);

/**
 * Program entry point.
 *
 * Parses the Google Benchmark command line, queries GPU 0, fills the host
 * buffers with random scalars and points, copies them to the device, runs the
 * registered benchmarks and releases every resource it acquired.
 *
 * @return 0 on success, 1 if the arguments are not recognised or if any
 *         allocation / CUDA call needed for the setup fails.
 */
int main(int argc, char** argv) {
  // Parse the benchmark flags first so that bad arguments fail before the
  // expensive data generation and device setup.
  ::benchmark::Initialize(&argc, argv);
  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;

  // Reports a failed CUDA runtime call on stderr; returns true on success.
  const auto cuda_ok = [](cudaError_t status, const char* what) {
    if (status == cudaSuccess) return true;
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
  };

  bool nvml_ready = false;
  bool stream_created = false;
  // Releases whatever has been acquired so far. The global pointers start out
  // null, and both cudaFree and free accept null pointers.
  const auto release_all = [&]() {
    if (stream_created) cudaStreamDestroy(stream);
    cudaFree(scalars_d);
    cudaFree(points_d);
    cudaFree(result_d);
    free(scalars);
    free(points);
    if (nvml_ready) nvmlShutdown();
  };

  if (!cuda_ok(cudaDeviceReset(), "cudaDeviceReset")) return 1;
  cudaDeviceProp deviceProperties;
  const int deviceId = 0;
  if (!cuda_ok(cudaGetDeviceProperties(&deviceProperties, deviceId), "cudaGetDeviceProperties")) return 1;
  const std::string gpu_name = deviceProperties.name;
  std::cout << gpu_name << std::endl;
  // clockRate is divided by 1000 to obtain the value reported as "frequency_MHz".
  const int gpu_clock_mhz = deviceProperties.clockRate / 1000;

  // NVML only feeds the power/temperature counters, so a failure here is
  // reported but does not abort the run.
  // NOTE(review): NVML index 0 is assumed to be the same GPU as CUDA device 0 -
  // confirm on multi-GPU hosts.
  const nvmlReturn_t nvml_status = nvmlInit();
  if (nvml_status == NVML_SUCCESS) {
    nvml_ready = true;
    const nvmlReturn_t handle_status = nvmlDeviceGetHandleByIndex(0, &device); // for GPU 0
    if (handle_status != NVML_SUCCESS) {
      std::cerr << "nvmlDeviceGetHandleByIndex failed: " << nvmlErrorString(handle_status) << std::endl;
    }
  } else {
    std::cerr << "nvmlInit failed: " << nvmlErrorString(nvml_status) << std::endl;
  }

  std::cout << "Setting host data" << std::endl;

  scalars = (scalar_t*)malloc(sizeof(scalar_t) * max_msm_size);
  points = (affine_t*)malloc(sizeof(affine_t) * max_msm_size);
  if (scalars == nullptr || points == nullptr) {
    std::cerr << "Failed to allocate host buffers" << std::endl;
    release_all();
    return 1;
  }
  for (unsigned i = 0; i < max_msm_size; i++) {
    // Only the first 10 points are random; the rest repeat them with period 10.
    points[i] = (i < 10) ? projective_t::to_affine(projective_t::rand_host()) : points[i - 10];
    scalars[i] = scalar_t::rand_host();
  }

  std::cout << "Moving data to device" << std::endl;

  // The && chain stops at the first failing call.
  const bool device_ready =
    cuda_ok(cudaMalloc(&scalars_d, sizeof(scalar_t) * max_msm_size), "cudaMalloc(scalars_d)") &&
    cuda_ok(cudaMalloc(&points_d, sizeof(affine_t) * max_msm_size), "cudaMalloc(points_d)") &&
    cuda_ok(cudaMalloc(&result_d, sizeof(projective_t)), "cudaMalloc(result_d)") &&
    cuda_ok(cudaMemcpy(scalars_d, scalars, sizeof(scalar_t) * max_msm_size, cudaMemcpyHostToDevice), "cudaMemcpy(scalars)") &&
    cuda_ok(cudaMemcpy(points_d, points, sizeof(affine_t) * max_msm_size, cudaMemcpyHostToDevice), "cudaMemcpy(points)");
  if (!device_ready) {
    release_all();
    return 1;
  }

  std::cout << "Running benchmark" << std::endl;

  if (!cuda_ok(cudaStreamCreate(&stream), "cudaStreamCreate")) {
    release_all();
    return 1;
  }
  stream_created = true;

  // Describe the run in the benchmark report, then run all benchmarks
  ::benchmark::AddCustomContext("team", "Ingonyama");
  ::benchmark::AddCustomContext("project", "Icicle");
  ::benchmark::AddCustomContext("runs_on", gpu_name);
  ::benchmark::AddCustomContext("frequency_MHz", std::to_string(gpu_clock_mhz));
  ::benchmark::AddCustomContext("uses", curve);
  ::benchmark::AddCustomContext("comment", "on-device API");
  ::benchmark::AddCustomContext("coefficient_C", std::to_string(bucket_factor));
  ::benchmark::RunSpecifiedBenchmarks();
  ::benchmark::Shutdown();

  release_all();
  return 0;
}
11 changes: 11 additions & 0 deletions benchmarks/msm/compile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# Configure and build the benchmark from scratch into ./build.

# Exit immediately on error
set -e

# Work from the directory that contains this script, so that `rm -rf build`
# and `cmake -S .` always refer to the benchmark sources regardless of where
# the script is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

rm -rf build
mkdir -p build
cmake -S . -B build
cmake --build build


Loading

0 comments on commit 4b47cf0

Please sign in to comment.