From 340f9627109601d81bdd6d2ef0469ed72b7b6f76 Mon Sep 17 00:00:00 2001 From: aviadingo Date: Sun, 11 Aug 2024 13:58:49 +0300 Subject: [PATCH] ran clang-format --- icicle/include/hash/blake2s/blake2s.cuh | 80 +++--- icicle/src/hash/blake2s/Makefile | 8 +- icicle/src/hash/blake2s/blake2s.cu | 240 +++++++++-------- icicle/src/hash/blake2s/extern.cu | 9 +- icicle/src/hash/blake2s/test_blake2s.cu | 168 ++++++------ .../src/hash/blake2s/test_blake2s_batched.cu | 243 +++++++++--------- .../src/hash/blake2s/test_blake2s_hasher.cu | 110 -------- icicle/src/hash/blake2s/test_blake2s_integ.cu | 111 ++++++++ icicle/src/hash/blake2s/test_blake2s_seq.cu | 165 ++++++------ .../src/hash/blake2s/test_blake2s_seq_sa.cu | 172 ++++++------- icicle/src/hash/blake2s/test_tree.cu | 11 +- 11 files changed, 645 insertions(+), 672 deletions(-) delete mode 100644 icicle/src/hash/blake2s/test_blake2s_hasher.cu create mode 100644 icicle/src/hash/blake2s/test_blake2s_integ.cu diff --git a/icicle/include/hash/blake2s/blake2s.cuh b/icicle/include/hash/blake2s/blake2s.cuh index a4c675ce4..07059f0cd 100644 --- a/icicle/include/hash/blake2s/blake2s.cuh +++ b/icicle/include/hash/blake2s/blake2s.cuh @@ -7,49 +7,45 @@ * This file is released into the Public Domain. */ +#pragma once +typedef unsigned char BYTE; +typedef unsigned int WORD; +typedef unsigned long long LONG; - #pragma once - typedef unsigned char BYTE; - typedef unsigned int WORD; - typedef unsigned long long LONG; - - #include - #include - #include - #include - #include "gpu-utils/device_context.cuh" - #include "gpu-utils/error_handler.cuh" - - #include "hash/hash.cuh" - using namespace hash; +#include +#include +#include +#include +#include "gpu-utils/device_context.cuh" +#include "gpu-utils/error_handler.cuh" -namespace blake2s{ - #define BLAKE2S_ROUNDS 10 - #define BLAKE2S_BLOCK_LENGTH 64 - #define BLAKE2S_CHAIN_SIZE 8 - #define BLAKE2S_CHAIN_LENGTH (BLAKE2S_CHAIN_SIZE * sizeof(uint32_t)) - #define BLAKE2S_STATE_SIZE 16 - #define BLAKE2S_STATE_LENGTH (BLAKE2S_STATE_SIZE * sizeof(uint32_t)) +#include "hash/hash.cuh" +using namespace hash; - class Blake2s : public Hasher - { - public: - cudaError_t run_hash_many_kernel( - const BYTE* input, - BYTE* output, - WORD number_of_states, - WORD input_len, - WORD output_len, - const device_context::DeviceContext& ctx) const override; - - - Blake2s() - : Hasher(BLAKE2S_STATE_SIZE, BLAKE2S_STATE_SIZE, BLAKE2S_STATE_SIZE, 0) - { - } - }; +namespace blake2s { +#define BLAKE2S_ROUNDS 10 +#define BLAKE2S_BLOCK_LENGTH 64 +#define BLAKE2S_CHAIN_SIZE 8 +#define BLAKE2S_CHAIN_LENGTH (BLAKE2S_CHAIN_SIZE * sizeof(uint32_t)) +#define BLAKE2S_STATE_SIZE 16 +#define BLAKE2S_STATE_LENGTH (BLAKE2S_STATE_SIZE * sizeof(uint32_t)) - extern "C" { - void mcm_cuda_blake2s_hash_batch(BYTE *key, WORD keylen, BYTE *in, WORD inlen, BYTE *out, WORD output_len, WORD n_batch); - } -} \ No newline at end of file + class Blake2s : public Hasher + { + public: + cudaError_t run_hash_many_kernel( + const BYTE* input, + BYTE* output, + WORD number_of_states, + WORD input_len, + WORD output_len, + const device_context::DeviceContext& ctx) const override; + + Blake2s() : Hasher(BLAKE2S_STATE_SIZE, BLAKE2S_STATE_SIZE, BLAKE2S_STATE_SIZE, 0) {} + }; + + extern "C" { + void + mcm_cuda_blake2s_hash_batch(BYTE* key, WORD keylen, BYTE* in, WORD inlen, BYTE* out, WORD output_len, WORD n_batch); + } +} // namespace blake2s \ No newline at end of file diff --git a/icicle/src/hash/blake2s/Makefile b/icicle/src/hash/blake2s/Makefile index 32d7c0fce..4d13a3733 100644 --- a/icicle/src/hash/blake2s/Makefile +++ b/icicle/src/hash/blake2s/Makefile @@ -6,9 +6,9 @@ test_blake2s_batched: test_blake2s_batched.cu blake2s.cu nvcc -o test_blake2s_batched -I. -I../../../include test_blake2s_batched.cu -g ./test_blake2s_batched ./batched_test_vectors.csv -test_blake2s_hasher: test_blake2s_hasher.cu blake2s.cu - nvcc -o test_blake2s_hasher -I. -I../../../include test_blake2s_hasher.cu -g - ./test_blake2s_hasher +test_blake2s_integ: test_blake2s_integ.cu blake2s.cu + nvcc -o test_blake2s_integ -I. -I../../../include test_blake2s_integ.cu -g + ./test_blake2s_integ test_blake2s_seq: test_blake2s_seq.cu blake2s.cu nvcc -o test_blake2s_seq -I. -I../../../include test_blake2s_seq.cu -g @@ -23,4 +23,4 @@ test_blake2s_tree: test_tree.cu blake2s.cu ../../merkle-tree/merkle.cu ./test_blake2s_tree clear: - rm test_blake2s test_blake2s_tree test_blake2s_hasher test_blake2s_seq test_blake2s_seq_sa test_blake2s_batched \ No newline at end of file + rm test_blake2s test_blake2s_tree test_blake2s_integ test_blake2s_seq test_blake2s_seq_sa test_blake2s_batched \ No newline at end of file diff --git a/icicle/src/hash/blake2s/blake2s.cu b/icicle/src/hash/blake2s/blake2s.cu index e53ad03d0..9d1b1a44b 100644 --- a/icicle/src/hash/blake2s/blake2s.cu +++ b/icicle/src/hash/blake2s/blake2s.cu @@ -10,8 +10,7 @@ using namespace hash; namespace blake2s { - -typedef struct { + typedef struct { WORD digestlen; BYTE key[32]; WORD keylen; @@ -22,27 +21,24 @@ typedef struct { uint32_t t0; uint32_t t1; uint32_t f0; -} cuda_blake2s_ctx_t; + } cuda_blake2s_ctx_t; -typedef cuda_blake2s_ctx_t CUDA_BLAKE2S_CTX; + typedef cuda_blake2s_ctx_t CUDA_BLAKE2S_CTX; -__constant__ CUDA_BLAKE2S_CTX c_CTX; + __constant__ CUDA_BLAKE2S_CTX c_CTX; -__constant__ uint32_t BLAKE2S_IVS[8] = { - 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, - 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL -}; + __constant__ uint32_t BLAKE2S_IVS[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL}; -const uint32_t CPU_BLAKE2S_IVS[8] = { - 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, - 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL -}; + const uint32_t CPU_BLAKE2S_IVS[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL}; -void cpu_blake2s_init(cuda_blake2s_ctx_t *ctx, BYTE *key, WORD keylen, WORD digestbitlen) { + void cpu_blake2s_init(cuda_blake2s_ctx_t* ctx, BYTE* key, WORD keylen, WORD digestbitlen) + { memset(ctx, 0, sizeof(cuda_blake2s_ctx_t)); if (keylen > 0) { - memcpy(ctx->buff, key, keylen); - memcpy(ctx->key, key, keylen); + memcpy(ctx->buff, key, keylen); + memcpy(ctx->key, key, keylen); } ctx->keylen = keylen; ctx->digestlen = digestbitlen >> 3; @@ -60,32 +56,27 @@ void cpu_blake2s_init(cuda_blake2s_ctx_t *ctx, BYTE *key, WORD keylen, WORD dige ctx->chain[7] = CPU_BLAKE2S_IVS[7]; ctx->pos = (keylen > 0) ? BLAKE2S_BLOCK_LENGTH : 0; -} - -__constant__ uint8_t BLAKE2S_SIGMA[10][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 } -}; - -__device__ uint32_t cuda_blake2s_leuint32(const BYTE *in) { + } + + __constant__ uint8_t BLAKE2S_SIGMA[10][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}}; + + __device__ uint32_t cuda_blake2s_leuint32(const BYTE* in) + { uint32_t a; memcpy(&a, in, 4); return a; -} + } -__device__ uint32_t cuda_blake2s_ROTR32(uint32_t a, uint8_t b) { - return (a >> b) | (a << (32 - b)); -} + __device__ uint32_t cuda_blake2s_ROTR32(uint32_t a, uint8_t b) { return (a >> b) | (a << (32 - b)); } -__device__ void cuda_blake2s_G(cuda_blake2s_ctx_t *ctx, uint32_t m1, uint32_t m2, int32_t a, int32_t b, int32_t c, int32_t d) { + __device__ void + cuda_blake2s_G(cuda_blake2s_ctx_t* ctx, uint32_t m1, uint32_t m2, int32_t a, int32_t b, int32_t c, int32_t d) + { ctx->state[a] = ctx->state[a] + ctx->state[b] + m1; ctx->state[d] = cuda_blake2s_ROTR32(ctx->state[d] ^ ctx->state[a], 16); ctx->state[c] = ctx->state[c] + ctx->state[d]; @@ -94,9 +85,10 @@ __device__ void cuda_blake2s_G(cuda_blake2s_ctx_t *ctx, uint32_t m1, uint32_t m2 ctx->state[d] = cuda_blake2s_ROTR32(ctx->state[d] ^ ctx->state[a], 8); ctx->state[c] = ctx->state[c] + ctx->state[d]; ctx->state[b] = cuda_blake2s_ROTR32(ctx->state[b] ^ ctx->state[c], 7); -} + } -__device__ __forceinline__ void cuda_blake2s_init_state(cuda_blake2s_ctx_t *ctx) { + __device__ __forceinline__ void cuda_blake2s_init_state(cuda_blake2s_ctx_t* ctx) + { memcpy(ctx->state, ctx->chain, BLAKE2S_CHAIN_LENGTH); // ctx->state[8] = ctx->t0; // ctx->state[9] = ctx->t1; @@ -113,30 +105,32 @@ __device__ __forceinline__ void cuda_blake2s_init_state(cuda_blake2s_ctx_t *ctx) // ctx->state[12] = BLAKE2S_IVS[5]; // ctx->state[13] = BLAKE2S_IVS[6]; // ctx->state[14] = BLAKE2S_IVS[7]; -} + } -__device__ __forceinline__ void cuda_blake2s_compress(cuda_blake2s_ctx_t *ctx, const BYTE *in, WORD inoffset) { + __device__ __forceinline__ void cuda_blake2s_compress(cuda_blake2s_ctx_t* ctx, const BYTE* in, WORD inoffset) + { cuda_blake2s_init_state(ctx); - uint32_t m[16] = { 0 }; + uint32_t m[16] = {0}; for (int j = 0; j < 16; j++) - m[j] = cuda_blake2s_leuint32(in + inoffset + (j << 2)); + m[j] = cuda_blake2s_leuint32(in + inoffset + (j << 2)); for (int round = 0; round < BLAKE2S_ROUNDS; round++) { - cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][0]], m[BLAKE2S_SIGMA[round][1]], 0, 4, 8, 12); - cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][2]], m[BLAKE2S_SIGMA[round][3]], 1, 5, 9, 13); - cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][4]], m[BLAKE2S_SIGMA[round][5]], 2, 6, 10, 14); - cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][6]], m[BLAKE2S_SIGMA[round][7]], 3, 7, 11, 15); - cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][8]], m[BLAKE2S_SIGMA[round][9]], 0, 5, 10, 15); - cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][10]], m[BLAKE2S_SIGMA[round][11]], 1, 6, 11, 12); - cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][12]], m[BLAKE2S_SIGMA[round][13]], 2, 7, 8, 13); - cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][14]], m[BLAKE2S_SIGMA[round][15]], 3, 4, 9, 14); + cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][0]], m[BLAKE2S_SIGMA[round][1]], 0, 4, 8, 12); + cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][2]], m[BLAKE2S_SIGMA[round][3]], 1, 5, 9, 13); + cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][4]], m[BLAKE2S_SIGMA[round][5]], 2, 6, 10, 14); + cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][6]], m[BLAKE2S_SIGMA[round][7]], 3, 7, 11, 15); + cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][8]], m[BLAKE2S_SIGMA[round][9]], 0, 5, 10, 15); + cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][10]], m[BLAKE2S_SIGMA[round][11]], 1, 6, 11, 12); + cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][12]], m[BLAKE2S_SIGMA[round][13]], 2, 7, 8, 13); + cuda_blake2s_G(ctx, m[BLAKE2S_SIGMA[round][14]], m[BLAKE2S_SIGMA[round][15]], 3, 4, 9, 14); } for (int offset = 0; offset < BLAKE2S_CHAIN_SIZE; offset++) - ctx->chain[offset] = ctx->chain[offset] ^ ctx->state[offset] ^ ctx->state[offset + 8]; -} + ctx->chain[offset] = ctx->chain[offset] ^ ctx->state[offset] ^ ctx->state[offset + 8]; + } -__device__ void cuda_blake2s_init(cuda_blake2s_ctx_t *ctx, BYTE *key, WORD keylen, WORD digestbitlen) { + __device__ void cuda_blake2s_init(cuda_blake2s_ctx_t* ctx, BYTE* key, WORD keylen, WORD digestbitlen) + { memset(ctx, 0, sizeof(cuda_blake2s_ctx_t)); ctx->keylen = keylen; ctx->digestlen = digestbitlen >> 3; @@ -152,57 +146,56 @@ __device__ void cuda_blake2s_init(cuda_blake2s_ctx_t *ctx, BYTE *key, WORD keyle ctx->chain[5] = BLAKE2S_IVS[5]; ctx->chain[6] = BLAKE2S_IVS[6]; ctx->chain[7] = BLAKE2S_IVS[7]; - + if (keylen > 0) { - memcpy(ctx->buff, key, keylen); - memcpy(ctx->key, key, keylen); + memcpy(ctx->buff, key, keylen); + memcpy(ctx->key, key, keylen); } ctx->pos = (keylen > 0) ? BLAKE2S_BLOCK_LENGTH : 0; -} + } -__device__ void cuda_blake2s_update(cuda_blake2s_ctx_t *ctx, const BYTE *in, LONG inlen) { - if (inlen == 0) - return; + __device__ void cuda_blake2s_update(cuda_blake2s_ctx_t* ctx, const BYTE* in, LONG inlen) + { + if (inlen == 0) return; WORD start = 0; int64_t in_index = 0, block_index = 0; if (ctx->pos) { - start = BLAKE2S_BLOCK_LENGTH - ctx->pos; - if (start < inlen) { - memcpy(ctx->buff + ctx->pos, in, start); - ctx->t0 += BLAKE2S_BLOCK_LENGTH; - - if (ctx->t0 == 0) ctx->t1++; - - cuda_blake2s_compress(ctx, ctx->buff, 0); - ctx->pos = 0; - memset(ctx->buff, 0, BLAKE2S_BLOCK_LENGTH); - } else { - memcpy(ctx->buff + ctx->pos, in, inlen); - ctx->pos += inlen; - return; - } + start = BLAKE2S_BLOCK_LENGTH - ctx->pos; + if (start < inlen) { + memcpy(ctx->buff + ctx->pos, in, start); + ctx->t0 += BLAKE2S_BLOCK_LENGTH; + + if (ctx->t0 == 0) ctx->t1++; + + cuda_blake2s_compress(ctx, ctx->buff, 0); + ctx->pos = 0; + memset(ctx->buff, 0, BLAKE2S_BLOCK_LENGTH); + } else { + memcpy(ctx->buff + ctx->pos, in, inlen); + ctx->pos += inlen; + return; + } } block_index = inlen - BLAKE2S_BLOCK_LENGTH; for (in_index = start; in_index < block_index; in_index += BLAKE2S_BLOCK_LENGTH) { - ctx->t0 += BLAKE2S_BLOCK_LENGTH; - if (ctx->t0 == 0) - ctx->t1++; + ctx->t0 += BLAKE2S_BLOCK_LENGTH; + if (ctx->t0 == 0) ctx->t1++; - cuda_blake2s_compress(ctx, in, in_index); + cuda_blake2s_compress(ctx, in, in_index); } memcpy(ctx->buff, in + in_index, inlen - in_index); ctx->pos += inlen - in_index; -} + } -__device__ void cuda_blake2s_final(cuda_blake2s_ctx_t *ctx, BYTE *out) { + __device__ void cuda_blake2s_final(cuda_blake2s_ctx_t* ctx, BYTE* out) + { ctx->f0 = 0xFFFFFFFFUL; ctx->t0 += ctx->pos; - if (ctx->pos > 0 && ctx->t0 == 0) - ctx->t1++; + if (ctx->pos > 0 && ctx->t0 == 0) ctx->t1++; cuda_blake2s_compress(ctx, ctx->buff, 0); memset(ctx->buff, 0, BLAKE2S_BLOCK_LENGTH); @@ -210,34 +203,36 @@ __device__ void cuda_blake2s_final(cuda_blake2s_ctx_t *ctx, BYTE *out) { int i4 = 0; for (int i = 0; i < BLAKE2S_CHAIN_SIZE && ((i4 = i * 4) < ctx->digestlen); i++) { - BYTE *BYTEs = (BYTE*)(&ctx->chain[i]); - if (i4 < ctx->digestlen - 4) - memcpy(out + i4, BYTEs, 4); - else - memcpy(out + i4, BYTEs, ctx->digestlen - i4); + BYTE* BYTEs = (BYTE*)(&ctx->chain[i]); + if (i4 < ctx->digestlen - 4) + memcpy(out + i4, BYTEs, 4); + else + memcpy(out + i4, BYTEs, ctx->digestlen - i4); } -} + } -__global__ void kernel_blake2s_hash(const BYTE *indata, WORD inlen, BYTE *outdata, WORD n_batch, WORD BLAKE2S_BLOCK_SIZE) { + __global__ void + kernel_blake2s_hash(const BYTE* indata, WORD inlen, BYTE* outdata, WORD n_batch, WORD BLAKE2S_BLOCK_SIZE) + { WORD thread = blockIdx.x * blockDim.x + threadIdx.x; - if (thread >= n_batch) { - return; - } - BYTE key[32] = ""; // Null key + if (thread >= n_batch) { return; } + BYTE key[32] = ""; // Null key WORD keylen = 0; CUDA_BLAKE2S_CTX blake_ctx; - const BYTE *in = indata + thread * inlen; - BYTE *out = outdata + thread * BLAKE2S_BLOCK_SIZE; - + const BYTE* in = indata + thread * inlen; + BYTE* out = outdata + thread * BLAKE2S_BLOCK_SIZE; + cuda_blake2s_init(&blake_ctx, key, keylen, (BLAKE2S_BLOCK_SIZE << 3)); cuda_blake2s_update(&blake_ctx, in, inlen); cuda_blake2s_final(&blake_ctx, out); -} - -extern "C" { -void mcm_cuda_blake2s_hash_batch(BYTE *key, WORD keylen, BYTE *in, WORD inlen, BYTE *out, WORD output_len, WORD n_batch) { - BYTE *cuda_indata; - BYTE *cuda_outdata; + } + + extern "C" { + void + mcm_cuda_blake2s_hash_batch(BYTE* key, WORD keylen, BYTE* in, WORD inlen, BYTE* out, WORD output_len, WORD n_batch) + { + BYTE* cuda_indata; + BYTE* cuda_outdata; const WORD BLAKE2S_BLOCK_SIZE = output_len; cudaMalloc(&cuda_indata, inlen * n_batch); cudaMalloc(&cuda_outdata, BLAKE2S_BLOCK_SIZE * n_batch); @@ -255,32 +250,29 @@ void mcm_cuda_blake2s_hash_batch(BYTE *key, WORD keylen, BYTE *in, WORD inlen, B cudaMemcpy(out, cuda_outdata, BLAKE2S_BLOCK_SIZE * n_batch, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaError_t error = cudaGetLastError(); - if (error != cudaSuccess) { - printf("Error cuda blake2s hash: %s \n", cudaGetErrorString(error)); - } + if (error != cudaSuccess) { printf("Error cuda blake2s hash: %s \n", cudaGetErrorString(error)); } cudaFree(cuda_indata); cudaFree(cuda_outdata); -} -} + } + } -cudaError_t Blake2s::run_hash_many_kernel( + cudaError_t Blake2s::run_hash_many_kernel( const BYTE* input, - BYTE* output, - WORD number_of_states, - WORD input_len, - WORD output_len, + BYTE* output, + WORD number_of_states, + WORD input_len, + WORD output_len, const device_context::DeviceContext& ctx) const - { - const WORD BLAKE2S_BLOCK_SIZE = output_len; - WORD thread = 256; - WORD block = (number_of_states + thread - 1) / thread; - - kernel_blake2s_hash<<>>(input, input_len, output, number_of_states, BLAKE2S_BLOCK_SIZE); - - CHK_IF_RETURN(cudaPeekAtLastError()); - return CHK_LAST(); + { + const WORD BLAKE2S_BLOCK_SIZE = output_len; + WORD thread = 256; + WORD block = (number_of_states + thread - 1) / thread; - } + kernel_blake2s_hash<<>>( + input, input_len, output, number_of_states, BLAKE2S_BLOCK_SIZE); + CHK_IF_RETURN(cudaPeekAtLastError()); + return CHK_LAST(); + } } // namespace blake2s \ No newline at end of file diff --git a/icicle/src/hash/blake2s/extern.cu b/icicle/src/hash/blake2s/extern.cu index 1e5abe1cd..eb9120f78 100644 --- a/icicle/src/hash/blake2s/extern.cu +++ b/icicle/src/hash/blake2s/extern.cu @@ -9,11 +9,10 @@ #include "merkle-tree/merkle.cuh" namespace blake2s { - extern "C" cudaError_t - blake2s_cuda(BYTE * input, BYTE * output, WORD number_of_blocks, WORD input_block_size, WORD output_block_size, HashConfig& config) + extern "C" cudaError_t blake2s_cuda( + BYTE* input, BYTE* output, WORD number_of_blocks, WORD input_block_size, WORD output_block_size, HashConfig& config) { - return Blake2s().hash_many( - input, output, number_of_blocks, input_block_size, output_block_size, config); + return Blake2s().hash_many(input, output, number_of_blocks, input_block_size, output_block_size, config); } extern "C" cudaError_t build_blake2s_merkle_tree_cuda( @@ -28,6 +27,4 @@ namespace blake2s { leaves, digests, height, input_block_len, blake2s, blake2s, tree_config); } - - } // namespace blake2s \ No newline at end of file diff --git a/icicle/src/hash/blake2s/test_blake2s.cu b/icicle/src/hash/blake2s/test_blake2s.cu index 2dc67b0e8..2c68e6d3f 100644 --- a/icicle/src/hash/blake2s/test_blake2s.cu +++ b/icicle/src/hash/blake2s/test_blake2s.cu @@ -9,101 +9,103 @@ #include "hash/blake2s/blake2s.cuh" - using namespace blake2s; #define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now(); -#define END_TIMER(timer, msg) \ +#define END_TIMER(timer, msg) \ printf("%s: %.0f us\n", msg, FpMicroseconds(std::chrono::high_resolution_clock::now() - timer##_start).count()); extern "C" { -void mcm_cuda_blake2s_hash_batch(BYTE *key, WORD keylen, BYTE *in, WORD inlen, BYTE *out, WORD outlen, WORD n_batch); +void mcm_cuda_blake2s_hash_batch(BYTE* key, WORD keylen, BYTE* in, WORD inlen, BYTE* out, WORD outlen, WORD n_batch); } -void print_hash(BYTE *hash, WORD len) { - printf("%d \n", len); - for (WORD i = 0; i < len; i++) { - printf("%02x", hash[i]); - } - printf("\n"); +void print_hash(BYTE* hash, WORD len) +{ + printf("%d \n", len); + for (WORD i = 0; i < len; i++) { + printf("%02x", hash[i]); + } + printf("\n"); } -BYTE *read_file(const char *filename, size_t *filesize) { - FILE *file = fopen(filename, "rb"); - if (!file) { - perror("Failed to open file"); - exit(EXIT_FAILURE); - } - - fseek(file, 0, SEEK_END); - *filesize = ftell(file); - fseek(file, 0, SEEK_SET); - - BYTE *buffer = (BYTE *)malloc(*filesize); - if (!buffer) { - perror("Failed to allocate memory"); - fclose(file); - exit(EXIT_FAILURE); - } - - size_t bytesRead = fread(buffer, 1, *filesize, file); - if (bytesRead != *filesize) { - perror("Failed to read file"); - free(buffer); - fclose(file); - exit(EXIT_FAILURE); - } +BYTE* read_file(const char* filename, size_t* filesize) +{ + FILE* file = fopen(filename, "rb"); + if (!file) { + perror("Failed to open file"); + exit(EXIT_FAILURE); + } + + fseek(file, 0, SEEK_END); + *filesize = ftell(file); + fseek(file, 0, SEEK_SET); + + BYTE* buffer = (BYTE*)malloc(*filesize); + if (!buffer) { + perror("Failed to allocate memory"); + fclose(file); + exit(EXIT_FAILURE); + } + size_t bytesRead = fread(buffer, 1, *filesize, file); + if (bytesRead != *filesize) { + perror("Failed to read file"); + free(buffer); fclose(file); - return buffer; + exit(EXIT_FAILURE); + } + + fclose(file); + return buffer; } -int main(int argc, char **argv) { - using FpMilliseconds = std::chrono::duration; - using FpMicroseconds = std::chrono::duration; - - BYTE *input; - size_t inlen; - const char *input_filename; - const char *default_input = "aaaaaaaaaaa"; - - if (argc < 2) { - // Use default input if no file is provided - input = (BYTE *)default_input; - inlen = strlen(default_input); - } else { - input_filename = argv[1]; - input = read_file(input_filename, &inlen); - } - - // Test parameters - BYTE key[32] = ""; // Example key - WORD keylen = strlen((char *)key); - WORD n_outbit = 256; // Output length in bits - WORD n_batch = 1; // Number of hashes to compute in parallel - - // Allocate memory for the output - WORD outlen = n_outbit / 8; - BYTE *output = (BYTE *)malloc(outlen * n_batch); - if (!output) { - perror("Failed to allocate memory for output"); - if (argc >= 2) free(input); // Free file buffer if it was allocated - return EXIT_FAILURE; - } - - printf("Key len: %d \n", keylen); - - // Perform the hashing - START_TIMER(blake_timer) - mcm_cuda_blake2s_hash_batch(key, keylen, input, inlen, output, outlen, n_batch); - END_TIMER(blake_timer, "Blake Timer") - - // Print the result - printf("BLAKE2S hash:\n"); - print_hash(output, outlen); - - // Clean up - free(output); - if (argc >= 2) free(input); // Free file buffer if it was allocated - return 0; +int main(int argc, char** argv) +{ + using FpMilliseconds = std::chrono::duration; + using FpMicroseconds = std::chrono::duration; + + BYTE* input; + size_t inlen; + const char* input_filename; + const char* default_input = "aaaaaaaaaaa"; + + if (argc < 2) { + // Use default input if no file is provided + input = (BYTE*)default_input; + inlen = strlen(default_input); + } else { + input_filename = argv[1]; + input = read_file(input_filename, &inlen); + } + + // Test parameters + BYTE key[32] = ""; // Example key + WORD keylen = strlen((char*)key); + WORD n_outbit = 256; // Output length in bits + WORD n_batch = 1; // Number of hashes to compute in parallel + + // Allocate memory for the output + WORD outlen = n_outbit / 8; + BYTE* output = (BYTE*)malloc(outlen * n_batch); + if (!output) { + perror("Failed to allocate memory for output"); + if (argc >= 2) free(input); // Free file buffer if it was allocated + return EXIT_FAILURE; + } + + printf("Key len: %d \n", keylen); + + // Perform the hashing + START_TIMER(blake_timer) + mcm_cuda_blake2s_hash_batch(key, keylen, input, inlen, output, outlen, n_batch); + END_TIMER(blake_timer, "Blake Timer") + + // Print the result + printf("BLAKE2S hash:\n"); + print_hash(output, outlen); + + // Clean up + free(output); + if (argc >= 2) free(input); // Free file buffer if it was allocated + return 0; } diff --git a/icicle/src/hash/blake2s/test_blake2s_batched.cu b/icicle/src/hash/blake2s/test_blake2s_batched.cu index 8bef55049..405cd924c 100644 --- a/icicle/src/hash/blake2s/test_blake2s_batched.cu +++ b/icicle/src/hash/blake2s/test_blake2s_batched.cu @@ -14,139 +14,142 @@ using namespace blake2s; #define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now(); -#define END_TIMER(timer, msg) \ +#define END_TIMER(timer, msg) \ printf("%s: %.0f us\n", msg, FpMicroseconds(std::chrono::high_resolution_clock::now() - timer##_start).count()); - -void print_hash(BYTE *hash, WORD len) { - for (WORD i = 0; i < len; i++) { - printf("%02x", hash[i]); - } - printf("\n"); +void print_hash(BYTE* hash, WORD len) +{ + for (WORD i = 0; i < len; i++) { + printf("%02x", hash[i]); + } + printf("\n"); } // Function to trim leading and trailing whitespace from a string -std::string trim(const std::string& str) { - size_t first = str.find_first_not_of(' '); - size_t last = str.find_last_not_of(' '); - return str.substr(first, (last - first + 1)); +std::string trim(const std::string& str) +{ + size_t first = str.find_first_not_of(' '); + size_t last = str.find_last_not_of(' '); + return str.substr(first, (last - first + 1)); } -std::unordered_map load_strings_and_hashes_from_csv(const char *filename) { - std::unordered_map string_hash_map; - std::ifstream file(filename); - if (!file.is_open()) { - perror("Failed to open CSV file"); - exit(EXIT_FAILURE); +std::unordered_map load_strings_and_hashes_from_csv(const char* filename) +{ + std::unordered_map string_hash_map; + std::ifstream file(filename); + if (!file.is_open()) { + perror("Failed to open CSV file"); + exit(EXIT_FAILURE); + } + + std::string line; + while (std::getline(file, line)) { + std::stringstream ss(line); + std::string input_string, hash_value; + if (std::getline(ss, input_string, ',') && std::getline(ss, hash_value, ',')) { + // Trim any whitespace around the strings + input_string = trim(input_string); + hash_value = trim(hash_value); + string_hash_map[input_string] = hash_value; } + } - std::string line; - while (std::getline(file, line)) { - std::stringstream ss(line); - std::string input_string, hash_value; - if (std::getline(ss, input_string, ',') && std::getline(ss, hash_value, ',')) { - // Trim any whitespace around the strings - input_string = trim(input_string); - hash_value = trim(hash_value); - string_hash_map[input_string] = hash_value; - } - } - - file.close(); - return string_hash_map; + file.close(); + return string_hash_map; } -int main(int argc, char **argv) { - using FpMilliseconds = std::chrono::duration; - using FpMicroseconds = std::chrono::duration; - - if (argc < 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - const char *csv_filename = argv[1]; - auto string_hash_map = load_strings_and_hashes_from_csv(csv_filename); - - if (string_hash_map.size() != 10) { - fprintf(stderr, "CSV file must contain exactly 10 strings and hashes.\n"); - return EXIT_FAILURE; - } - - // Prepare the test strings and expected hashes from the map - std::vector test_strings; - std::vector expected_hashes; - for (const auto& pair : string_hash_map) { - test_strings.push_back(pair.first); - expected_hashes.push_back(pair.second); - } - - // Test parameters - BYTE key[32] = ""; // Example key - WORD keylen = strlen((char *)key); - WORD n_outbit = 256; // Output length in bits - WORD n_batch = 10; // Number of different inputs to hash in parallel - size_t max_len = 10; // Max length of the test strings - - // Calculate total input length and allocate memory for the batched input - size_t total_len = 0; - for (const auto& str : test_strings) { - total_len += str.size(); - } - BYTE *batched_input = (BYTE *)malloc(total_len); - WORD *in_lengths = (WORD *)malloc(n_batch * sizeof(WORD)); - - // Copy test strings to batched input and store their lengths - BYTE *current_position = batched_input; - for (int i = 0; i < n_batch; ++i) { - memcpy(current_position, test_strings[i].c_str(), test_strings[i].size()); - current_position += test_strings[i].size(); - } - - // Allocate memory for the output - WORD outlen = n_outbit / 8; - BYTE *output = (BYTE *)malloc(outlen * n_batch); - if (!output) { - perror("Failed to allocate memory for output"); - free(batched_input); - free(in_lengths); - return EXIT_FAILURE; +int main(int argc, char** argv) +{ + using FpMilliseconds = std::chrono::duration; + using FpMicroseconds = std::chrono::duration; + + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + + const char* csv_filename = argv[1]; + auto string_hash_map = load_strings_and_hashes_from_csv(csv_filename); + + if (string_hash_map.size() != 10) { + fprintf(stderr, "CSV file must contain exactly 10 strings and hashes.\n"); + return EXIT_FAILURE; + } + + // Prepare the test strings and expected hashes from the map + std::vector test_strings; + std::vector expected_hashes; + for (const auto& pair : string_hash_map) { + test_strings.push_back(pair.first); + expected_hashes.push_back(pair.second); + } + + // Test parameters + BYTE key[32] = ""; // Example key + WORD keylen = strlen((char*)key); + WORD n_outbit = 256; // Output length in bits + WORD n_batch = 10; // Number of different inputs to hash in parallel + size_t max_len = 10; // Max length of the test strings + + // Calculate total input length and allocate memory for the batched input + size_t total_len = 0; + for (const auto& str : test_strings) { + total_len += str.size(); + } + BYTE* batched_input = (BYTE*)malloc(total_len); + WORD* in_lengths = (WORD*)malloc(n_batch * sizeof(WORD)); + + // Copy test strings to batched input and store their lengths + BYTE* current_position = batched_input; + for (int i = 0; i < n_batch; ++i) { + memcpy(current_position, test_strings[i].c_str(), test_strings[i].size()); + current_position += test_strings[i].size(); + } + + // Allocate memory for the output + WORD outlen = n_outbit / 8; + BYTE* output = (BYTE*)malloc(outlen * n_batch); + if (!output) { + perror("Failed to allocate memory for output"); + free(batched_input); + free(in_lengths); + return EXIT_FAILURE; + } + + printf("Key len: %d \n", keylen); + HashConfig config = default_hash_config(); + + // Perform the hashing + START_TIMER(blake_timer) + // mcm_cuda_blake2s_hash_batch(key, keylen, batched_input, max_len, output, outlen, n_batch); + blake2s_cuda(batched_input, output, n_batch, max_len, outlen, config); + END_TIMER(blake_timer, "Blake Timer") + + // Print and compare the results + printf("BLAKE2S hash (batch size = %d):\n", n_batch); + for (WORD i = 0; i < n_batch; i++) { + printf("String: %s\n", test_strings[i].c_str()); + printf("Computed Hash %d: ", i + 1); + print_hash(output + i * outlen, outlen); + std::cout << "Expected Hash " << i + 1 << ": " << expected_hashes[i] << std::endl; + + std::string computed_hash; + for (WORD j = 0; j < outlen; ++j) { + char buffer[3]; + snprintf(buffer, sizeof(buffer), "%02x", output[i * outlen + j]); + computed_hash += buffer; } - printf("Key len: %d \n", keylen); - HashConfig config = default_hash_config(); - - // Perform the hashing - START_TIMER(blake_timer) - // mcm_cuda_blake2s_hash_batch(key, keylen, batched_input, max_len, output, outlen, n_batch); - blake2s_cuda(batched_input, output, n_batch, max_len, outlen, config); - END_TIMER(blake_timer, "Blake Timer") - - // Print and compare the results - printf("BLAKE2S hash (batch size = %d):\n", n_batch); - for (WORD i = 0; i < n_batch; i++) { - printf("String: %s\n", test_strings[i].c_str()); - printf("Computed Hash %d: ", i + 1); - print_hash(output + i * outlen, outlen); - std::cout << "Expected Hash " << i+1 <<": " < -#include "gpu-utils/device_context.cuh" - -#include -#include -#include -#include -#include -#include "extern.cu" - - -using namespace blake2s; - -#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now(); -#define END_TIMER(timer, msg) \ - printf("%s: %.0f us\n", msg, FpMicroseconds(std::chrono::high_resolution_clock::now() - timer##_start).count()); - -extern "C" { -void mcm_cuda_blake2s_hash_batch(BYTE *key, WORD keylen, BYTE *in, WORD inlen, BYTE *out, WORD n_outbit, WORD n_batch); -} - -void print_hash(BYTE *hash, WORD len) { - printf("Hash Len: %d \n", len); - printf("BLAKE2S hash:\n"); - for (WORD i = 0; i < len; i++) { - printf("%02x", hash[i]); - } - printf("\n"); -} - -BYTE *read_file(const char *filename, size_t *filesize) { - FILE *file = fopen(filename, "rb"); - if (!file) { - perror("Failed to open file"); - exit(EXIT_FAILURE); - } - - fseek(file, 0, SEEK_END); - *filesize = ftell(file); - fseek(file, 0, SEEK_SET); - - BYTE *buffer = (BYTE *)malloc(*filesize); - if (!buffer) { - perror("Failed to allocate memory"); - fclose(file); - exit(EXIT_FAILURE); - } - - size_t bytesRead = fread(buffer, 1, *filesize, file); - if (bytesRead != *filesize) { - perror("Failed to read file"); - free(buffer); - fclose(file); - exit(EXIT_FAILURE); - } - - fclose(file); - return buffer; -} - -int main(int argc, char **argv) { - using FpMilliseconds = std::chrono::duration; - using FpMicroseconds = std::chrono::duration; - - BYTE *input; - size_t inlen; - const char *input_filename; - const char *default_input = "aaaaaaaaaaa"; - - if (argc < 2) { - // Use default input if no file is provided - input = (BYTE *)default_input; - inlen = strlen(default_input); - } else { - input_filename = argv[1]; - input = read_file(input_filename, &inlen); - } - - // Test parameters - BYTE key[32] = ""; // Example key - WORD keylen = strlen((char *)key); - WORD n_outbit = 256; // Output length in bits - WORD n_batch = 1; // Number of hashes to compute in parallel - // Allocate memory for the output - WORD outlen = n_outbit / 8; - BYTE *output = (BYTE *)malloc(outlen * n_batch); - if (!output) { - perror("Failed to allocate memory for output"); - if (argc >= 2) free(input); // Free file buffer if it was allocated - return EXIT_FAILURE; - } - - printf("Key len: %d \n", keylen); - - // Perform the hashing - START_TIMER(blake_timer) - HashConfig config = default_hash_config(); - - blake2s_cuda(input, output, n_batch, inlen, outlen, config); - END_TIMER(blake_timer, "Blake Timer") - - // Print the result - print_hash(output, outlen); - - // Clean up - free(output); - if (argc >= 2) free(input); // Free file buffer if it was allocated - return 0; -} - diff --git a/icicle/src/hash/blake2s/test_blake2s_integ.cu b/icicle/src/hash/blake2s/test_blake2s_integ.cu new file mode 100644 index 000000000..2991333fa --- /dev/null +++ b/icicle/src/hash/blake2s/test_blake2s_integ.cu @@ -0,0 +1,111 @@ +#include +#include "gpu-utils/device_context.cuh" + +#include +#include +#include +#include +#include +#include "extern.cu" + +using namespace blake2s; + +#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now(); +#define END_TIMER(timer, msg) \ + printf("%s: %.0f us\n", msg, FpMicroseconds(std::chrono::high_resolution_clock::now() - timer##_start).count()); + +extern "C" { +void mcm_cuda_blake2s_hash_batch(BYTE* key, WORD keylen, BYTE* in, WORD inlen, BYTE* out, WORD n_outbit, WORD n_batch); +} + +void print_hash(BYTE* hash, WORD len) +{ + printf("Hash Len: %d \n", len); + printf("BLAKE2S hash:\n"); + for (WORD i = 0; i < len; i++) { + printf("%02x", hash[i]); + } + printf("\n"); +} + +BYTE* read_file(const char* filename, size_t* filesize) +{ + FILE* file = fopen(filename, "rb"); + if (!file) { + perror("Failed to open file"); + exit(EXIT_FAILURE); + } + + fseek(file, 0, SEEK_END); + *filesize = ftell(file); + fseek(file, 0, SEEK_SET); + + BYTE* buffer = (BYTE*)malloc(*filesize); + if (!buffer) { + perror("Failed to allocate memory"); + fclose(file); + exit(EXIT_FAILURE); + } + + size_t bytesRead = fread(buffer, 1, *filesize, file); + if (bytesRead != *filesize) { + perror("Failed to read file"); + free(buffer); + fclose(file); + exit(EXIT_FAILURE); + } + + fclose(file); + return buffer; +} + +int main(int argc, char** argv) +{ + using FpMilliseconds = std::chrono::duration; + using FpMicroseconds = std::chrono::duration; + + BYTE* input; + size_t inlen; + const char* input_filename; + const char* default_input = "aaaaaaaaaaa"; + + if (argc < 2) { + // Use default input if no file is provided + input = (BYTE*)default_input; + inlen = strlen(default_input); + } else { + input_filename = argv[1]; + input = read_file(input_filename, &inlen); + } + + // Test parameters + BYTE key[32] = ""; // Example key + WORD keylen = strlen((char*)key); + WORD n_outbit = 256; // Output length in bits + WORD n_batch = 1; // Number of hashes to compute in parallel + // Allocate memory for the output + WORD outlen = n_outbit / 8; + BYTE* output = (BYTE*)malloc(outlen * n_batch); + if (!output) { + perror("Failed to allocate memory for output"); + if (argc >= 2) free(input); // Free file buffer if it was allocated + return EXIT_FAILURE; + } + + printf("Key len: %d \n", keylen); + + // Perform the hashing + START_TIMER(blake_timer) + HashConfig config = default_hash_config(); + + blake2s_cuda(input, output, n_batch, inlen, outlen, config); + END_TIMER(blake_timer, "Blake Timer") + + // Print the result + print_hash(output, outlen); + + // Clean up + free(output); + if (argc >= 2) free(input); // Free file buffer if it was allocated + return 0; +} diff --git a/icicle/src/hash/blake2s/test_blake2s_seq.cu b/icicle/src/hash/blake2s/test_blake2s_seq.cu index 8e4af10f6..1578d8d59 100644 --- a/icicle/src/hash/blake2s/test_blake2s_seq.cu +++ b/icicle/src/hash/blake2s/test_blake2s_seq.cu @@ -8,106 +8,97 @@ #include #include "extern.cu" - using namespace blake2s; #define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now(); -#define END_TIMER(timer, msg) \ +#define END_TIMER(timer, msg) \ printf("%s: %.0f us\n", msg, FpMicroseconds(std::chrono::high_resolution_clock::now() - timer##_start).count()); - -void print_hash(BYTE *hash, WORD len) { - printf("Hash Len: %d \n", len); - printf("BLAKE2S hash:\n"); - for (WORD i = 0; i < len; i++) { - printf("%02x", hash[i]); - } - printf("\n"); +void print_hash(BYTE* hash, WORD len) +{ + printf("Hash Len: %d \n", len); + printf("BLAKE2S hash:\n"); + for (WORD i = 0; i < len; i++) { + printf("%02x", hash[i]); + } + printf("\n"); } -std::string byte_to_hex(BYTE *data, WORD len) { - std::stringstream ss; - for (WORD i = 0; i < len; i++) { - ss << std::hex << std::setw(2) << std::setfill('0') << (int)data[i]; - } - return ss.str(); +std::string byte_to_hex(BYTE* data, WORD len) +{ + std::stringstream ss; + for (WORD i = 0; i < len; i++) { + ss << std::hex << std::setw(2) << std::setfill('0') << (int)data[i]; + } + return ss.str(); } -std::vector load_csv(const char *filename) { - std::vector hashes; - std::ifstream file(filename); - std::string line; - while (std::getline(file, line)) { - // Directly add the line as a hash, assuming one hash per line - hashes.push_back(line); - } - return hashes; +std::vector load_csv(const char* filename) +{ + std::vector hashes; + std::ifstream file(filename); + std::string line; + while (std::getline(file, line)) { + // Directly add the line as a hash, assuming one hash per line + hashes.push_back(line); + } + return hashes; } +int main(int argc, char** argv) +{ + using FpMilliseconds = std::chrono::duration; + using FpMicroseconds = std::chrono::duration; + + std::vector test_strings = {"0", "01", "012", "0123", "01234", + "012345", "0123456", "01234567", "012345678", "0123456789"}; + + const char* csv_filename = "expected_hashes.csv"; // Replace with your actual CSV file name + std::vector expected_hashes = load_csv(csv_filename); + assert(expected_hashes.size() == test_strings.size() && "Number of hashes in CSV must match number of test strings."); + std::cout << "Loaded hashes from CSV:" << std::endl; + // for (size_t i = 0; i < expected_hashes.size(); ++i) { + // std::cout << "Expected hash " << i << ": " << expected_hashes[i] << std::endl; + // } + + // Test parameters + WORD n_outbit = 256; // Output length in bits + WORD n_batch = 1; // Number of hashes to compute in parallel + + // Allocate memory for the output + WORD outlen = n_outbit / 8; + + // Perform the hashing + HashConfig config = default_hash_config(); + + for (size_t i = 0; i < test_strings.size(); i++) { + BYTE* output = (BYTE*)malloc(outlen * n_batch); + if (!output) { + perror("Failed to allocate memory for output"); + return EXIT_FAILURE; + } - - -int main(int argc, char **argv) { - using FpMilliseconds = std::chrono::duration; - using FpMicroseconds = std::chrono::duration; - - std::vector test_strings = { - "0", "01", "012", "0123", "01234", "012345", "0123456", "01234567", "012345678", "0123456789" - }; - - const char *csv_filename = "expected_hashes.csv"; // Replace with your actual CSV file name - std::vector expected_hashes = load_csv(csv_filename); - assert(expected_hashes.size() == test_strings.size() && "Number of hashes in CSV must match number of test strings."); - std::cout << "Loaded hashes from CSV:" << std::endl; - // for (size_t i = 0; i < expected_hashes.size(); ++i) { - // std::cout << "Expected hash " << i << ": " << expected_hashes[i] << std::endl; - // } - - // Test parameters - WORD n_outbit = 256; // Output length in bits - WORD n_batch = 1; // Number of hashes to compute in parallel - - - - // Allocate memory for the output - WORD outlen = n_outbit / 8; + const std::string& input_str = test_strings[i]; + BYTE* input = (BYTE*)input_str.c_str(); + size_t inlen = input_str.size(); // Perform the hashing - HashConfig config = default_hash_config(); - - for (size_t i = 0; i < test_strings.size(); i++) { - - BYTE *output = (BYTE *)malloc(outlen * n_batch); - if (!output) { - perror("Failed to allocate memory for output"); - return EXIT_FAILURE; - } - - const std::string &input_str = test_strings[i]; - BYTE *input = (BYTE *)input_str.c_str(); - size_t inlen = input_str.size(); - - - // Perform the hashing - START_TIMER(blake_timer) - blake2s_cuda(input, output, n_batch, inlen, outlen, config); - END_TIMER(blake_timer, "Blake Timer") - // Convert the output to hex string - std::string computed_hash = byte_to_hex(output, outlen); - // Compare with the expected hash - - - if (computed_hash == expected_hashes[i]) { - std::cout << "Test " << i << " passed." << std::endl; - } else { - std::cout << "Test " << i << " failed." << std::endl; - std::cout << "Expected: " << expected_hashes[i] << std::endl; - std::cout << "Got: " << computed_hash << std::endl; - } - free(output); + START_TIMER(blake_timer) + blake2s_cuda(input, output, n_batch, inlen, outlen, config); + END_TIMER(blake_timer, "Blake Timer") + // Convert the output to hex string + std::string computed_hash = byte_to_hex(output, outlen); + // Compare with the expected hash + + if (computed_hash == expected_hashes[i]) { + std::cout << "Test " << i << " passed." << std::endl; + } else { + std::cout << "Test " << i << " failed." << std::endl; + std::cout << "Expected: " << expected_hashes[i] << std::endl; + std::cout << "Got: " << computed_hash << std::endl; } - - return 0; - -} + free(output); + } + return 0; +} diff --git a/icicle/src/hash/blake2s/test_blake2s_seq_sa.cu b/icicle/src/hash/blake2s/test_blake2s_seq_sa.cu index 38dff5565..cf8177a30 100644 --- a/icicle/src/hash/blake2s/test_blake2s_seq_sa.cu +++ b/icicle/src/hash/blake2s/test_blake2s_seq_sa.cu @@ -12,108 +12,102 @@ using namespace blake2s; #define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now(); -#define END_TIMER(timer, msg) \ +#define END_TIMER(timer, msg) \ printf("%s: %.0f us\n", msg, FpMicroseconds(std::chrono::high_resolution_clock::now() - timer##_start).count()); extern "C" { -void mcm_cuda_blake2s_hash_batch(BYTE *key, WORD keylen, BYTE *in, WORD inlen, BYTE *out, WORD n_outbit, WORD n_batch); +void mcm_cuda_blake2s_hash_batch(BYTE* key, WORD keylen, BYTE* in, WORD inlen, BYTE* out, WORD n_outbit, WORD n_batch); } -void print_hash(BYTE *hash, WORD len) { - printf("Hash Len: %d \n", len); - printf("BLAKE2S hash:\n"); - for (WORD i = 0; i < len; i++) { - printf("%02x", hash[i]); - } - printf("\n"); +void print_hash(BYTE* hash, WORD len) +{ + printf("Hash Len: %d \n", len); + printf("BLAKE2S hash:\n"); + for (WORD i = 0; i < len; i++) { + printf("%02x", hash[i]); + } + printf("\n"); } -std::string byte_to_hex(BYTE *data, WORD len) { - std::stringstream ss; - for (WORD i = 0; i < len; i++) { - ss << std::hex << std::setw(2) << std::setfill('0') << (int)data[i]; - } - return ss.str(); +std::string byte_to_hex(BYTE* data, WORD len) +{ + std::stringstream ss; + for (WORD i = 0; i < len; i++) { + ss << std::hex << std::setw(2) << std::setfill('0') << (int)data[i]; + } + return ss.str(); } -std::vector load_csv(const char *filename) { - std::vector hashes; - std::ifstream file(filename); - std::string line; - while (std::getline(file, line)) { - // Directly add the line as a hash, assuming one hash per line - hashes.push_back(line); - } - return hashes; +std::vector load_csv(const char* filename) +{ + std::vector hashes; + std::ifstream file(filename); + std::string line; + while (std::getline(file, line)) { + // Directly add the line as a hash, assuming one hash per line + hashes.push_back(line); + } + return hashes; } +int main(int argc, char** argv) +{ + using FpMilliseconds = std::chrono::duration; + using FpMicroseconds = std::chrono::duration; + + std::vector test_strings = {"0", "01", "012", "0123", "01234", + "012345", "0123456", "01234567", "012345678", "0123456789"}; + + const char* csv_filename = "expected_hashes.csv"; // Replace with your actual CSV file name + std::vector expected_hashes = load_csv(csv_filename); + assert(expected_hashes.size() == test_strings.size() && "Number of hashes in CSV must match number of test strings."); + std::cout << "Loaded hashes from CSV:" << std::endl; + // for (size_t i = 0; i < expected_hashes.size(); ++i) { + // std::cout << "Expected hash " << i << ": " << expected_hashes[i] << std::endl; + // } + + // Test parameters + WORD n_outbit = 256; // Output length in bits + WORD n_batch = 1; // Number of hashes to compute in parallel + + // Test parameters + BYTE key[32] = ""; // Example key + WORD keylen = strlen((char*)key); + + // Allocate memory for the output + WORD outlen = n_outbit / 8; + + // Perform the hashing + HashConfig config = default_hash_config(); + + for (size_t i = 0; i < test_strings.size(); i++) { + BYTE* output = (BYTE*)malloc(outlen * n_batch); + if (!output) { + perror("Failed to allocate memory for output"); + return EXIT_FAILURE; + } - - -int main(int argc, char **argv) { - using FpMilliseconds = std::chrono::duration; - using FpMicroseconds = std::chrono::duration; - - std::vector test_strings = { - "0", "01", "012", "0123", "01234", "012345", "0123456", "01234567", "012345678", "0123456789" - }; - - const char *csv_filename = "expected_hashes.csv"; // Replace with your actual CSV file name - std::vector expected_hashes = load_csv(csv_filename); - assert(expected_hashes.size() == test_strings.size() && "Number of hashes in CSV must match number of test strings."); - std::cout << "Loaded hashes from CSV:" << std::endl; - // for (size_t i = 0; i < expected_hashes.size(); ++i) { - // std::cout << "Expected hash " << i << ": " << expected_hashes[i] << std::endl; - // } - - // Test parameters - WORD n_outbit = 256; // Output length in bits - WORD n_batch = 1; // Number of hashes to compute in parallel - - // Test parameters - BYTE key[32] = ""; // Example key - WORD keylen = strlen((char *)key); - - // Allocate memory for the output - WORD outlen = n_outbit / 8; + const std::string& input_str = test_strings[i]; + BYTE* input = (BYTE*)input_str.c_str(); + size_t inlen = input_str.size(); // Perform the hashing - HashConfig config = default_hash_config(); - - for (size_t i = 0; i < test_strings.size(); i++) { - - BYTE *output = (BYTE *)malloc(outlen * n_batch); - if (!output) { - perror("Failed to allocate memory for output"); - return EXIT_FAILURE; - } - - const std::string &input_str = test_strings[i]; - BYTE *input = (BYTE *)input_str.c_str(); - size_t inlen = input_str.size(); - - - - // Perform the hashing - START_TIMER(blake_timer) - mcm_cuda_blake2s_hash_batch(key, keylen, input, inlen, output, outlen, n_batch); - END_TIMER(blake_timer, "Blake Timer") - // Convert the output to hex string - std::string computed_hash = byte_to_hex(output, outlen); - // Compare with the expected hash - - - if (computed_hash == expected_hashes[i]) { - std::cout << "Test " << i << " passed." << std::endl; - } else { - std::cout << "Test " << i << " failed." << std::endl; - std::cout << "Expected: " << expected_hashes[i] << std::endl; - std::cout << "Got: " << computed_hash << std::endl; - } - free(output); + START_TIMER(blake_timer) + mcm_cuda_blake2s_hash_batch(key, keylen, input, inlen, output, outlen, n_batch); + END_TIMER(blake_timer, "Blake Timer") + // Convert the output to hex string + std::string computed_hash = byte_to_hex(output, outlen); + // Compare with the expected hash + + if (computed_hash == expected_hashes[i]) { + std::cout << "Test " << i << " passed." << std::endl; + } else { + std::cout << "Test " << i << " failed." << std::endl; + std::cout << "Expected: " << expected_hashes[i] << std::endl; + std::cout << "Got: " << computed_hash << std::endl; } - - return 0; - -} + free(output); + } + return 0; +} diff --git a/icicle/src/hash/blake2s/test_tree.cu b/icicle/src/hash/blake2s/test_tree.cu index b111ae3b5..85145ccf0 100644 --- a/icicle/src/hash/blake2s/test_tree.cu +++ b/icicle/src/hash/blake2s/test_tree.cu @@ -11,7 +11,6 @@ using namespace blake2s; - #define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now(); #define END_TIMER(timer, msg) \ printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count()); @@ -56,7 +55,7 @@ int main(int argc, char* argv[]) /// Allocate memory for digests of {keep_rows} rows of a tree START_TIMER(timer_digests); - size_t digests_mem = digests_len * sizeof(BYTE)*64; + size_t digests_mem = digests_len * sizeof(BYTE) * 64; BYTE* digests = static_cast(malloc(digests_mem)); END_TIMER(timer_digests, "Allocated memory for digests"); @@ -81,15 +80,13 @@ int main(int argc, char* argv[]) for (int i = 0; i < digests_len * 32; i++) { WORD root = digests[i]; - + // Print the current element in hexadecimal format printf("%02x", root); // After every 32 elements, print a newline to start a new row - if ((i + 1) % 32 == 0) { - printf("\n"); - } -} + if ((i + 1) % 32 == 0) { printf("\n"); } + } free(digests); free(leaves);