Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
nonam3e committed Jun 26, 2024
1 parent ef201b0 commit 7ccbfbb
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 339 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/cpp_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ jobs:
build_args: -DEXT_FIELD=ON
- name: stark252
build_args: -DEXT_FIELD=OFF
- name: m31
build_args: -DEXT_FIELD=ON
steps:
- name: Checkout Repo
uses: actions/checkout@v4
Expand Down
69 changes: 6 additions & 63 deletions icicle/include/api/babybear.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ extern "C" cudaError_t babybear_extension_mul_cuda(
extern "C" cudaError_t babybear_extension_add_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);

// C-linkage prototype for the babybear extension-field "accumulate" vector op.
// Takes two babybear::extension_t buffers, an int n and a VecOpsConfig, and
// reports status as a cudaError_t. Unlike babybear_extension_add_cuda, there is
// no separate `result` pointer in the signature.
// NOTE(review): presumably n is the element count and vec_b is added into vec_a
// in place — confirm against the implementation.
extern "C" cudaError_t babybear_extension_accumulate_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t babybear_extension_sub_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);

Expand All @@ -61,6 +64,9 @@ extern "C" cudaError_t babybear_mul_cuda(
extern "C" cudaError_t babybear_add_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);

// C-linkage prototype for the babybear scalar-field "accumulate" vector op.
// Takes two babybear::scalar_t buffers, an int n and a VecOpsConfig, and
// reports status as a cudaError_t. Unlike babybear_add_cuda, there is no
// separate `result` pointer in the signature.
// NOTE(review): presumably n is the element count and vec_b is added into vec_a
// in place — confirm against the implementation.
extern "C" cudaError_t babybear_accumulate_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t babybear_sub_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);

Expand Down Expand Up @@ -104,67 +110,4 @@ extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
device_context::DeviceContext& ctx);

extern "C" cudaError_t babybear_mul_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);

extern "C" cudaError_t babybear_add_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);

extern "C" cudaError_t babybear_accumulate_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t babybear_sub_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);

extern "C" cudaError_t babybear_transpose_matrix_cuda(
const babybear::scalar_t* input,
uint32_t row_size,
uint32_t column_size,
babybear::scalar_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);

extern "C" cudaError_t babybear_bit_reverse_cuda(
const babybear::scalar_t* input,
uint64_t n,
vec_ops::BitReverseConfig& config,
babybear::scalar_t* output);

extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);

extern "C" cudaError_t babybear_scalar_convert_montgomery(
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t babybear_initialize_domain(
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);

extern "C" cudaError_t babybear_ntt_cuda(
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);

extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);

extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);

extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t babybear_extension_mul_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);

extern "C" cudaError_t babybear_extension_add_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);

extern "C" cudaError_t babybear_extension_sub_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);

extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
const babybear::extension_t* input,
uint32_t row_size,
uint32_t column_size,
babybear::extension_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);

#endif
43 changes: 3 additions & 40 deletions icicle/include/api/bls12_377.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ extern "C" cudaError_t bls12_377_mul_cuda(
extern "C" cudaError_t bls12_377_add_cuda(
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);

// C-linkage prototype for the bls12_377 scalar-field "accumulate" vector op.
// Takes two bls12_377::scalar_t buffers, an int n and a VecOpsConfig, and
// reports status as a cudaError_t. Unlike bls12_377_add_cuda, there is no
// separate `result` pointer in the signature.
// NOTE(review): presumably n is the element count and vec_b is added into vec_a
// in place — confirm against the implementation.
extern "C" cudaError_t bls12_377_accumulate_cuda(
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t bls12_377_sub_cuda(
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);

Expand Down Expand Up @@ -123,44 +126,4 @@ extern "C" cudaError_t bls12_377_build_poseidon_merkle_tree(
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
merkle::TreeBuilderConfig& config);

extern "C" cudaError_t bls12_377_mul_cuda(
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);

extern "C" cudaError_t bls12_377_add_cuda(
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);

extern "C" cudaError_t bls12_377_accumulate_cuda(
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t bls12_377_sub_cuda(
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);

extern "C" cudaError_t bls12_377_transpose_matrix_cuda(
const bls12_377::scalar_t* input,
uint32_t row_size,
uint32_t column_size,
bls12_377::scalar_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);

extern "C" cudaError_t bls12_377_bit_reverse_cuda(
const bls12_377::scalar_t* input,
uint64_t n,
vec_ops::BitReverseConfig& config,
bls12_377::scalar_t* output);

extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);

extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_377_initialize_domain(
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);

extern "C" cudaError_t bls12_377_ntt_cuda(
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);

extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);

#endif
43 changes: 3 additions & 40 deletions icicle/include/api/bls12_381.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ extern "C" cudaError_t bls12_381_mul_cuda(
extern "C" cudaError_t bls12_381_add_cuda(
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);

// C-linkage prototype for the bls12_381 scalar-field "accumulate" vector op.
// Takes two bls12_381::scalar_t buffers, an int n and a VecOpsConfig, and
// reports status as a cudaError_t. Unlike bls12_381_add_cuda, there is no
// separate `result` pointer in the signature.
// NOTE(review): presumably n is the element count and vec_b is added into vec_a
// in place — confirm against the implementation.
extern "C" cudaError_t bls12_381_accumulate_cuda(
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t bls12_381_sub_cuda(
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);

Expand Down Expand Up @@ -123,44 +126,4 @@ extern "C" cudaError_t bls12_381_build_poseidon_merkle_tree(
poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
merkle::TreeBuilderConfig& config);

extern "C" cudaError_t bls12_381_mul_cuda(
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);

extern "C" cudaError_t bls12_381_add_cuda(
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);

extern "C" cudaError_t bls12_381_accumulate_cuda(
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t bls12_381_sub_cuda(
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);

extern "C" cudaError_t bls12_381_transpose_matrix_cuda(
const bls12_381::scalar_t* input,
uint32_t row_size,
uint32_t column_size,
bls12_381::scalar_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);

extern "C" cudaError_t bls12_381_bit_reverse_cuda(
const bls12_381::scalar_t* input,
uint64_t n,
vec_ops::BitReverseConfig& config,
bls12_381::scalar_t* output);

extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);

extern "C" cudaError_t bls12_381_scalar_convert_montgomery(
bls12_381::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_381_initialize_domain(
bls12_381::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);

extern "C" cudaError_t bls12_381_ntt_cuda(
const bls12_381::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::scalar_t* output);

extern "C" cudaError_t bls12_381_release_domain(device_context::DeviceContext& ctx);

#endif
43 changes: 3 additions & 40 deletions icicle/include/api/bn254.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ extern "C" cudaError_t bn254_mul_cuda(
extern "C" cudaError_t bn254_add_cuda(
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);

// C-linkage prototype for the bn254 scalar-field "accumulate" vector op.
// Takes two bn254::scalar_t buffers, an int n and a VecOpsConfig, and reports
// status as a cudaError_t. Unlike bn254_add_cuda, there is no separate `result`
// pointer in the signature.
// NOTE(review): presumably n is the element count and vec_b is added into vec_a
// in place — confirm against the implementation.
extern "C" cudaError_t bn254_accumulate_cuda(
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t bn254_sub_cuda(
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);

Expand Down Expand Up @@ -155,44 +158,4 @@ extern "C" cudaError_t bn254_build_poseidon_merkle_tree(
poseidon::PoseidonConstants<bn254::scalar_t>& constants,
merkle::TreeBuilderConfig& config);

extern "C" cudaError_t bn254_mul_cuda(
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);

extern "C" cudaError_t bn254_add_cuda(
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);

extern "C" cudaError_t bn254_accumulate_cuda(
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t bn254_sub_cuda(
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);

extern "C" cudaError_t bn254_transpose_matrix_cuda(
const bn254::scalar_t* input,
uint32_t row_size,
uint32_t column_size,
bn254::scalar_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);

extern "C" cudaError_t bn254_bit_reverse_cuda(
const bn254::scalar_t* input,
uint64_t n,
vec_ops::BitReverseConfig& config,
bn254::scalar_t* output);

extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);

extern "C" cudaError_t bn254_scalar_convert_montgomery(
bn254::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bn254_initialize_domain(
bn254::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);

extern "C" cudaError_t bn254_ntt_cuda(
const bn254::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::scalar_t* output);

extern "C" cudaError_t bn254_release_domain(device_context::DeviceContext& ctx);

#endif
43 changes: 3 additions & 40 deletions icicle/include/api/bw6_761.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ extern "C" cudaError_t bw6_761_mul_cuda(
extern "C" cudaError_t bw6_761_add_cuda(
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);

// C-linkage prototype for the bw6_761 scalar-field "accumulate" vector op.
// Takes two bw6_761::scalar_t buffers, an int n and a VecOpsConfig, and reports
// status as a cudaError_t. Unlike bw6_761_add_cuda, there is no separate
// `result` pointer in the signature.
// NOTE(review): presumably n is the element count and vec_b is added into vec_a
// in place — confirm against the implementation.
extern "C" cudaError_t bw6_761_accumulate_cuda(
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t bw6_761_sub_cuda(
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);

Expand Down Expand Up @@ -123,44 +126,4 @@ extern "C" cudaError_t bw6_761_build_poseidon_merkle_tree(
poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
merkle::TreeBuilderConfig& config);

extern "C" cudaError_t bw6_761_mul_cuda(
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);

extern "C" cudaError_t bw6_761_add_cuda(
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);

extern "C" cudaError_t bw6_761_accumulate_cuda(
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t bw6_761_sub_cuda(
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);

extern "C" cudaError_t bw6_761_transpose_matrix_cuda(
const bw6_761::scalar_t* input,
uint32_t row_size,
uint32_t column_size,
bw6_761::scalar_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);

extern "C" cudaError_t bw6_761_bit_reverse_cuda(
const bw6_761::scalar_t* input,
uint64_t n,
vec_ops::BitReverseConfig& config,
bw6_761::scalar_t* output);

extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);

extern "C" cudaError_t bw6_761_scalar_convert_montgomery(
bw6_761::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bw6_761_initialize_domain(
bw6_761::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);

extern "C" cudaError_t bw6_761_ntt_cuda(
const bw6_761::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::scalar_t* output);

extern "C" cudaError_t bw6_761_release_domain(device_context::DeviceContext& ctx);

#endif
35 changes: 3 additions & 32 deletions icicle/include/api/grumpkin.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ extern "C" cudaError_t grumpkin_mul_cuda(
extern "C" cudaError_t grumpkin_add_cuda(
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);

// C-linkage prototype for the grumpkin scalar-field "accumulate" vector op.
// Takes two grumpkin::scalar_t buffers, an int n and a VecOpsConfig, and
// reports status as a cudaError_t. Unlike grumpkin_add_cuda, there is no
// separate `result` pointer in the signature.
// NOTE(review): presumably n is the element count and vec_b is added into vec_a
// in place — confirm against the implementation.
extern "C" cudaError_t grumpkin_accumulate_cuda(
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t grumpkin_sub_cuda(
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);

Expand Down Expand Up @@ -88,36 +91,4 @@ extern "C" cudaError_t grumpkin_build_poseidon_merkle_tree(
poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
merkle::TreeBuilderConfig& config);

extern "C" cudaError_t grumpkin_mul_cuda(
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);

extern "C" cudaError_t grumpkin_add_cuda(
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);

extern "C" cudaError_t grumpkin_accumulate_cuda(
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t grumpkin_sub_cuda(
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);

extern "C" cudaError_t grumpkin_transpose_matrix_cuda(
const grumpkin::scalar_t* input,
uint32_t row_size,
uint32_t column_size,
grumpkin::scalar_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);

extern "C" cudaError_t grumpkin_bit_reverse_cuda(
const grumpkin::scalar_t* input,
uint64_t n,
vec_ops::BitReverseConfig& config,
grumpkin::scalar_t* output);

extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);

extern "C" cudaError_t grumpkin_scalar_convert_montgomery(
grumpkin::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

#endif
19 changes: 11 additions & 8 deletions icicle/include/api/m31.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,19 @@ extern "C" cudaError_t m31_mul_cuda(
extern "C" cudaError_t m31_add_cuda(
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);

// C-linkage prototype for the m31 scalar-field "accumulate" vector op.
// Takes two m31::scalar_t buffers, an int n and a VecOpsConfig, and reports
// status as a cudaError_t. Unlike m31_add_cuda, there is no separate `result`
// pointer in the signature.
// NOTE(review): presumably n is the element count and vec_b is added into vec_a
// in place — confirm against the implementation.
extern "C" cudaError_t m31_accumulate_cuda(
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t m31_sub_cuda(
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);

// extern "C" cudaError_t m31_transpose_matrix_cuda(
// const m31::scalar_t* input,
// uint32_t row_size,
// uint32_t column_size,
// m31::scalar_t* output,
// device_context::DeviceContext& ctx,
// bool on_device,
// bool is_async);
// C-linkage prototype for the m31 scalar-field matrix transpose.
// Reads from a const `input` buffer and writes to a separate `output` buffer;
// status is reported as a cudaError_t.
// NOTE(review): presumably `input` is a row_size x column_size matrix and
// `output` receives its transpose — confirm the exact layout convention.
// NOTE(review): `on_device` presumably says whether the buffers are device
// memory, and `is_async` whether the call returns before work on the context's
// stream completes — confirm against the implementation.
extern "C" cudaError_t m31_transpose_matrix_cuda(
const m31::scalar_t* input,
uint32_t row_size,
uint32_t column_size,
m31::scalar_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);

#endif
Loading

0 comments on commit 7ccbfbb

Please sign in to comment.