You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
What is your question?
I ported the serial split-K implementation from CUTLASS 2.x to CUTLASS 3.x, passing slice_k as an extra parameter of problem_size. Now I want to test it with cutlass_profiler: should I add a parameter to problem_size in gemm_operation_profiler, or use the existing split_k_slices parameter? The difficulty is that split_k_slices is not a parameter of problem_size.
/////////////////////////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[]){
int m = 256;
int n = 256;
int k = 32;
int slice_k = 1;
if(argc >= 2){
m = atoi(argv[1]);
}
if(argc >= 3){
n = atoi(argv[2]);
}
if(argc >= 4){
k = atoi(argv[3]);
}
if(argc >= 5){
slice_k = atoi(argv[4]);
}
using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
Shape<int,int,int,int>,
Config::CollectiveMainloop,
Config::CollectiveEpilogue
;
using Gemm = cutlass::gemm::device::GemmUniversalAdapter;
using ElementScalar = typename Gemm::EpilogueOutputOp::ElementScalar;
using ProblemShapeType = typename Gemm::GemmKernel::ProblemShape;
using Testbed = test::gemm::device::Testbed3x<Gemm, cutlass::epilogue::thread::Identity>;
but split_k_slices is not a parameter of problem_size
The "right" way to do split K in 3.x is to write a custom tile scheduler similar to streamK. Then the arguments of the scheduler would contain the num splits argument.
Aside: our StreamK scheduler supports a pure split-K mode as well. Curious why you are implementing your own split-K-only scheduler when we support it out of the box (OOTB)?
What is your question?
I ported the serial split-K implementation from CUTLASS 2.x to CUTLASS 3.x, passing slice_k as an extra parameter of problem_size. Now I want to test it with cutlass_profiler: should I add a parameter to problem_size in gemm_operation_profiler, or use the existing split_k_slices parameter? The difficulty is that split_k_slices is not a parameter of problem_size.
/***************************************************************************************************
**************************************************************************************************/
/*! \file
\brief Tests for device-wide GEMM interface
*/
#include <cstdlib>
#include <iostream>

#include "cutlass/cutlass.h"
#include "cute/tensor.hpp"
#include "cute/atom/mma_atom.hpp"
#include "cutlass/numeric_types.h"
#include "cutlass/gemm/device/gemm_universal_adapter.h"
#include "default_gemm_configuration.hpp"
#include "common/gemm_test_3x.hpp"
using namespace cute;
/////////////////////////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[]){
int m = 256;
int n = 256;
int k = 32;
int slice_k = 1;
if(argc >= 2){
m = atoi(argv[1]);
}
if(argc >= 3){
n = atoi(argv[2]);
}
if(argc >= 4){
k = atoi(argv[3]);
}
if(argc >= 5){
slice_k = atoi(argv[4]);
}
using Config = cutlass::gemm::device::DefaultGemmConfigurationToCutlass3Types<
cutlass::arch::OpClassTensorOp, cutlass::arch::Sm75,
half_t, cutlass::layout::ColumnMajor,
half_t, cutlass::layout::RowMajor,
half_t, cutlass::layout::ColumnMajor,
float>;
using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
Shape<int,int,int,int>,
Config::CollectiveMainloop,
Config::CollectiveEpilogue
using Gemm = cutlass::gemm::device::GemmUniversalAdapter;
using ElementScalar = typename Gemm::EpilogueOutputOp::ElementScalar;
using ProblemShapeType = typename Gemm::GemmKernel::ProblemShape;
using Testbed = test::gemm::device::Testbed3x<Gemm, cutlass::epilogue::thread::Identity>;
Testbed testbed = {};
ProblemShapeType problem_size;
double alpha,beta;
problem_size = ProblemShapeType{m, n, k, slice_k};
alpha = 1.0;
beta = 0.0;
bool passed = testbed.run(
problem_size,
cutlass::from_real(alpha),
cutlass::from_real(beta)
);
int iterations = 100;
char perf = 'N';
if(perf == 'Y')
test::gemm::device::TestGemm3x(iterations);
}
/////////////////////////////////////////////////////////////////////////////////////////////////
The text was updated successfully, but these errors were encountered: