-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cu
42 lines (34 loc) · 1.31 KB
/
main.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#include <iostream>
#include <cuda_runtime.h>
#include <cudnn.h>
#include <opencv2/opencv.hpp>
#include "conv2d_impl.cuh"
int main() {
cv::Mat src = cv::imread("/home/penghuiwei/MyWorkspace/ubuntu/CPP/cudaCudnnConv/image/lena.jpg");
cv::Mat src_fp;
src.convertTo(src_fp, CV_32FC3);
// Input
Tensor tensor_x, tensor_w, tensor_y;
tensor_x.alloc_gpu(1, 3, src.rows, src.cols);
float* dev_ptr = tensor_x.get_ptr();
CHECK_CUDA(cudaMemcpy(dev_ptr, src_fp.data, tensor_x.size_byte, cudaMemcpyHostToDevice));
// kernel & conv
Conv2dParam param;
param.pad_h = param.pad_w = 1;
param.dilation_h = param.dilation_w = 1;
param.u = param.v = 1;
make_kernel(tensor_w);
cudnn_conv2d_out(tensor_x, tensor_w, param, tensor_y);
// tensor_y.save("tensor_y1.dat");
// tensor_y = cudnn_conv2d(tensor_x, tensor_w, param);
// tensor_y.save("tensor_y3.dat");
cv::Mat dst_fp(cv::Size2d(tensor_y.w, tensor_y.h), CV_32FC(tensor_y.c));
cv::normalize(dst_fp,dst_fp, 255, 0, cv::NORM_MINMAX);
cv::Mat dst;
CHECK_CUDA(cudaMemcpy(dst_fp.data, tensor_y.get_ptr(), tensor_y.size_byte, cudaMemcpyDeviceToHost));
dst_fp.convertTo(dst, CV_8UC(tensor_y.c));
cv::imwrite("dst.png", dst);
// cv::imshow("src", src);
// cv::imshow("dst", dst);
// cv::waitKey(0);
}