forked from michael-lehn/FLENS
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest_cublas.cu
128 lines (77 loc) · 2.4 KB
/
test_cublas.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// $ nvcc -std=c++11 -ccbin=g++-4.7 -I. -o test_cublas test_cublas.cu -L/usr/local/cuda/lib64 -lcublas -L/opt/OpenBLAS/lib -lopenblas
#include <iostream>
#include <complex>
#include <thrust/device_malloc_allocator.h>
#define WITH_CUBLAS
#define WITH_OPENBLAS
#define CXXBLAS_DEBUG
#define CXXLAPACK_DEBUG
#include "flens/flens.cxx"
using namespace flens;
using namespace std;
template <typename T>
using CPUArray = Array<T>;
template <typename T>
using CPUFull = FullStorage<T,ColMajor>;
#if 1
template <typename T>
using GPUArray = Array<T,IndexOptions<>,thrust::device_malloc_allocator<T> >;
template <typename T>
using GPUFull = FullStorage<T,ColMajor,IndexOptions<>,thrust::device_malloc_allocator<T> >;
#else
template <typename T>
using GPUArray = CPUArray<T>;
template <typename T>
using GPUFull = CPUFull<T>;
#endif
int main() {
//using T = std::complex<double>;
using T = double;
typedef DenseVector<GPUArray<T> > GPUVector;
typedef GeMatrix<GPUFull<T> > GPUMatrix;
typedef DenseVector<CPUArray<T> > CPUVector;
typedef GeMatrix<CPUFull<T> > CPUMatrix;
typedef typename GPUVector::IndexType IndexType;
cxxblas::CublasEnv::init(); // XXX: revisit
std::cout << cxxblas::CudaEnv::getInfo() << std::endl;
GPUVector x(5);
x = 1, 2, 3, 4, 5;
cout << "x.range() = " << x.range() << endl;
cout << "x.length() = " << x.length() << endl;
cout << "x = " << x << endl;
for (IndexType i=x.firstIndex(); i<=x.lastIndex(); ++i) {
x(i) = i*i;
}
cout << "x = " << x << endl;
CPUVector cpux = x;
x = 1, 2, 3, 4, 5;
cout << "cpux.range() = " << cpux.range() << endl;
cout << "cpux.length() = " << cpux.length() << endl;
cout << "cpux = " << cpux << endl;
const Underscore<IndexType> _;
GPUVector::View y = x(_(2,3));
y = 666;
GPUVector::NoView z = x(_(2,3));
z = 42;
cout << "x = " << x << endl;
cout << "y = " << y << endl;
cout << "z = " << z << endl;
GPUVector z2 = 2.0*x(_(1,2,5));
cout << "z2 = " << z2 << endl;
GPUMatrix A(5,5);
A = 0;
A.diag(1) = -1;
cout << "A = " << A << endl;
CPUMatrix CPUA = A;
cout << "CPUA = " << CPUA << endl;
GPUVector a = A*x;
cout << "a = " << a << endl;
GPUMatrix B(5,5), C(5,5);
B = 1;
std::cout << "Performing mm" << std::endl;
double alpha = 2;
C += alpha * A * B;
cout << "C = " << C << endl;
cxxblas::CublasEnv::release(); // XXX: revisit
return 0;
}