+ /**
+  * Copyright (c) 2023-2024 The ggml authors
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a copy
+  * of this software and associated documentation files (the "Software"), to
+  * deal in the Software without restriction, including without limitation the
+  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+  * sell copies of the Software, and to permit persons to whom the Software is
+  * furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in
+  * all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+  * IN THE SOFTWARE.
+  */
+
#include "acl_tensor.h"

#include <algorithm>
#include <cstring>

- /**
-  * Mapping ggml_tensor type to acl_tensor type.
-  */
aclDataType type_mapping(ggml_type type) {
    switch (type) {
        case GGML_TYPE_F32:
@@ -24,50 +43,51 @@ aclDataType type_mapping(ggml_type type) {
    return ACL_DT_UNDEFINED;
}

-
- /**
-  * Transform ggml_tensor to acl_tensor. Note that ggml_tensor dimension order
-  * is reversed compared to acl_tensor.
-  *
-  * If bcast_ne and bcast_nb is nullptr, use ggml_tensor's ne and nb.
-  * otherwise, use bcast_ne bcast_nb, which means tensor dims should be
-  * changed to satisfy the broadcast. @sa: get_bcast_shape.
-  */
- aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* bcast_ne,
-                              size_t* bcast_nb, int64_t bcast_dims,
-                              aclFormat format, size_t offset) {
+ aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne, size_t* nb,
+                              int64_t dims, aclFormat format, size_t offset) {
    // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
    // added.
    int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
-     int64_t acl_storage_ne = 0;
-     if (bcast_ne == nullptr) {
-         acl_storage_ne = ggml_nbytes(tensor);
+
+     int64_t acl_storage_len = 0;
+     if (ne == nullptr) {
+         acl_storage_len = ggml_nbytes(tensor);
        for (int i = 0; i < GGML_MAX_DIMS; i++) {
            acl_ne[i] = tensor->ne[i];
            // The step size of acl is in elements.
            acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
        }
    } else {
        // With bcast
-         for (int i = 0; i < bcast_dims; i++) {
-             acl_storage_ne += (bcast_ne[i] - 1) * bcast_nb[i];
-             acl_ne[i] = bcast_ne[i];
-             acl_stride[i] = bcast_nb[i] / ggml_element_size(tensor);
+         for (int i = 0; i < dims; i++) {
+             acl_storage_len += (ne[i] - 1) * nb[i];
+             acl_ne[i] = ne[i];
+             acl_stride[i] = nb[i] / ggml_element_size(tensor);
        }
    }

-     int64_t dims = (bcast_dims == 0 ? GGML_MAX_DIMS : bcast_dims);
-     std::reverse(acl_ne, acl_ne + dims);
-     std::reverse(acl_stride, acl_stride + dims);
+     // Reverse ne and stride.
+     int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
+     std::reverse(acl_ne, acl_ne + final_dims);
+     std::reverse(acl_stride, acl_stride + final_dims);

-     aclTensor* acl_tensor = aclCreateTensor(
-         acl_ne, dims, type_mapping(tensor->type), acl_stride,
-         offset / ggml_element_size(tensor), format, &acl_storage_ne, 1,
-         tensor->data);
+     aclTensor* acl_tensor =
+         aclCreateTensor(acl_ne, final_dims, type_mapping(tensor->type),
+                         acl_stride, offset / ggml_element_size(tensor), format,
+                         &acl_storage_len, 1, tensor->data);

    return acl_tensor;
}

+ bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
+     for (int i = 0; i < GGML_MAX_DIMS; i++) {
+         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
+             return true;
+         }
+     }
+     return false;
+ }
+
aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
                             size_t type_size, int64_t* ne, size_t* nb,
                             int64_t dims, aclFormat format, size_t offset) {
@@ -82,126 +102,95 @@ aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
    std::reverse(tmp_ne, tmp_ne + dims);
    std::reverse(tmp_stride, tmp_stride + dims);

-     int64_t acl_storage_ne = 0;
+     int64_t acl_storage_len = 0;
    for (int i = 0; i < dims; i++) {
-         acl_storage_ne += (ne[i] - 1) * nb[i];
+         acl_storage_len += (ne[i] - 1) * nb[i];
    }

-     aclTensor* acl_tensor = aclCreateTensor(tmp_ne, dims, dtype, tmp_stride,
-                                             offset / type_size, format, &acl_storage_ne,
-                                             1, data_ptr);
+     aclTensor* acl_tensor =
+         aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
+                         format, &acl_storage_len, 1, data_ptr);

    return acl_tensor;
}

- /**
-  * Add extra dims to satisfy acl kernel's broadcast rules (same as numpy).
-  * ggml_tensor dimension order is reversed compared to Python.
-  * bcast src1 with src0 though adding a extra dim.
-  * for example:
-  * src0 -> (32,10,10,10)
-  * src1 -> (16,10,10,10)
-  * bcast_ne_src0 -> (16,2,10,10,10)
-  * bcast_ne_src1 -> (16,1,10,10,10)
-  *
-  * if dim0 has padding.
-  * a -> (2, 2) padding = 2
-  * a: [[1, 2, *, *]
-  *     [2, 3, *, *]]
-  * nb = (8, 4, 2)
-  *
-  * if a should bcast with b -> (2, 4)
-  * b' -> (2, 2, 2)
-  * b : [[1, 2, 3, 4, *, *]
-  *      [5, 6, 7, 8, *, *]]
-  * nb = (12, 6, 1)
-  *
-  * after bcast:
-  * a' -> (2, 1, 2)
-  * a': [[[1, 2], *, *]
-  *      [[2, 3], *, *]]
-  * nb = (8, 4, 2, 1)
-  *
-  * b' : [[[1, 2], [3, 4], *, *]
-  *       [[5, 6], [7, 8], *, *]]
-  * nb = (12, 6, 2, 1)
-  *
-  * because dim1 in a inserted dim, should add nb for dim1,
-  * and all other nb moves to next in order.
-  */
int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
-                         int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
-                         size_t* bcast_nb_src0, size_t* bcast_nb_src1) {
+                         int64_t* bcast_src0_ne, int64_t* bcast_src1_ne,
+                         size_t* bcast_src0_nb, size_t* bcast_src1_nb) {
    GGML_ASSERT(ggml_can_repeat(src1, src0));
    int bcast_dim_cnt = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        int64_t nr = src0->ne[i] / src1->ne[i];
-         bcast_ne_src0[bcast_dim_cnt] = src0->ne[i] / nr;
-         bcast_ne_src1[bcast_dim_cnt] = src1->ne[i];
-         bcast_nb_src0[bcast_dim_cnt] = src0->nb[i];
-         bcast_nb_src1[bcast_dim_cnt] = src1->nb[i];
+         bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
+         bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
+         bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
+         bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
        bcast_dim_cnt++;
        if (nr != 1) {
            // Need to add an extra dim.
-             bcast_ne_src0[bcast_dim_cnt] = nr;
-             bcast_ne_src1[bcast_dim_cnt] = 1;
-             bcast_nb_src0[bcast_dim_cnt] = bcast_nb_src0[bcast_dim_cnt - 1] *
-                                            bcast_ne_src0[bcast_dim_cnt - 1];
-             bcast_nb_src1[bcast_dim_cnt] = bcast_nb_src1[bcast_dim_cnt - 1] *
-                                            bcast_ne_src1[bcast_dim_cnt - 1];
+             bcast_src0_ne[bcast_dim_cnt] = nr;
+             bcast_src1_ne[bcast_dim_cnt] = 1;
+             bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
+                                            bcast_src0_ne[bcast_dim_cnt - 1];
+             bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
+                                            bcast_src1_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}

- int64_t get_bcast_shape(const int64_t* src0_ne, const int64_t* src1_ne, const size_t* src0_nb, const size_t* src1_nb,
-                         int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
-                         size_t* bcast_nb_src0, size_t* bcast_nb_src1, int32_t start_dim) {
+ int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
+                                 const int64_t* weight_ne, const int64_t* dst_ne,
+                                 const size_t* input_nb, const size_t* weight_nb,
+                                 const size_t* dst_nb, int64_t* bcast_input_ne,
+                                 int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
+                                 size_t* bcast_input_nb, size_t* bcast_weight_nb,
+                                 size_t* bcast_dst_nb) {
+     // input and dst should have the same shape, except for the first two dims.
+     GGML_ASSERT(input_ne[2] == dst_ne[2]);
+     GGML_ASSERT(input_ne[3] == dst_ne[3]);
+
    int bcast_dim_cnt = 0;
-     int i = 0;
-     for (;i<start_dim;i++) {
-         bcast_ne_src0[bcast_dim_cnt] = src0_ne[i];
-         bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
-         bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
-         bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];
-         bcast_dim_cnt++;
-     }
-     for (; i < GGML_MAX_DIMS; i++) {
-         int64_t nr = src0_ne[i] / src1_ne[i];
-         if (nr != 1) {
-             // Need to add an extra dim.
-             bcast_ne_src0[bcast_dim_cnt] = nr;
-             bcast_ne_src1[bcast_dim_cnt] = 1;
-             bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
-             bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];
+
+     // For mul_mat, a dimension needs to be added before the dimension that
+     // weight needs to be expanded to satisfy the bcast rule of matrix
+     // multiplication.
+     for (int i = 0; i < GGML_MAX_DIMS; i++) {
+         int64_t nr = input_ne[i] / weight_ne[i];
+         // Do not use bcast in the first two dimensions because we only support
+         // the bcast batch dimension. Just copy them.
+         if (i < 2 || nr == 1) {
+             bcast_input_ne[bcast_dim_cnt] = input_ne[i];
+             bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
+             bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];
+
+             bcast_input_nb[bcast_dim_cnt] = input_nb[i];
+             bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
+             bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_dim_cnt++;
-             bcast_ne_src0[bcast_dim_cnt] = src0_ne[i] / nr;
-             bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
-             bcast_nb_src0[bcast_dim_cnt] = bcast_nb_src0[bcast_dim_cnt - 1] * bcast_ne_src0[bcast_dim_cnt - 1];
-             bcast_nb_src1[bcast_dim_cnt] = bcast_nb_src1[bcast_dim_cnt - 1] * bcast_ne_src1[bcast_dim_cnt - 1];
+         } else {
+             // Need to add an extra dim.
+             bcast_input_ne[bcast_dim_cnt] = nr;
+             bcast_dst_ne[bcast_dim_cnt] = nr;
+             bcast_weight_ne[bcast_dim_cnt] = 1;
+             bcast_input_nb[bcast_dim_cnt] = input_nb[i];
+             bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
+             bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dim_cnt++;
-         }
-         else {
-             bcast_ne_src0[bcast_dim_cnt] = src0_ne[i];
-             bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
-             bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
-             bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];
+
+             bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
+             bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
+             bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
+             bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
+                                             bcast_input_ne[bcast_dim_cnt - 1];
+             bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
+                                           bcast_dst_ne[bcast_dim_cnt - 1];
+             bcast_weight_nb[bcast_dim_cnt] =
+                 bcast_weight_nb[bcast_dim_cnt - 1] *
+                 bcast_weight_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}
-
- /**
-  * Check if shape are not same, and no dim equals 1.
-  * if any dim equals 1, acl kernel will do the broadcast.
-  */
- bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
-     for (int i = 0; i < GGML_MAX_DIMS; i++) {
-         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
-             return true;
-         }
-     }
-     return false;
- }
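For reference, a minimal usage sketch (not taken from this commit) of how an elementwise op could combine need_bcast, get_bcast_shape and create_acl_tensor. The wrapper name acl_op_inputs is hypothetical; GGML_MAX_DIMS, ggml_tensor and ACL_FORMAT_ND come from the ggml and ACL headers.

#include "acl_tensor.h"  // declares the helpers shown in this diff

// Hypothetical helper (illustration only): build broadcast-aware aclTensor
// handles for the two inputs of a binary elementwise op.
static void acl_op_inputs(const ggml_tensor* src0, const ggml_tensor* src1,
                          aclTensor** acl_src0, aclTensor** acl_src1) {
    if (need_bcast(src0, src1)) {
        // Each of the GGML_MAX_DIMS dims may gain one extra dim, hence the * 2.
        int64_t bcast_src0_ne[GGML_MAX_DIMS * 2], bcast_src1_ne[GGML_MAX_DIMS * 2];
        size_t  bcast_src0_nb[GGML_MAX_DIMS * 2], bcast_src1_nb[GGML_MAX_DIMS * 2];
        int64_t bcast_dims = get_bcast_shape(src0, src1, bcast_src0_ne, bcast_src1_ne,
                                             bcast_src0_nb, bcast_src1_nb);
        *acl_src0 = create_acl_tensor(src0, bcast_src0_ne, bcast_src0_nb,
                                      bcast_dims, ACL_FORMAT_ND, 0);
        *acl_src1 = create_acl_tensor(src1, bcast_src1_ne, bcast_src1_nb,
                                      bcast_dims, ACL_FORMAT_ND, 0);
    } else {
        // Passing nullptr for ne/nb makes create_acl_tensor use the tensor's
        // own shape and strides.
        *acl_src0 = create_acl_tensor(src0, nullptr, nullptr, 0, ACL_FORMAT_ND, 0);
        *acl_src1 = create_acl_tensor(src1, nullptr, nullptr, 0, ACL_FORMAT_ND, 0);
    }
}

get_mul_mat_bcast_shape follows the same pattern but operates on raw ne/nb arrays for input, weight and dst, and only inserts the extra dimension for the batch dims (i >= 2); the first two dims are copied unchanged.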