Commit 0372d36

add comments
1 parent 70d51e2 commit 0372d36

File tree

4 files changed: +338 −236 lines changed


ggml/src/ggml-cann/acl_ops.h

-72
This file was deleted.

ggml/src/ggml-cann/acl_tensor.cpp

+112 −123
@@ -1,11 +1,30 @@
+/**
+ * Copyright (c) 2023-2024 The ggml authors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
 #include "acl_tensor.h"
 
 #include <algorithm>
 #include <cstring>
 
-/**
- * Mapping ggml_tensor type to acl_tensor type.
- */
 aclDataType type_mapping(ggml_type type) {
     switch (type) {
         case GGML_TYPE_F32:
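
For reference, a minimal sketch of the mapping this function performs. Only the GGML_TYPE_F32 case and the ACL_DT_UNDEFINED fallback are visible in this hunk, so the remaining cases are assumptions based on the standard ACL data type names, not part of this commit:

// Hedged sketch of type_mapping(); cases other than GGML_TYPE_F32 are assumed.
aclDataType type_mapping_sketch(ggml_type type) {
    switch (type) {
        case GGML_TYPE_F32: return ACL_FLOAT;    // assumed
        case GGML_TYPE_F16: return ACL_FLOAT16;  // assumed
        case GGML_TYPE_I8:  return ACL_INT8;     // assumed
        case GGML_TYPE_I32: return ACL_INT32;    // assumed
        default:            return ACL_DT_UNDEFINED;
    }
}
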
@@ -24,50 +43,51 @@ aclDataType type_mapping(ggml_type type) {
             return ACL_DT_UNDEFINED;
 }
 
-
-/**
- * Transform ggml_tensor to acl_tensor. Note that ggml_tensor dimension order
- * is reversed compared to acl_tensor.
- *
- * If bcast_ne and bcast_nb is nullptr, use ggml_tensor's ne and nb.
- * otherwise, use bcast_ne bcast_nb, which means tensor dims should be
- * changed to satisfy the broadcast. @sa: get_bcast_shape.
- */
-aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* bcast_ne,
-                             size_t* bcast_nb, int64_t bcast_dims,
-                             aclFormat format, size_t offset) {
+aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne, size_t* nb,
+                             int64_t dims, aclFormat format, size_t offset) {
     // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
     // added.
     int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
-    int64_t acl_storage_ne = 0;
-    if (bcast_ne == nullptr) {
-        acl_storage_ne = ggml_nbytes(tensor);
+
+    int64_t acl_storage_len = 0;
+    if (ne == nullptr) {
+        acl_storage_len = ggml_nbytes(tensor);
         for (int i = 0; i < GGML_MAX_DIMS; i++) {
             acl_ne[i] = tensor->ne[i];
             // The step size of acl is in elements.
             acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
         }
     } else {
         // With bcast
-        for (int i = 0; i < bcast_dims; i++) {
-            acl_storage_ne += (bcast_ne[i] - 1)*bcast_nb[i];
-            acl_ne[i] = bcast_ne[i];
-            acl_stride[i] = bcast_nb[i] / ggml_element_size(tensor);
+        for (int i = 0; i < dims; i++) {
+            acl_storage_len += (ne[i] - 1) * nb[i];
+            acl_ne[i] = ne[i];
+            acl_stride[i] = nb[i] / ggml_element_size(tensor);
         }
     }
 
-    int64_t dims = (bcast_dims == 0 ? GGML_MAX_DIMS : bcast_dims);
-    std::reverse(acl_ne, acl_ne + dims);
-    std::reverse(acl_stride, acl_stride + dims);
+    // Reverse ne and stride.
+    int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
+    std::reverse(acl_ne, acl_ne + final_dims);
+    std::reverse(acl_stride, acl_stride + final_dims);
 
-    aclTensor* acl_tensor = aclCreateTensor(
-        acl_ne, dims, type_mapping(tensor->type), acl_stride,
-        offset / ggml_element_size(tensor), format, &acl_storage_ne, 1,
-        tensor->data);
+    aclTensor* acl_tensor =
+        aclCreateTensor(acl_ne, final_dims, type_mapping(tensor->type),
+                        acl_stride, offset / ggml_element_size(tensor), format,
+                        &acl_storage_len, 1, tensor->data);
 
     return acl_tensor;
 }
 
+bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
+            return true;
+        }
+    }
+    return false;
+}
+
 aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
                              size_t type_size, int64_t* ne, size_t* nb,
                              int64_t dims, aclFormat format, size_t offset) {
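
Taken together, need_bcast() and the reworked create_acl_tensor() are meant to be used as a pair: check whether two operands need NumPy-style broadcasting, expand their shapes if so, and wrap each in an aclTensor. A hedged usage sketch follows; the wrapper function name, the ACL_FORMAT_ND format, and the zero offset are illustrative assumptions, not taken from this commit:

// Illustrative only: wiring need_bcast / get_bcast_shape / create_acl_tensor
// together for a binary op. Names and defaults below are hypothetical.
void make_binary_op_operands(const ggml_tensor* src0, const ggml_tensor* src1,
                             aclTensor** acl_src0, aclTensor** acl_src1) {
    if (need_bcast(src0, src1)) {
        // Expanded shapes can grow to at most GGML_MAX_DIMS * 2 dims.
        int64_t ne0[GGML_MAX_DIMS * 2], ne1[GGML_MAX_DIMS * 2];
        size_t  nb0[GGML_MAX_DIMS * 2], nb1[GGML_MAX_DIMS * 2];
        int64_t dims = get_bcast_shape(src0, src1, ne0, ne1, nb0, nb1);
        *acl_src0 = create_acl_tensor(src0, ne0, nb0, dims, ACL_FORMAT_ND, 0);
        *acl_src1 = create_acl_tensor(src1, ne1, nb1, dims, ACL_FORMAT_ND, 0);
    } else {
        // Passing nullptr ne/nb uses the tensor's own shape and strides.
        *acl_src0 = create_acl_tensor(src0, nullptr, nullptr, 0, ACL_FORMAT_ND, 0);
        *acl_src1 = create_acl_tensor(src1, nullptr, nullptr, 0, ACL_FORMAT_ND, 0);
    }
}
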
@@ -82,126 +102,95 @@ aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
     std::reverse(tmp_ne, tmp_ne + dims);
     std::reverse(tmp_stride, tmp_stride + dims);
 
-    int64_t acl_storage_ne = 0;
+    int64_t acl_storage_len = 0;
     for (int i = 0; i < dims; i++) {
-        acl_storage_ne += (ne[i] - 1)*nb[i];
+        acl_storage_len += (ne[i] - 1) * nb[i];
     }
 
-    aclTensor* acl_tensor = aclCreateTensor(tmp_ne, dims, dtype, tmp_stride,
-                                            offset / type_size, format, &acl_storage_ne,
-                                            1, data_ptr);
+    aclTensor* acl_tensor =
+        aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
+                        format, &acl_storage_len, 1, data_ptr);
 
     return acl_tensor;
 }
 
-/**
- * Add extra dims to satisfy acl kernel's broadcast rules (same as numpy).
- * ggml_tensor dimension order is reversed compared to Python.
- * bcast src1 with src0 though adding a extra dim.
- * for example:
- * src0 -> (32,10,10,10)
- * src1 -> (16,10,10,10)
- * bcast_ne_src0 -> (16,2,10,10,10)
- * bcast_ne_src1 -> (16,1,10,10,10)
- *
- * if dim0 has padding.
- * a -> (2, 2) padding = 2
- *  a: [[1, 2, *, *]
- *      [2, 3, *, *]]
- * nb = (8, 4, 2)
- *
- * if a should bcast with b -> (2, 4)
- * b' -> (2, 2, 2)
- *  b : [[1, 2, 3, 4, *, *]
- *       [5, 6, 7, 8, *, *]]
- * nb = (12, 6, 1)
- *
- * after bcast:
- * a' -> (2, 1, 2)
- * a': [[[1, 2], *, *]
- *      [[2, 3], *, *]]
- * nb = (8, 4, 2, 1)
- *
- * b' : [[[1, 2], [3, 4], *, *]
- *       [[5, 6], [7, 8], *, *]]
- * nb = (12, 6, 2, 1)
- *
- * because dim1 in a inserted dim, should add nb for dim1,
- * and all other nb moves to next in order.
- */
 int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
-                        int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
-                        size_t* bcast_nb_src0, size_t* bcast_nb_src1) {
+                        int64_t* bcast_src0_ne, int64_t* bcast_src1_ne,
+                        size_t* bcast_src0_nb, size_t* bcast_src1_nb) {
     GGML_ASSERT(ggml_can_repeat(src1, src0));
     int bcast_dim_cnt = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         int64_t nr = src0->ne[i] / src1->ne[i];
-        bcast_ne_src0[bcast_dim_cnt] = src0->ne[i] / nr;
-        bcast_ne_src1[bcast_dim_cnt] = src1->ne[i];
-        bcast_nb_src0[bcast_dim_cnt] = src0->nb[i];
-        bcast_nb_src1[bcast_dim_cnt] = src1->nb[i];
+        bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
+        bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
+        bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
+        bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
         bcast_dim_cnt++;
         if (nr != 1) {
             // Need to add an extra dim.
-            bcast_ne_src0[bcast_dim_cnt] = nr;
-            bcast_ne_src1[bcast_dim_cnt] = 1;
-            bcast_nb_src0[bcast_dim_cnt] = bcast_nb_src0[bcast_dim_cnt - 1] *
-                                           bcast_ne_src0[bcast_dim_cnt - 1];
-            bcast_nb_src1[bcast_dim_cnt] = bcast_nb_src1[bcast_dim_cnt - 1] *
-                                           bcast_ne_src1[bcast_dim_cnt - 1];
+            bcast_src0_ne[bcast_dim_cnt] = nr;
+            bcast_src1_ne[bcast_dim_cnt] = 1;
+            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
+                                           bcast_src0_ne[bcast_dim_cnt - 1];
+            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
+                                           bcast_src1_ne[bcast_dim_cnt - 1];
             bcast_dim_cnt++;
         }
     }
     return bcast_dim_cnt;
 }
 
-int64_t get_bcast_shape(const int64_t* src0_ne, const int64_t* src1_ne, const size_t* src0_nb, const size_t* src1_nb,
-                        int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
-                        size_t* bcast_nb_src0, size_t* bcast_nb_src1, int32_t start_dim) {
+int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
+                                const int64_t* weight_ne, const int64_t* dst_ne,
+                                const size_t* input_nb, const size_t* weight_nb,
+                                const size_t* dst_nb, int64_t* bcast_input_ne,
+                                int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
+                                size_t* bcast_input_nb, size_t* bcast_weight_nb,
+                                size_t* bcast_dst_nb) {
+    // input and dst should be in the same shape, except for the first two dims.
+    GGML_ASSERT(input_ne[2] == dst_ne[2]);
+    GGML_ASSERT(input_ne[3] == dst_ne[3]);
+
     int bcast_dim_cnt = 0;
-    int i = 0;
-    for(;i<start_dim;i++) {
-        bcast_ne_src0[bcast_dim_cnt] = src0_ne[i];
-        bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
-        bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
-        bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];
-        bcast_dim_cnt++;
-    }
-    for (;i < GGML_MAX_DIMS; i++) {
-        int64_t nr = src0_ne[i] / src1_ne[i];
-        if (nr != 1) {
-            // Need to add an extra dim.
-            bcast_ne_src0[bcast_dim_cnt] = nr;
-            bcast_ne_src1[bcast_dim_cnt] = 1;
-            bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
-            bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];
+
+    // For mul_mat, a dimension needs to be added before the dimension that
+    // weight needs to be expanded to satisfy the bcast rule of matrix
+    // multiplication.
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        int64_t nr = input_ne[i] / weight_ne[i];
+        // Do not use bcast in the first two dimensions because we only support
+        // the bcast batch dimension. Just copy them.
+        if (i < 2 || nr == 1) {
+            bcast_input_ne[bcast_dim_cnt] = input_ne[i];
+            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
+            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];
+
+            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
+            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
+            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
             bcast_dim_cnt++;
-            bcast_ne_src0[bcast_dim_cnt] = src0_ne[i] / nr;
-            bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
-            bcast_nb_src0[bcast_dim_cnt] = bcast_nb_src0[bcast_dim_cnt - 1] * bcast_ne_src0[bcast_dim_cnt - 1];
-            bcast_nb_src1[bcast_dim_cnt] = bcast_nb_src1[bcast_dim_cnt - 1] * bcast_ne_src1[bcast_dim_cnt - 1];
+        } else {
+            // Need to add an extra dim.
+            bcast_input_ne[bcast_dim_cnt] = nr;
+            bcast_dst_ne[bcast_dim_cnt] = nr;
+            bcast_weight_ne[bcast_dim_cnt] = 1;
+            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
+            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
+            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
             bcast_dim_cnt++;
-        }
-        else {
-            bcast_ne_src0[bcast_dim_cnt] = src0_ne[i];
-            bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
-            bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
-            bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];
+
+            bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
+            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
+            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
+            bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
+                                            bcast_input_ne[bcast_dim_cnt - 1];
+            bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
+                                          bcast_dst_ne[bcast_dim_cnt - 1];
+            bcast_weight_nb[bcast_dim_cnt] =
+                bcast_weight_nb[bcast_dim_cnt - 1] *
+                bcast_weight_ne[bcast_dim_cnt - 1];
             bcast_dim_cnt++;
         }
     }
     return bcast_dim_cnt;
 }
-
-/**
- * Check if shape are not same, and no dim equals 1.
- * if any dim equals 1, acl kernel will do the broadcast.
- */
-bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
-    for (int i = 0; i < GGML_MAX_DIMS; i++) {
-        if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
-            return true;
-        }
-    }
-    return false;
-}
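
To make the renamed get_bcast_shape() concrete, here is the worked example from the doc comment this commit removes, recast as a hedged test sketch; the function name and the callers' tensor setup are illustrative:

// With src0->ne = (32,10,10,10) and src1->ne = (16,10,10,10), dim 0 gives
// nr = 32 / 16 = 2, so an extra dim is inserted after it and the remaining
// dims are copied through, yielding NumPy-style broadcastable 5-D views.
void bcast_shape_example(const ggml_tensor* src0, const ggml_tensor* src1) {
    int64_t ne0[GGML_MAX_DIMS * 2], ne1[GGML_MAX_DIMS * 2];
    size_t  nb0[GGML_MAX_DIMS * 2], nb1[GGML_MAX_DIMS * 2];
    int64_t dims = get_bcast_shape(src0, src1, ne0, ne1, nb0, nb1);
    // Expected expansion, per the example in the removed doc comment:
    GGML_ASSERT(dims == 5);
    GGML_ASSERT(ne0[0] == 16 && ne0[1] == 2);  // src0 dim 0 split into (16, 2)
    GGML_ASSERT(ne1[0] == 16 && ne1[1] == 1);  // src1 gains a broadcast dim
    // The inserted dim's stride derives from the previous one:
    // nb0[1] == nb0[0] * ne0[0].
}

get_mul_mat_bcast_shape() performs the same expansion but additionally tracks dst, and, per the added comments, copies the first two dims unchanged since only batch dimensions may be broadcast for matrix multiplication.
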
