Commit

add comments
hipudding committed Jul 15, 2024
1 parent 70d51e2 commit 0372d36
Showing 4 changed files with 338 additions and 236 deletions.
72 changes: 0 additions & 72 deletions ggml/src/ggml-cann/acl_ops.h

This file was deleted.

235 changes: 112 additions & 123 deletions ggml/src/ggml-cann/acl_tensor.cpp
@@ -1,11 +1,30 @@
/**
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include "acl_tensor.h"

#include <algorithm>
#include <cstring>

/**
* Mapping ggml_tensor type to acl_tensor type.
*/
aclDataType type_mapping(ggml_type type) {
switch (type) {
case GGML_TYPE_F32:
@@ -24,50 +43,51 @@ aclDataType type_mapping(ggml_type type) {
return ACL_DT_UNDEFINED;
}


/**
* Transform ggml_tensor to acl_tensor. Note that ggml_tensor dimension order
* is reversed compared to acl_tensor.
*
* If bcast_ne and bcast_nb are nullptr, use ggml_tensor's ne and nb;
* otherwise, use bcast_ne and bcast_nb, which means the tensor dims should be
* changed to satisfy the broadcast. @sa: get_bcast_shape.
*/
aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* bcast_ne,
size_t* bcast_nb, int64_t bcast_dims,
aclFormat format, size_t offset) {
aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* ne, size_t* nb,
int64_t dims, aclFormat format, size_t offset) {
// If tensor is bcasted, up to GGML_MAX_DIMS additional dimensions will be
// added.
int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
int64_t acl_storage_ne = 0;
if (bcast_ne == nullptr) {
acl_storage_ne = ggml_nbytes(tensor);

int64_t acl_storage_len = 0;
if (ne == nullptr) {
acl_storage_len = ggml_nbytes(tensor);
for (int i = 0; i < GGML_MAX_DIMS; i++) {
acl_ne[i] = tensor->ne[i];
// The step size of acl is in elements.
acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
}
} else {
// With bcast
for (int i = 0; i < bcast_dims; i++) {
acl_storage_ne += (bcast_ne[i] - 1)*bcast_nb[i];
acl_ne[i] = bcast_ne[i];
acl_stride[i] = bcast_nb[i] / ggml_element_size(tensor);
for (int i = 0; i < dims; i++) {
acl_storage_len += (ne[i] - 1) * nb[i];
acl_ne[i] = ne[i];
acl_stride[i] = nb[i] / ggml_element_size(tensor);
}
}

int64_t dims = (bcast_dims == 0 ? GGML_MAX_DIMS : bcast_dims);
std::reverse(acl_ne, acl_ne + dims);
std::reverse(acl_stride, acl_stride + dims);
// Reverse ne and stride.
int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
std::reverse(acl_ne, acl_ne + final_dims);
std::reverse(acl_stride, acl_stride + final_dims);

aclTensor* acl_tensor = aclCreateTensor(
acl_ne, dims, type_mapping(tensor->type), acl_stride,
offset / ggml_element_size(tensor), format, &acl_storage_ne, 1,
tensor->data);
aclTensor* acl_tensor =
aclCreateTensor(acl_ne, final_dims, type_mapping(tensor->type),
acl_stride, offset / ggml_element_size(tensor), format,
&acl_storage_len, 1, tensor->data);

return acl_tensor;
}
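
// A hypothetical, standalone illustration of the dimension-order convention
// handled above: ggml stores ne with the innermost dimension first, and the
// arrays are reversed before being handed to aclCreateTensor. The values are
// made up for exposition only.

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    int64_t ggml_ne[4] = {32, 10, 10, 10};  // ggml order: ne[0] is innermost
    int64_t acl_ne[4];
    std::copy(ggml_ne, ggml_ne + 4, acl_ne);
    std::reverse(acl_ne, acl_ne + 4);       // reversed order used for acl
    // prints: 10 10 10 32
    std::printf("%lld %lld %lld %lld\n", (long long)acl_ne[0],
                (long long)acl_ne[1], (long long)acl_ne[2],
                (long long)acl_ne[3]);
    return 0;
}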

bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
for (int i = 0; i < GGML_MAX_DIMS; i++) {
if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
return true;
}
}
return false;
}

aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
size_t type_size, int64_t* ne, size_t* nb,
int64_t dims, aclFormat format, size_t offset) {
@@ -82,126 +102,95 @@ aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
std::reverse(tmp_ne, tmp_ne + dims);
std::reverse(tmp_stride, tmp_stride + dims);

int64_t acl_storage_ne = 0;
int64_t acl_storage_len = 0;
for (int i = 0; i < dims; i++) {
acl_storage_ne += (ne[i] - 1)*nb[i];
acl_storage_len += (ne[i] - 1) * nb[i];
}

aclTensor* acl_tensor = aclCreateTensor(tmp_ne, dims, dtype, tmp_stride,
offset / type_size, format, &acl_storage_ne,
1, data_ptr);
aclTensor* acl_tensor =
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
format, &acl_storage_len, 1, data_ptr);

return acl_tensor;
}

/**
* Add extra dims to satisfy acl kernel's broadcast rules (same as numpy).
* ggml_tensor dimension order is reversed compared to Python.
* bcast src1 with src0 by adding an extra dim.
* for example:
* src0 -> (32,10,10,10)
* src1 -> (16,10,10,10)
* bcast_ne_src0 -> (16,2,10,10,10)
* bcast_ne_src1 -> (16,1,10,10,10)
*
* if dim0 has padding.
* a -> (2, 2) padding = 2
* a: [[1, 2, *, *]
* [2, 3, *, *]]
* nb = (8, 4, 2)
*
* if a should bcast with b -> (2, 4)
* b' -> (2, 2, 2)
* b : [[1, 2, 3, 4, *, *]
* [5, 6, 7, 8, *, *]]
* nb = (12, 6, 1)
*
* after bcast:
* a' -> (2, 1, 2)
* a': [[[1, 2], *, *]
* [[2, 3], *, *]]
* nb = (8, 4, 2, 1)
*
* b' : [[[1, 2], [3, 4], *, *]
* [[5, 6], [7, 8], *, *]]
* nb = (12, 6, 2, 1)
*
* Because dim1 in a is an inserted dim, nb for dim1 should be added,
* and all other nb values move to the next position in order.
*/
int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
size_t* bcast_nb_src0, size_t* bcast_nb_src1) {
int64_t* bcast_src0_ne, int64_t* bcast_src1_ne,
size_t* bcast_src0_nb, size_t* bcast_src1_nb) {
GGML_ASSERT(ggml_can_repeat(src1, src0));
int bcast_dim_cnt = 0;
for (int i = 0; i < GGML_MAX_DIMS; i++) {
int64_t nr = src0->ne[i] / src1->ne[i];
bcast_ne_src0[bcast_dim_cnt] = src0->ne[i] / nr;
bcast_ne_src1[bcast_dim_cnt] = src1->ne[i];
bcast_nb_src0[bcast_dim_cnt] = src0->nb[i];
bcast_nb_src1[bcast_dim_cnt] = src1->nb[i];
bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
bcast_dim_cnt++;
if (nr != 1) {
// Need to add an extra dim.
bcast_ne_src0[bcast_dim_cnt] = nr;
bcast_ne_src1[bcast_dim_cnt] = 1;
bcast_nb_src0[bcast_dim_cnt] = bcast_nb_src0[bcast_dim_cnt - 1] *
bcast_ne_src0[bcast_dim_cnt - 1];
bcast_nb_src1[bcast_dim_cnt] = bcast_nb_src1[bcast_dim_cnt - 1] *
bcast_ne_src1[bcast_dim_cnt - 1];
bcast_src0_ne[bcast_dim_cnt] = nr;
bcast_src1_ne[bcast_dim_cnt] = 1;
bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
bcast_src0_ne[bcast_dim_cnt - 1];
bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
bcast_src1_ne[bcast_dim_cnt - 1];
bcast_dim_cnt++;
}
}
return bcast_dim_cnt;
}
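
// A hypothetical, standalone sketch of the ne handling in get_bcast_shape
// above, using the example shapes from the comment: src0 ne = (32,10,10,10),
// src1 ne = (16,10,10,10). Dim 0 differs by nr = 2, so it is split into
// (16, 2) for src0 and (16, 1) for src1; all other dims are copied.

#include <cstdint>
#include <cstdio>

int main() {
    const int64_t src0_ne[4] = {32, 10, 10, 10};
    const int64_t src1_ne[4] = {16, 10, 10, 10};
    int64_t bcast_src0_ne[8], bcast_src1_ne[8];
    int cnt = 0;
    for (int i = 0; i < 4; i++) {
        int64_t nr = src0_ne[i] / src1_ne[i];
        bcast_src0_ne[cnt] = src0_ne[i] / nr;  // == src1_ne[i]
        bcast_src1_ne[cnt] = src1_ne[i];
        cnt++;
        if (nr != 1) {  // extra dim that src1 is broadcast over
            bcast_src0_ne[cnt] = nr;
            bcast_src1_ne[cnt] = 1;
            cnt++;
        }
    }
    // prints: (16,16) (2,1) (10,10) (10,10) (10,10)
    for (int i = 0; i < cnt; i++) {
        std::printf("(%lld,%lld) ", (long long)bcast_src0_ne[i],
                    (long long)bcast_src1_ne[i]);
    }
    std::printf("\n");
    return 0;
}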

int64_t get_bcast_shape(const int64_t* src0_ne, const int64_t* src1_ne, const size_t* src0_nb, const size_t* src1_nb,
int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
size_t* bcast_nb_src0, size_t* bcast_nb_src1, int32_t start_dim) {
int64_t get_mul_mat_bcast_shape(const int64_t* input_ne,
const int64_t* weight_ne, const int64_t* dst_ne,
const size_t* input_nb, const size_t* weight_nb,
const size_t* dst_nb, int64_t* bcast_input_ne,
int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
size_t* bcast_input_nb, size_t* bcast_weight_nb,
size_t* bcast_dst_nb) {
// input and dst should be in the same shape, except the first two dims.
GGML_ASSERT(input_ne[2] == dst_ne[2]);
GGML_ASSERT(input_ne[3] == dst_ne[3]);

int bcast_dim_cnt = 0;
int i = 0;
for(;i<start_dim;i++) {
bcast_ne_src0[bcast_dim_cnt] = src0_ne[i];
bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];
bcast_dim_cnt++;
}
for (;i < GGML_MAX_DIMS; i++) {
int64_t nr = src0_ne[i] / src1_ne[i];
if (nr != 1) {
// Need to add an extra dim.
bcast_ne_src0[bcast_dim_cnt] = nr;
bcast_ne_src1[bcast_dim_cnt] = 1;
bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];

// For mul_mat, a dimension needs to be added before the dimension that
// weight needs to be expanded to satisfy the bcast rule of matrix
// multiplication.
for (int i = 0; i < GGML_MAX_DIMS; i++) {
int64_t nr = input_ne[i] / weight_ne[i];
// Do not use bcast in the first two dimensions because we only support
// the bcast batch dimension. Just copy them.
if (i < 2 || nr == 1) {
bcast_input_ne[bcast_dim_cnt] = input_ne[i];
bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];

bcast_input_nb[bcast_dim_cnt] = input_nb[i];
bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
bcast_dim_cnt++;
bcast_ne_src0[bcast_dim_cnt] = src0_ne[i] / nr;
bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
bcast_nb_src0[bcast_dim_cnt] = bcast_nb_src0[bcast_dim_cnt - 1] * bcast_ne_src0[bcast_dim_cnt - 1];
bcast_nb_src1[bcast_dim_cnt] = bcast_nb_src1[bcast_dim_cnt - 1] * bcast_ne_src1[bcast_dim_cnt - 1];
} else {
// Need to add an extra dim.
bcast_input_ne[bcast_dim_cnt] = nr;
bcast_dst_ne[bcast_dim_cnt] = nr;
bcast_weight_ne[bcast_dim_cnt] = 1;
bcast_input_nb[bcast_dim_cnt] = input_nb[i];
bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
bcast_dim_cnt++;
}
else {
bcast_ne_src0[bcast_dim_cnt] = src0_ne[i];
bcast_ne_src1[bcast_dim_cnt] = src1_ne[i];
bcast_nb_src0[bcast_dim_cnt] = src0_nb[i];
bcast_nb_src1[bcast_dim_cnt] = src1_nb[i];

bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
bcast_input_ne[bcast_dim_cnt - 1];
bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
bcast_dst_ne[bcast_dim_cnt - 1];
bcast_weight_nb[bcast_dim_cnt] =
bcast_weight_nb[bcast_dim_cnt - 1] *
bcast_weight_ne[bcast_dim_cnt - 1];
bcast_dim_cnt++;
}
}
return bcast_dim_cnt;
}
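
// A hypothetical, standalone sketch (made-up shapes, ggml ne order with the
// innermost dim first) of the splitting rule above: the first two dims are
// copied, and a batch dim where input_ne[i] / weight_ne[i] == nr != 1 is
// split into an extra dim of size nr that the weight broadcasts over,
// followed by the reduced dim.

#include <cstdint>
#include <cstdio>

int main() {
    const int64_t input_ne[4]  = {64, 32, 8, 1};  // k, m, batches
    const int64_t weight_ne[4] = {64, 16, 2, 1};  // k, n, batches
    int64_t bcast_input_ne[8], bcast_weight_ne[8];
    int cnt = 0;
    for (int i = 0; i < 4; i++) {
        int64_t nr = input_ne[i] / weight_ne[i];
        if (i < 2 || nr == 1) {
            bcast_input_ne[cnt]  = input_ne[i];
            bcast_weight_ne[cnt] = weight_ne[i];
            cnt++;
        } else {
            bcast_input_ne[cnt]  = nr;  // weight broadcasts over this dim
            bcast_weight_ne[cnt] = 1;
            cnt++;
            bcast_input_ne[cnt]  = input_ne[i] / nr;
            bcast_weight_ne[cnt] = weight_ne[i];
            cnt++;
        }
    }
    // prints: (64,64) (32,16) (4,1) (2,2) (1,1)
    for (int i = 0; i < cnt; i++) {
        std::printf("(%lld,%lld) ", (long long)bcast_input_ne[i],
                    (long long)bcast_weight_ne[i]);
    }
    std::printf("\n");
    return 0;
}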

/**
* Check if the shapes are not the same and no dim equals 1.
* If any dim equals 1, the acl kernel will do the broadcast.
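*
* For example, with the shapes used earlier: t0->ne = (32,10,10,10) and
* t1->ne = (16,10,10,10) returns true (an explicit bcast is needed, since
* 16 != 32 and 16 != 1), while t1->ne = (1,10,10,10) returns false because
* the acl kernel broadcasts dims of size 1 on its own.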
*/
bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
for (int i = 0; i < GGML_MAX_DIMS; i++) {
if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
return true;
}
}
return false;
}
