Skip to content

Commit

Permalink
add cann legacy mempool
Browse files Browse the repository at this point in the history
  • Loading branch information
hipudding committed Jul 10, 2024
1 parent e9a550c commit 63947bc
Show file tree
Hide file tree
Showing 7 changed files with 482 additions and 415 deletions.
145 changes: 103 additions & 42 deletions ggml/src/ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <mutex>

#include "ggml-backend-impl.h"
#include "ggml-cann/acl_ops.h"
#include "ggml-cann/aclnn_ops.h"
#include "ggml-cann/common.h"

Expand Down Expand Up @@ -64,30 +63,122 @@ const ggml_cann_device_info& ggml_cann_info() {
return info;
}

// Debug switch for pool allocation tracing.
// NOTE(review): this was left unconditionally enabled, which makes every
// fresh pool allocation print to stdout; keep it off by default.
// #define DEBUG_CANN_MALLOC

// Legacy buffer pool for CANN: a fixed-size free list of device buffers.
// free() parks buffers here instead of releasing them; alloc() reuses the
// best-fitting parked buffer and only calls aclrtMalloc when nothing
// suitable is cached.
struct ggml_cann_pool_leg : public ggml_cann_pool {
    // Maximum number of buffers the free list can hold.
    static const int MAX_BUFFERS = 256;

    // Device id this pool allocates on.
    int device;

    // One slot of the free list; ptr == nullptr marks an empty slot.
    struct ggml_cann_buffer {
        void * ptr = nullptr;
        size_t size = 0;
    };

    // Free list of parked (currently unused) device buffers.
    ggml_cann_buffer buffer_pool[MAX_BUFFERS] = {};
    // Total bytes obtained from aclrtMalloc and not yet returned to the
    // driver (includes buffers currently checked out to callers).
    size_t pool_size = 0;

    explicit ggml_cann_pool_leg(int device) :
        device(device) {
    }

    // Frees every parked buffer. The final assert assumes all outstanding
    // allocations have been returned to the pool via free() before
    // destruction — otherwise pool_size cannot reach zero.
    ~ggml_cann_pool_leg() {
        ggml_cann_set_device(device);
        for (int i = 0; i < MAX_BUFFERS; ++i) {
            ggml_cann_buffer & b = buffer_pool[i];
            if (b.ptr != nullptr) {
                ACL_CHECK(aclrtFree(b.ptr));
                pool_size -= b.size;
            }
        }
        GGML_ASSERT(pool_size == 0);
    }

    // Return a device buffer of at least `size` bytes; the granted size is
    // written to *actual_size. Prefers the parked buffer with the smallest
    // overshoot; falls back to a fresh aclrtMalloc with 5% look-ahead
    // padding (rounded up to 256 bytes) to improve future reuse.
    void * alloc(size_t size, size_t * actual_size) override {
#ifdef DEBUG_CANN_MALLOC
        int nnz = 0;
        size_t max_size = 0;
#endif
        size_t best_diff = 1ull << 36;  // only reuse when overshoot < 2^36 bytes
        int ibest = -1;
        for (int i = 0; i < MAX_BUFFERS; ++i) {
            ggml_cann_buffer& b = buffer_pool[i];
            if (b.ptr != nullptr) {
#ifdef DEBUG_CANN_MALLOC
                ++nnz;
                if (b.size > max_size) max_size = b.size;
#endif
                if (b.size >= size) {
                    size_t diff = b.size - size;
                    if (diff < best_diff) {
                        best_diff = diff;
                        ibest = i;
                        if (!best_diff) {
                            // exact fit: take it immediately
                            void * ptr = b.ptr;
                            *actual_size = b.size;
                            b.ptr = nullptr;
                            b.size = 0;
                            return ptr;
                        }
                    }
                }
            }
        }
        if (ibest >= 0) {
            // best (non-exact) fit found: hand it out and clear the slot
            ggml_cann_buffer& b = buffer_pool[ibest];
            void * ptr = b.ptr;
            *actual_size = b.size;
            b.ptr = nullptr;
            b.size = 0;
            return ptr;
        }
        // nothing cached fits: allocate a new buffer with look-ahead padding
        void * ptr;
        size_t look_ahead_size = (size_t) (1.05 * size);
        look_ahead_size = 256 * ((look_ahead_size + 255)/256);
        ggml_cann_set_device(device);
        ACL_CHECK(aclrtMalloc(&ptr, look_ahead_size, ACL_MEM_MALLOC_HUGE_FIRST));
        *actual_size = look_ahead_size;
        pool_size += look_ahead_size;
#ifdef DEBUG_CANN_MALLOC
        printf("%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, device, nnz,
               (uint32_t)(max_size / 1024 / 1024), (uint32_t)(pool_size / 1024 / 1024), (uint32_t)(size / 1024 / 1024));
#endif
        return ptr;
    }

    // Park a buffer in the first empty free-list slot instead of freeing it.
    void free(void * ptr, size_t size) override {
        for (int i = 0; i < MAX_BUFFERS; ++i) {
            ggml_cann_buffer& b = buffer_pool[i];
            if (b.ptr == nullptr) {
                b.ptr = ptr;
                b.size = size;
                return;
            }
        }
        // Memory must always be parked in the pool: it may still be needed
        // by tasks queued on the stream, so freeing it here would be unsafe.
        // TODO: fix me.
        GGML_ASSERT(!"CANN buffer pool full, increase MAX_BUFFERS\n");
    }
};

std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(int device) {
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_leg(device));
}

// cann buffer
// Context owning the device memory behind a CANN backend buffer.
struct ggml_backend_cann_buffer_context {
    int32_t device;                     // device id the memory lives on
    void* dev_ptr = nullptr;            // main device allocation (owned)
    std::string name;                   // buffer name: GGML_CANN_NAME + device id
    std::vector<void*> dev_extra_ptrs;  // extra device allocations (owned)

    ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr)
        : device(device),
          dev_ptr(dev_ptr),
          name(GGML_CANN_NAME + std::to_string(device)) {}

    // Non-copyable: the destructor frees dev_ptr and every entry of
    // dev_extra_ptrs, so copying this context would double-free.
    ggml_backend_cann_buffer_context(const ggml_backend_cann_buffer_context&) = delete;
    ggml_backend_cann_buffer_context& operator=(const ggml_backend_cann_buffer_context&) = delete;

    // Allocate an extra device buffer of `size` bytes whose lifetime is
    // tied to this context (freed in the destructor).
    void* get_extra_ptr(size_t size) {
        void* buffer;
        ACL_CHECK(aclrtMalloc(&buffer, size, ACL_MEM_MALLOC_HUGE_FIRST));
        dev_extra_ptrs.push_back(buffer);
        return buffer;
    }

    ~ggml_backend_cann_buffer_context() {
        ACL_CHECK(aclrtFree(dev_ptr));
        for (auto dev_extra_ptr : dev_extra_ptrs) {
            ACL_CHECK(aclrtFree(dev_extra_ptr));
        }
    }
};

Expand Down Expand Up @@ -270,32 +361,10 @@ GGML_CALL static bool need_transform(ggml_type type) {
}
}

// Copy a tensor's host-side metadata (the ggml_tensor struct itself) into a
// freshly allocated device buffer and record that buffer in tensor->extra.
static void set_tensor_extra(ggml_backend_buffer_t buffer,
                             ggml_tensor* tensor) {
    // TODO: all tensors should copy their meta data to the npu; extra is
    // currently only used to record memory usage for perf tests.
    const size_t meta_size = sizeof(ggml_tensor);
    ggml_backend_cann_buffer_context* buf_ctx =
        (ggml_backend_cann_buffer_context*)buffer->context;
    tensor->extra = buf_ctx->get_extra_ptr(meta_size);
    ACL_CHECK(aclrtMemcpy(tensor->extra, meta_size, tensor, meta_size,
                          ACL_MEMCPY_HOST_TO_DEVICE));
}

// Re-sync the on-device copy of a tensor's metadata after its ne/nb fields
// have changed on the host.
static void update_tensor_extra(ggml_tensor* tensor) {
    const size_t meta_size = sizeof(ggml_tensor);
    ACL_CHECK(aclrtMemcpy(tensor->extra, meta_size, tensor, meta_size,
                          ACL_MEMCPY_HOST_TO_DEVICE));
}

GGML_CALL static void ggml_backend_cann_buffer_init_tensor(
ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
if (tensor->view_src != NULL && tensor->view_offs == 0) {
GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
set_tensor_extra(buffer, tensor);
return;
}

Expand All @@ -313,7 +382,6 @@ GGML_CALL static void ggml_backend_cann_buffer_init_tensor(
memset_size, 0, memset_size));
}
}
set_tensor_extra(buffer, tensor);
}

// TODO: need to handle tensors which have paddings.
Expand Down Expand Up @@ -650,7 +718,6 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
case GGML_OP_VIEW:
case GGML_OP_PERMUTE:
case GGML_OP_TRANSPOSE:
update_tensor_extra(dst);
break;
case GGML_OP_DIAG_MASK_INF:
ggml_cann_diag_mask(ctx, dst, -INFINITY);
Expand Down Expand Up @@ -692,7 +759,6 @@ GGML_CALL static void ggml_backend_cann_free(ggml_backend_t backend) {
ggml_backend_cann_context* cann_ctx =
(ggml_backend_cann_context*)backend->context;
ACL_CHECK(aclrtSynchronizeDevice());
cann_ctx->free_device_buffers();
ACL_CHECK(aclrtResetDevice(cann_ctx->device));
delete cann_ctx;
delete backend;
Expand Down Expand Up @@ -837,9 +903,6 @@ GGML_CALL static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
ggml_cann_set_device(cann_ctx->device);

ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));

// Free temp buffers binding to stream.
cann_ctx->free_stream_buffers(0);
}

GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute(
Expand All @@ -856,8 +919,6 @@ GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute(
continue;
}

// if tensor is reused, free temp buffers first.
cann_ctx->free_tensor_buffers(node);
bool ok = ggml_cann_compute_forward(*cann_ctx, node);

if (!ok) {
Expand Down
80 changes: 0 additions & 80 deletions ggml/src/ggml-cann/acl_ops.cpp

This file was deleted.

1 change: 1 addition & 0 deletions ggml/src/ggml-cann/acl_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ aclDataType type_mapping(ggml_type type) {
return ACL_DT_UNDEFINED;
}


/**
* Transform ggml_tensor to acl_tensor. Note that ggml_tensor dimension order
* is reversed compared to acl_tensor.
Expand Down
Loading

0 comments on commit 63947bc

Please sign in to comment.