Skip to content

Commit

Permalink
[CANN] Add Ascend NPU backend
Browse files Browse the repository at this point in the history
Ascend is a full-stack AI computing infrastructure for industry
applications and services based on Huawei Ascend processors and
software.

CANN (Compute Architecture of Neural Networks), developed by
Huawei, is a heterogeneous computing architecture for AI.

Co-authored-by: wangshuai09 <[email protected]>
  • Loading branch information
hipudding and wangshuai09 committed Jul 5, 2024
1 parent f09b7cb commit a135013
Show file tree
Hide file tree
Showing 31 changed files with 6,505 additions and 8 deletions.
4 changes: 4 additions & 0 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@
#define GGML_USE_CUDA_SYCL_VULKAN
#endif

// Umbrella switch: defined whenever at least one of the CUDA, SYCL or CANN
// backends is enabled at compile time.
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_CANN)
#    define GGML_USE_CUDA_SYCL_CANN
#endif

#if defined(LLAMA_USE_CURL)
#ifdef __linux__
#include <linux/limits.h>
Expand Down
15 changes: 15 additions & 0 deletions examples/llama-bench/llama-bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include "ggml-cuda.h"
#include "ggml-sycl.h"

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif

// utils
static uint64_t get_time_ns() {
using clock = std::chrono::high_resolution_clock;
Expand Down Expand Up @@ -120,6 +124,17 @@ static std::string get_gpu_info() {
id += "/";
}
}
#endif
#ifdef GGML_USE_CANN
    // Append the description of every CANN device to `id`, separated by '/'.
    // The count and index are kept as int32_t because that is the type the
    // CANN API is declared with; storing the count in an unsigned variable
    // would turn a negative return value into a huge loop bound.
    const int32_t count = ggml_backend_cann_get_device_count();
    for (int32_t i = 0; i < count; i++) {
        char buf[128];
        ggml_backend_cann_get_device_description(i, buf, sizeof(buf));
        id += buf;
        // No trailing separator after the last device.
        if (i < count - 1) {
            id += "/";
        }
    }
#endif
// TODO: other backends
return id;
Expand Down
9 changes: 9 additions & 0 deletions examples/llava/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
#include "ggml-metal.h"
#endif

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"

Expand Down Expand Up @@ -1001,6 +1005,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
LOG_TEE("%s: CLIP using Metal backend\n", __func__);
#endif

#ifdef GGML_USE_CANN
    // Initialise CLIP on CANN device 0. The notice goes through LOG_TEE, like
    // the other backend notices in this function, instead of raw printf.
    // NOTE(review): the notice is emitted even when init returns NULL and the
    // CPU fallback that follows takes over.
    new_clip->backend = ggml_backend_cann_init(0);
    LOG_TEE("%s: CLIP using CANN backend\n", __func__);
#endif


if (!new_clip->backend) {
new_clip->backend = ggml_backend_cpu_init();
Expand Down
46 changes: 46 additions & 0 deletions ggml/include/ggml-cann.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Public C interface of the CANN backend: backend/buffer-type constructors,
// device queries, and two quantization block layouts.
#pragma once

#include "ggml-backend.h"
#include "ggml.h"

// Name string associated with this backend.
#define GGML_CANN_NAME "CANN"

#ifdef __cplusplus
extern "C" {
#endif

// presumably the upper bound on device indices handled by the backend -- TODO confirm against the implementation
#define GGML_CANN_MAX_DEVICES 16

// Number of quantized values per q4_0 block; two values share one byte of `qs`,
// hence the QK4_0 / 2 array length.
// NOTE(review): QK4_0/QK8_0 and the block_* typedefs use names that look like
// ggml's own quantization types -- confirm they cannot be defined twice in one
// translation unit.
#define QK4_0 32
typedef struct {
uint16_t d; // delta
uint8_t qs[QK4_0 / 2]; // nibbles / quants
} block_q4_0;


// Number of quantized values per q8_0 block; one int8_t per value.
#define QK8_0 32
typedef struct {
uint16_t d; // delta
int8_t qs[QK8_0]; // quants
} block_q8_0;

// backend API
// Creates a backend handle for the device with the given index.
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);

// Reports whether `backend` is a CANN backend.
GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);

// device buffer
// Returns the buffer type associated with the given device index.
GGML_API GGML_CALL ggml_backend_buffer_type_t
ggml_backend_cann_buffer_type(int32_t device);

// Device queries. The count is a signed 32-bit value; the description is
// written into the caller-supplied buffer of `description_size` bytes, and the
// memory query writes its results through the `free` and `total` pointers.
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
int32_t device, char* description, size_t description_size);
GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
size_t* free,
size_t* total);
// NOTE(review): the two functions below are declared without GGML_API /
// GGML_CALL, unlike every declaration above -- confirm this is intentional.
void ggml_cann_backend_init(void);
void ggml_cann_backend_free(void);
#ifdef __cplusplus
}
#endif
3 changes: 3 additions & 0 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,8 @@ extern "C" {
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

// use this to compute the memory overhead of a tensor
GGML_API size_t ggml_tensor_overhead(void);

Expand Down Expand Up @@ -2391,6 +2393,7 @@ extern "C" {
GGML_API int ggml_cpu_has_rpc (void);
GGML_API int ggml_cpu_has_vsx (void);
GGML_API int ggml_cpu_has_matmul_int8(void);
GGML_API int ggml_cpu_has_cann (void);

//
// Internal types and functions exposed for tests and benchmarks
Expand Down
69 changes: 69 additions & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,74 @@ if (GGML_CPU_HBM)
target_link_libraries(ggml PUBLIC memkind)
endif()

# CANN (Huawei Ascend NPU) backend.
#
# Inputs:
#   LLAMA_CANN        - requests the backend; set to OFF here (with a warning)
#                       as soon as one precondition fails.
#   CANN_INSTALL_DIR  - root of the CANN toolkit; when empty, the
#                       ASCEND_TOOLKIT_HOME environment variable is used.
# Results (only while LLAMA_CANN stays ON):
#   GGML_HEADERS_CANN and GGML_SOURCES_CANN are set, and GGML_EXTRA_LIBS,
#   GGML_EXTRA_INCLUDES and GGML_CDEF_PUBLIC (GGML_USE_CANN) are extended.
if (LLAMA_CANN)
    # The "cann" prefix keeps both operands non-empty, so the comparison is
    # well-formed even when CANN_INSTALL_DIR is empty or unset.
    if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
        set(CANN_INSTALL_DIR "$ENV{ASCEND_TOOLKIT_HOME}")
        message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
    endif()

    if (CANN_INSTALL_DIR)
        # Host check: anything that is not UNIX-like is rejected.
        if (NOT UNIX)
            set(LLAMA_CANN OFF)
            message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off LLAMA_CANN")
        endif()

        # Supported processors: x86-64 (reported as x86_64 or amd64) and arm64.
        if (LLAMA_CANN AND NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR
                                CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR
                                CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64"))
            set(LLAMA_CANN OFF)
            message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off LLAMA_CANN")
        endif()

        # Headers, libraries and sources.
        if (LLAMA_CANN)
            set(CANN_INCLUDE_DIRS
                ${CANN_INSTALL_DIR}/include
                ${CANN_INSTALL_DIR}/include/aclnn
                ${CANN_INSTALL_DIR}/acllib/include
            )

            # TODO: find libs
            # The libraries below are linked by bare name, so the toolkit's
            # lib64 directory has to be on the linker search path.
            link_directories(
                ${CANN_INSTALL_DIR}/lib64
            )

            add_subdirectory(ggml-cann/kernels)
            list(APPEND CANN_LIBRARIES
                ascendcl
                nnopbase
                opapi
                acl_op_compiler
                ascendc_kernels
            )

            set(GGML_HEADERS_CANN "../include/ggml-cann.h")
            file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
            list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")

            message(STATUS "CANN: CANN_INCLUDE_DIRS =  ${CANN_INCLUDE_DIRS}")
            message(STATUS "CANN: CANN_LIBRARIES =  ${CANN_LIBRARIES}")

            list(APPEND GGML_EXTRA_LIBS     ${CANN_LIBRARIES})
            list(APPEND GGML_EXTRA_INCLUDES ${CANN_INCLUDE_DIRS})
            list(APPEND GGML_CDEF_PUBLIC    GGML_USE_CANN)
        endif()
    else()
        set(LLAMA_CANN OFF)
        # The toolkit's environment script is set_env.sh; sourcing it exports
        # ASCEND_TOOLKIT_HOME, which the fallback above reads.
        message(WARNING "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_env.sh? Turning off LLAMA_CANN")
    endif()

    # Final summary so a disabled backend is visible at the end of the CANN output.
    if (NOT LLAMA_CANN)
        message(WARNING "CANN: LLAMA_CANN is turned OFF, see above for details.")
    endif()
endif()

function(get_flags CCID CCVER)
set(C_FLAGS "")
set(CXX_FLAGS "")
Expand Down Expand Up @@ -1153,6 +1221,7 @@ add_library(ggml
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
)

if (EMSCRIPTEN)
Expand Down
5 changes: 5 additions & 0 deletions ggml/src/ggml-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
ggml_backend_kompute_reg_devices();
#endif

#ifdef GGML_USE_CANN
// Declare the CANN registration entry point locally, then call it.
// The int return value is discarded.
extern GGML_CALL int ggml_backend_cann_reg_devices(void);
ggml_backend_cann_reg_devices();
#endif
}

GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
Expand Down
Loading

0 comments on commit a135013

Please sign in to comment.