Skip to content

Commit 655a3fb

Browse files
committed
extract cpu extra bufts and convert to C++
- hbm
- "aarch64"
1 parent 9849c64 commit 655a3fb

12 files changed

+327
-267
lines changed

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,7 @@ OBJ_GGML = \
947947
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
948948
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o \
949949
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
950+
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
950951
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
951952
$(OBJ_GGML_EXT)
952953

Package.swift

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ var sources = [
1515
"ggml/src/ggml-backend-reg.cpp",
1616
"ggml/src/ggml-cpu/ggml-cpu.c",
1717
"ggml/src/ggml-cpu/ggml-cpu.cpp",
18-
"ggml/src/ggml-cpu/ggml-cpu-aarch64.c",
18+
"ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp",
19+
"ggml/src/ggml-cpu/ggml-cpu-hbm.cpp",
1920
"ggml/src/ggml-cpu/ggml-cpu-quants.c",
2021
"ggml/src/ggml-threading.cpp",
2122
"ggml/src/ggml-quants.c",

ggml/include/ggml-cpu.h

-7
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,6 @@ extern "C" {
156156

157157
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
158158

159-
#ifdef GGML_USE_CPU_HBM
160-
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
161-
#endif
162-
163-
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
164-
GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
165-
166159
#ifdef __cplusplus
167160
}
168161
#endif

ggml/src/ggml-common.h

+41-17
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,20 @@
66
typedef uint16_t ggml_half;
77
typedef uint32_t ggml_half2;
88

9-
#define GGML_COMMON_AGGR
9+
#define GGML_COMMON_AGGR_U
10+
#define GGML_COMMON_AGGR_S
11+
12+
#define GGML_COMMON_DECL
13+
#elif defined(GGML_COMMON_DECL_CPP)
14+
#include <cstdint>
15+
16+
typedef uint16_t ggml_half;
17+
typedef uint32_t ggml_half2;
18+
19+
// standard C++ allows anonymous unions, but some compilers warn about them
20+
#define GGML_COMMON_AGGR_U data
21+
// standard C++ does not allow anonymous structs.
22+
#define GGML_COMMON_AGGR_S data
1023

1124
#define GGML_COMMON_DECL
1225
#elif defined(GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t ggml_half2;
1528
typedef half ggml_half;
1629
typedef half2 ggml_half2;
1730

18-
#define GGML_COMMON_AGGR
31+
#define GGML_COMMON_AGGR_U
32+
#define GGML_COMMON_AGGR_S
1933

2034
#define GGML_COMMON_DECL
2135
#elif defined(GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 ggml_half2;
2943
typedef half ggml_half;
3044
typedef half2 ggml_half2;
3145

32-
#define GGML_COMMON_AGGR data
46+
#define GGML_COMMON_AGGR_U
47+
#define GGML_COMMON_AGGR_S data
3348

3449
#define GGML_COMMON_DECL
3550
#elif defined(GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 ggml_half2;
3954
typedef half ggml_half;
4055
typedef half2 ggml_half2;
4156

42-
#define GGML_COMMON_AGGR data
57+
#define GGML_COMMON_AGGR_U
58+
#define GGML_COMMON_AGGR_S data
4359

4460
#define GGML_COMMON_DECL
4561
#elif defined(GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 ggml_half2;
4965
typedef sycl::half ggml_half;
5066
typedef sycl::half2 ggml_half2;
5167

52-
#define GGML_COMMON_AGGR data
68+
#define GGML_COMMON_AGGR_U
69+
#define GGML_COMMON_AGGR_S data
5370

5471
#define GGML_COMMON_DECL
5572
#endif
@@ -154,9 +171,9 @@ typedef struct {
154171
struct {
155172
ggml_half d; // delta
156173
ggml_half m; // min
157-
} GGML_COMMON_AGGR;
174+
} GGML_COMMON_AGGR_S;
158175
ggml_half2 dm;
159-
};
176+
} GGML_COMMON_AGGR_U;
160177
uint8_t qs[QK4_1 / 2]; // nibbles / quants
161178
} block_q4_1;
162179
static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -175,9 +192,9 @@ typedef struct {
175192
struct {
176193
ggml_half d; // delta
177194
ggml_half m; // min
178-
} GGML_COMMON_AGGR;
195+
} GGML_COMMON_AGGR_S;
179196
ggml_half2 dm;
180-
};
197+
} GGML_COMMON_AGGR_U;
181198
uint8_t qh[4]; // 5-th bit of quants
182199
uint8_t qs[QK5_1 / 2]; // nibbles / quants
183200
} block_q5_1;
@@ -196,9 +213,9 @@ typedef struct {
196213
struct {
197214
ggml_half d; // delta
198215
ggml_half s; // d * sum(qs[i])
199-
} GGML_COMMON_AGGR;
216+
} GGML_COMMON_AGGR_S;
200217
ggml_half2 ds;
201-
};
218+
} GGML_COMMON_AGGR_U;
202219
int8_t qs[QK8_1]; // quants
203220
} block_q8_1;
204221
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
@@ -237,9 +254,9 @@ typedef struct {
237254
struct {
238255
ggml_half d; // super-block scale for quantized scales
239256
ggml_half dmin; // super-block scale for quantized mins
240-
} GGML_COMMON_AGGR;
257+
} GGML_COMMON_AGGR_S;
241258
ggml_half2 dm;
242-
};
259+
} GGML_COMMON_AGGR_U;
243260
} block_q2_K;
244261
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
245262

@@ -264,9 +281,9 @@ typedef struct {
264281
struct {
265282
ggml_half d; // super-block scale for quantized scales
266283
ggml_half dmin; // super-block scale for quantized mins
267-
} GGML_COMMON_AGGR;
284+
} GGML_COMMON_AGGR_S;
268285
ggml_half2 dm;
269-
};
286+
} GGML_COMMON_AGGR_U;
270287
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
271288
uint8_t qs[QK_K/2]; // 4-bit quants
272289
} block_q4_K;
@@ -281,9 +298,9 @@ typedef struct {
281298
struct {
282299
ggml_half d; // super-block scale for quantized scales
283300
ggml_half dmin; // super-block scale for quantized mins
284-
} GGML_COMMON_AGGR;
301+
} GGML_COMMON_AGGR_S;
285302
ggml_half2 dm;
286-
};
303+
} GGML_COMMON_AGGR_U;
287304
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
288305
uint8_t qh[QK_K/8]; // quants, high bit
289306
uint8_t qs[QK_K/2]; // quants, low 4 bits
@@ -407,6 +424,13 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
407424
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
408425
#define GGML_TABLE_END() };
409426

427+
#define GGML_COMMON_IMPL
428+
#elif defined(GGML_COMMON_IMPL_CPP)
429+
#include <cstdint>
430+
431+
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
432+
#define GGML_TABLE_END() };
433+
410434
#define GGML_COMMON_IMPL
411435
#elif defined(GGML_COMMON_IMPL_METAL)
412436
#include <metal_stdlib>

ggml/src/ggml-cpu/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
add_library(ggml-cpu
22
ggml-cpu.c
33
ggml-cpu.cpp
4-
ggml-cpu-aarch64.c
4+
ggml-cpu-aarch64.cpp
55
ggml-cpu-aarch64.h
6+
ggml-cpu-hbm.cpp
7+
ggml-cpu-hbm.h
68
ggml-cpu-quants.c
79
ggml-cpu-quants.h
810
)

0 commit comments

Comments
 (0)