Skip to content

Commit 0aa6488

Browse files
committed
ggml-cpu: add __ARM_FEATURE_DOTPROD guard
1 parent f56013d commit 0aa6488

File tree

3 files changed

+25
-9
lines changed

3 files changed

+25
-9
lines changed

ggml/include/ggml-cpu.h

+1
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ extern "C" {
9191
GGML_BACKEND_API int ggml_cpu_has_neon (void);
9292
GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
9393
GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
94+
GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
9495
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
9596
GGML_BACKEND_API int ggml_cpu_has_sve (void);
9697
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes

ggml/src/ggml-cpu/ggml-cpu-aarch64.c

+8-8
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
530530
UNUSED(blocklen);
531531

532532
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
533-
if (ggml_cpu_has_neon()) {
533+
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
534534
const void * b_ptr = vx;
535535
const void * a_ptr = vy;
536536
float * res_ptr = s;
@@ -1017,8 +1017,8 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * restrict s, size_t bs, const void
10171017
UNUSED(ncols_interleaved);
10181018
UNUSED(blocklen);
10191019

1020-
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
1021-
if (ggml_cpu_has_neon()) {
1020+
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD)
1021+
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
10221022
const int8x16_t kvalues = vld1q_s8(kvalues_iq4nl);
10231023
const block_q8_0 * a_ptr = (const block_q8_0 *) vy;
10241024
float * res_ptr = s;
@@ -1115,7 +1115,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
11151115
UNUSED(blocklen);
11161116

11171117
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
1118-
if (ggml_cpu_has_neon()) {
1118+
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
11191119
const void * b_ptr = vx;
11201120
const void * a_ptr = vy;
11211121
float * res_ptr = s;
@@ -3504,8 +3504,8 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * restrict s, size_t bs, const void
35043504
UNUSED(ncols_interleaved);
35053505
UNUSED(blocklen);
35063506

3507-
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
3508-
if (ggml_cpu_has_neon()) {
3507+
#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD)
3508+
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
35093509
const int8x16_t kvalues = vld1q_s8(kvalues_iq4nl);
35103510

35113511
for (int y = 0; y < nr / 4; y++) {
@@ -3834,11 +3834,11 @@ enum ggml_type ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * c
38343834
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
38353835
return GGML_TYPE_Q4_0_4_8;
38363836
}
3837-
if (ggml_cpu_has_neon()) {
3837+
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
38383838
return GGML_TYPE_Q4_0_4_4;
38393839
}
38403840
} else if (cur->type == GGML_TYPE_IQ4_NL) {
3841-
if (ggml_cpu_has_neon()) {
3841+
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
38423842
return GGML_TYPE_IQ4_NL_4_4;
38433843
}
38443844
}

ggml/src/ggml-cpu/ggml-cpu.c

+16-1
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,11 @@ static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
109109
#if defined(__ARM_ARCH)
110110
struct ggml_arm_arch_features_type {
111111
int has_neon;
112+
int has_dotprod;
112113
int has_i8mm;
113114
int has_sve;
114115
int sve_cnt;
115-
} ggml_arm_arch_features = {-1, -1, -1, 0};
116+
} ggml_arm_arch_features = {-1, -1, -1, -1, 0};
116117
#endif
117118

118119

@@ -2448,6 +2449,7 @@ static void ggml_init_arm_arch_features(void) {
24482449
uint32_t hwcap2 = getauxval(AT_HWCAP2);
24492450

24502451
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
2452+
ggml_arm_arch_features.has_dotprod = !!(hwcap && HWCAP_ASIMDDP);
24512453
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
24522454
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
24532455

@@ -2462,6 +2464,11 @@ static void ggml_init_arm_arch_features(void) {
24622464
}
24632465
ggml_arm_arch_features.has_neon = oldp;
24642466

2467+
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) {
2468+
oldp = 0;
2469+
}
2470+
ggml_arm_arch_features.has_dotprod = oldp;
2471+
24652472
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
24662473
oldp = 0;
24672474
}
@@ -13890,6 +13897,14 @@ int ggml_cpu_has_neon(void) {
1389013897
#endif
1389113898
}
1389213899

13900+
int ggml_cpu_has_dotprod(void) {
13901+
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD)
13902+
return ggml_arm_arch_features.has_dotprod;
13903+
#else
13904+
return 0;
13905+
#endif
13906+
}
13907+
1389313908
int ggml_cpu_has_sve(void) {
1389413909
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
1389513910
return ggml_arm_arch_features.has_sve;

0 commit comments

Comments
 (0)