@@ -530,7 +530,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
530
530
UNUSED (blocklen );
531
531
532
532
#if ! ((defined(_MSC_VER )) && ! defined(__clang__ )) && defined(__aarch64__ ) && defined(__ARM_NEON )
533
- if (ggml_cpu_has_neon ()) {
533
+ if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod () ) {
534
534
const void * b_ptr = vx ;
535
535
const void * a_ptr = vy ;
536
536
float * res_ptr = s ;
@@ -1017,8 +1017,8 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * restrict s, size_t bs, const void
1017
1017
UNUSED (ncols_interleaved );
1018
1018
UNUSED (blocklen );
1019
1019
1020
- #if ! ((defined(_MSC_VER )) && ! defined(__clang__ )) && defined(__aarch64__ ) && defined(__ARM_NEON )
1021
- if (ggml_cpu_has_neon ()) {
1020
+ #if ! ((defined(_MSC_VER )) && ! defined(__clang__ )) && defined(__aarch64__ ) && defined(__ARM_NEON ) && defined( __ARM_FEATURE_DOTPROD )
1021
+ if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod () ) {
1022
1022
const int8x16_t kvalues = vld1q_s8 (kvalues_iq4nl );
1023
1023
const block_q8_0 * a_ptr = (const block_q8_0 * ) vy ;
1024
1024
float * res_ptr = s ;
@@ -1115,7 +1115,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
1115
1115
UNUSED (blocklen );
1116
1116
1117
1117
#if ! ((defined(_MSC_VER )) && ! defined(__clang__ )) && defined(__aarch64__ ) && defined(__ARM_NEON )
1118
- if (ggml_cpu_has_neon ()) {
1118
+ if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod () ) {
1119
1119
const void * b_ptr = vx ;
1120
1120
const void * a_ptr = vy ;
1121
1121
float * res_ptr = s ;
@@ -3504,8 +3504,8 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * restrict s, size_t bs, const void
3504
3504
UNUSED (ncols_interleaved );
3505
3505
UNUSED (blocklen );
3506
3506
3507
- #if ! ((defined(_MSC_VER )) && ! defined(__clang__ )) && defined(__aarch64__ ) && defined(__ARM_NEON )
3508
- if (ggml_cpu_has_neon ()) {
3507
+ #if ! ((defined(_MSC_VER )) && ! defined(__clang__ )) && defined(__aarch64__ ) && defined(__ARM_NEON ) && defined( __ARM_FEATURE_DOTPROD )
3508
+ if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod () ) {
3509
3509
const int8x16_t kvalues = vld1q_s8 (kvalues_iq4nl );
3510
3510
3511
3511
for (int y = 0 ; y < nr / 4 ; y ++ ) {
@@ -3834,11 +3834,11 @@ enum ggml_type ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * c
3834
3834
if (ggml_cpu_has_neon () && ggml_cpu_has_matmul_int8 ()) {
3835
3835
return GGML_TYPE_Q4_0_4_8 ;
3836
3836
}
3837
- if (ggml_cpu_has_neon ()) {
3837
+ if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod () ) {
3838
3838
return GGML_TYPE_Q4_0_4_4 ;
3839
3839
}
3840
3840
} else if (cur -> type == GGML_TYPE_IQ4_NL ) {
3841
- if (ggml_cpu_has_neon ()) {
3841
+ if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod () ) {
3842
3842
return GGML_TYPE_IQ4_NL_4_4 ;
3843
3843
}
3844
3844
}
0 commit comments