We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent fee824a · commit 9e2301f · Copy full SHA for 9e2301f
ggml/src/ggml-metal/ggml-metal.m
@@ -997,9 +997,10 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
997
return ggml_is_contiguous(op->src[0]);
998
case GGML_OP_SUM_ROWS:
999
case GGML_OP_SOFT_MAX:
1000
- case GGML_OP_RMS_NORM:
1001
case GGML_OP_GROUP_NORM:
1002
return has_simdgroup_reduction;
+ case GGML_OP_RMS_NORM:
1003
+ return has_simdgroup_reduction && (op->ne[0] % 4 == 0);
1004
case GGML_OP_NORM:
1005
case GGML_OP_ROPE:
1006
return true;
@@ -2672,7 +2673,6 @@ static void ggml_metal_encode_node(
2672
2673
} break;
2674
2675
{
- GGML_ASSERT(ne00 % 4 == 0);
2676
GGML_ASSERT(ggml_is_contiguous(src0));
2677
2678
float eps;
0 commit comments