Skip to content

Commit

Permalink
x86/fma: Use 128 bit fnmadd_pd to do 256 bit fnmadd_pd (#1197)
Browse files (browse the repository at this point in the history)
  • Loading branch information
AlexK-BD authored Jul 20, 2024
1 parent 6f52a1d commit bd05320
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions simde/x86/fma.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,11 +464,16 @@ simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
b_ = simde__m256d_to_private(b),
c_ = simde__m256d_to_private(c);

-SIMDE_VECTORIZE
-for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
-r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];
-}
-
+#if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
+for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
+r_.m128d[i] = simde_mm_fnmadd_pd(a_.m128d[i], b_.m128d[i], c_.m128d[i]);
+}
+#else
+SIMDE_VECTORIZE
+for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
+r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];
+}
+#endif
return simde__m256d_from_private(r_);
#endif
}
Expand Down

0 comments on commit bd05320

Please sign in to comment.