Skip to content

Commit de13084

Browse files
committed
swresample/x86/resample: extend resample_double to support avx and fma3

benchmark:
sse2 10.670s
avx  8.763s
fma3 8.380s

Signed-off-by: Muhammad Faiz <[email protected]>
1 parent 3d5c216 commit de13084

File tree

2 files changed: 22 additions (+22) and 3 deletions (-3).

libswresample/x86/resample.asm

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \
203203
; horizontal sum & store
204204
%if mmsize == 32
205205
vextractf128 xm1, m0, 0x1
206-
addps xm0, xm1
206+
addp%4 xm0, xm1
207207
%endif
208208
movhlps xm1, xm0
209209
%ifidn %1, float
@@ -489,8 +489,8 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
489489
%if mmsize == 32
490490
vextractf128 xm1, m0, 0x1
491491
vextractf128 xm3, m2, 0x1
492-
addps xm0, xm1
493-
addps xm2, xm3
492+
addp%4 xm0, xm1
493+
addp%4 xm2, xm3
494494
%endif
495495
cvtsi2s%4 xm1, fracd
496496
subp%4 xm2, xm0
@@ -608,3 +608,12 @@ RESAMPLE_FNS int16, 2, 1
608608

609609
INIT_XMM sse2
610610
RESAMPLE_FNS double, 8, 3, d, pdbl_1
611+
612+
%if HAVE_AVX_EXTERNAL
613+
INIT_YMM avx
614+
RESAMPLE_FNS double, 8, 3, d, pdbl_1
615+
%endif
616+
%if HAVE_FMA3_EXTERNAL
617+
INIT_YMM fma3
618+
RESAMPLE_FNS double, 8, 3, d, pdbl_1
619+
%endif

libswresample/x86/resample_init.c

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,8 @@ RESAMPLE_FUNCS(float, avx);
4242
RESAMPLE_FUNCS(float, fma3);
4343
RESAMPLE_FUNCS(float, fma4);
4444
RESAMPLE_FUNCS(double, sse2);
45+
RESAMPLE_FUNCS(double, avx);
46+
RESAMPLE_FUNCS(double, fma3);
4547

4648
av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
4749
{
@@ -85,6 +87,14 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
8587
c->dsp.resample_linear = ff_resample_linear_double_sse2;
8688
c->dsp.resample_common = ff_resample_common_double_sse2;
8789
}
90+
if (EXTERNAL_AVX_FAST(mm_flags)) {
91+
c->dsp.resample_linear = ff_resample_linear_double_avx;
92+
c->dsp.resample_common = ff_resample_common_double_avx;
93+
}
94+
if (EXTERNAL_FMA3_FAST(mm_flags)) {
95+
c->dsp.resample_linear = ff_resample_linear_double_fma3;
96+
c->dsp.resample_common = ff_resample_common_double_fma3;
97+
}
8898
break;
8999
}
90100
}

0 commit comments

Comments
 (0)