Skip to content

Commit 02e8837

Browse files
andrula-songkv2019i
authored and committed
Audio: Component: Add HiFi5 implementation of cir_buf_copy.
Add HiFi3 and HiFi5 implementations of the function cir_buf_copy. Compared with the generic C version, the HiFi3 version saves about 3% of the cycles and the HiFi5 version saves about 40% of the cycles. Signed-off-by: Andrula Song <[email protected]>
1 parent 40c8e47 commit 02e8837

File tree

1 file changed

+90
-2
lines changed

1 file changed

+90
-2
lines changed

src/audio/component.c

+90-2
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,50 @@ int audio_stream_copy(const struct audio_stream *source, uint32_t ioffset,
232232
return samples;
233233
}
234234

235+
void cir_buf_copy(void *src, void *src_addr, void *src_end, void *dst,
236+
void *dst_addr, void *dst_end, size_t byte_size)
237+
{
238+
size_t bytes = byte_size;
239+
size_t bytes_src;
240+
size_t bytes_dst;
241+
size_t bytes_copied;
242+
size_t short_copied;
243+
int left, m, i;
244+
ae_int16x4 in_sample1, in_sample2;
245+
ae_valignx2 inu;
246+
ae_valignx2 outu = AE_ZALIGN128();
247+
ae_int16x8 *in = (ae_int16x8 *)src;
248+
ae_int16x8 *out = (ae_int16x8 *)dst;
249+
250+
while (bytes) {
251+
bytes_src = cir_buf_bytes_without_wrap(in, src_end);
252+
bytes_dst = cir_buf_bytes_without_wrap(out, dst_end);
253+
bytes_copied = MIN(bytes_src, bytes_dst);
254+
bytes_copied = MIN(bytes, bytes_copied);
255+
short_copied = bytes_copied >> 1;
256+
257+
m = short_copied >> 3;
258+
left = short_copied & 0x07;
259+
inu = AE_LA128_PP(in);
260+
/* copy 2 * 4 * 16bit(16 bytes)per loop */
261+
for (i = 0; i < m; i++) {
262+
AE_LA16X4X2_IP(in_sample1, in_sample2, inu, in);
263+
AE_SA16X4X2_IP(in_sample1, in_sample2, outu, out);
264+
}
265+
AE_SA128POS_FP(outu, out);
266+
267+
/* process the left bits that less than 2 * 4 * 16 */
268+
for (i = 0; i < left ; i++) {
269+
AE_L16_IP(in_sample1, (ae_int16 *)in, sizeof(ae_int16));
270+
AE_S16_0_IP(in_sample1, (ae_int16 *)out, sizeof(ae_int16));
271+
}
272+
273+
bytes -= bytes_copied;
274+
in = cir_buf_wrap(in, src_addr, src_end);
275+
out = cir_buf_wrap(out, dst_addr, dst_end);
276+
}
277+
}
278+
235279
#elif defined(STREAMCOPY_HIFI3)
236280

237281
#include <xtensa/tie/xt_hifi3.h>
@@ -279,6 +323,50 @@ int audio_stream_copy(const struct audio_stream *source, uint32_t ioffset,
279323
return samples;
280324
}
281325

326+
void cir_buf_copy(void *src, void *src_addr, void *src_end, void *dst,
327+
void *dst_addr, void *dst_end, size_t byte_size)
328+
{
329+
size_t bytes = byte_size;
330+
size_t bytes_src;
331+
size_t bytes_dst;
332+
size_t bytes_copied;
333+
size_t short_copied;
334+
335+
int left, m, i;
336+
ae_int16x4 in_sample = AE_ZERO16();
337+
ae_valign inu = AE_ZALIGN64();
338+
ae_valign outu = AE_ZALIGN64();
339+
ae_int16x4 *in = (ae_int16x4 *)src;
340+
ae_int16x4 *out = (ae_int16x4 *)dst;
341+
342+
while (bytes) {
343+
bytes_src = cir_buf_bytes_without_wrap(in, src_end);
344+
bytes_dst = cir_buf_bytes_without_wrap(out, dst_end);
345+
bytes_copied = MIN(bytes_src, bytes_dst);
346+
bytes_copied = MIN(bytes, bytes_copied);
347+
short_copied = bytes_copied >> 1;
348+
m = short_copied >> 2;
349+
left = short_copied & 0x03;
350+
inu = AE_LA64_PP(in);
351+
/* copy 4 * 16bit(8 bytes)per loop */
352+
for (i = 0; i < m; i++) {
353+
AE_LA16X4_IP(in_sample, inu, in);
354+
AE_SA16X4_IP(in_sample, outu, out);
355+
}
356+
AE_SA64POS_FP(outu, out);
357+
358+
/* process the left bits that less than 4 * 16 */
359+
for (i = 0; i < left ; i++) {
360+
AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16));
361+
AE_S16_0_IP(in_sample, (ae_int16 *)out, sizeof(ae_int16));
362+
}
363+
364+
bytes -= bytes_copied;
365+
in = cir_buf_wrap(in, src_addr, src_end);
366+
out = cir_buf_wrap(out, dst_addr, dst_end);
367+
}
368+
}
369+
282370
#else
283371

284372
int audio_stream_copy(const struct audio_stream *source, uint32_t ioffset,
@@ -308,8 +396,6 @@ int audio_stream_copy(const struct audio_stream *source, uint32_t ioffset,
308396
return samples;
309397
}
310398

311-
#endif
312-
313399
void cir_buf_copy(void *src, void *src_addr, void *src_end, void *dst,
314400
void *dst_addr, void *dst_end, size_t byte_size)
315401
{
@@ -332,6 +418,8 @@ void cir_buf_copy(void *src, void *src_addr, void *src_end, void *dst,
332418
}
333419
}
334420

421+
#endif
422+
335423
void audio_stream_copy_from_linear(const void *linear_source, int ioffset,
336424
struct audio_stream *sink, int ooffset,
337425
unsigned int samples)

0 commit comments

Comments
 (0)