@@ -614,8 +614,9 @@ class vbool4 {
614614template <int i0, int i1, int i2, int i3>
615615OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a);
616616
617- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
618- template <int i> OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a);
617+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have
618+ // / value a[i].
619+ template <int i> OIIO_FORCEINLINE vbool4 broadcast_element (const vbool4& a);
619620
620621// / Helper: as rapid as possible extraction of one component, when the
621622// / index is fixed.
@@ -765,8 +766,9 @@ class vbool8 {
765766template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
766767OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a);
767768
768- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
769- template <int i> OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a);
769+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have
770+ // / value a[i].
771+ template <int i> OIIO_FORCEINLINE vbool8 broadcast_element (const vbool8& a);
770772
771773// / Helper: as rapid as possible extraction of one component, when the
772774// / index is fixed.
@@ -1158,8 +1160,9 @@ vint4 srl (const vint4& val, const unsigned int bits);
11581160template <int i0, int i1, int i2, int i3>
11591161OIIO_FORCEINLINE vint4 shuffle (const vint4& a);
11601162
1161- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
1162- template <int i> OIIO_FORCEINLINE vint4 shuffle (const vint4& a);
1163+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have
1164+ // / value a[i].
1165+ template <int i> OIIO_FORCEINLINE vint4 broadcast_element (const vint4& a);
11631166
11641167// / Helper: as rapid as possible extraction of one component, when the
11651168// / index is fixed.
@@ -1458,8 +1461,9 @@ vint8 srl (const vint8& val, const unsigned int bits);
14581461template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
14591462OIIO_FORCEINLINE vint8 shuffle (const vint8& a);
14601463
1461- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
1462- template <int i> OIIO_FORCEINLINE vint8 shuffle (const vint8& a);
1464+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have
1465+ // / value a[i].
1466+ template <int i> OIIO_FORCEINLINE vint8 broadcast_element (const vint8& a);
14631467
14641468// / Helper: as rapid as possible extraction of one component, when the
14651469// / index is fixed.
@@ -1768,8 +1772,9 @@ template<int i> vint16 shuffle4 (const vint16& a);
17681772template <int i0, int i1, int i2, int i3>
17691773vint16 shuffle (const vint16& a);
17701774
1771- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
1772- template <int i> vint16 shuffle (const vint16& a);
1775+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have
1776+ // / value a[i].
1777+ template <int i> vint16 broadcast_element (const vint16& a);
17731778
17741779// / Helper: as rapid as possible extraction of one component, when the
17751780// / index is fixed.
@@ -2093,8 +2098,9 @@ class vfloat4 {
20932098template <int i0, int i1, int i2, int i3>
20942099OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a);
20952100
2096- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
2097- template <int i> OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a);
2101+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have
2102+ // / value a[i].
2103+ template <int i> OIIO_FORCEINLINE vfloat4 broadcast_element (const vfloat4& a);
20982104
20992105// / Return { a[i0], a[i1], b[i2], b[i3] }, where i0..i3 are the extracted
21002106// / 2-bit indices packed into the template parameter i (going from the low
@@ -2716,8 +2722,8 @@ class vfloat8 {
27162722template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
27172723OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a);
27182724
2719- // / shuffle <i>(a) is the same as shuffle<i,i,i,i,...>(a)
2720- template <int i> OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a);
2725+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have value a[i].
2726+ template <int i> OIIO_FORCEINLINE vfloat8 broadcast_element (const vfloat8& a);
27212727
27222728// / Helper: as rapid as possible extraction of one component, when the
27232729// / index is fixed.
@@ -3046,8 +3052,9 @@ template<int i> OIIO_FORCEINLINE vfloat16 shuffle4 (const vfloat16& a);
30463052template <int i0, int i1, int i2, int i3>
30473053OIIO_FORCEINLINE vfloat16 shuffle (const vfloat16& a);
30483054
3049- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
3050- template <int i> vfloat16 shuffle (const vfloat16& a);
3055+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have
3056+ // / value a[i].
3057+ template <int i> vfloat16 broadcast_element (const vfloat16& a);
30513058
30523059// / Helper: as rapid as possible extraction of one component, when the
30533060// / index is fixed.
@@ -3468,11 +3475,17 @@ OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a) {
34683475#endif
34693476}
34703477
3471- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
3472- template <int i> OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a) {
3478+ // / broadcast_element<i>(a) returns a simd variable in which all lanes have
3479+ // / value a[i].
3480+ template <int i> OIIO_FORCEINLINE vbool4 broadcast_element (const vbool4& a) {
34733481 return shuffle<i,i,i,i>(a);
34743482}
34753483
3484+ // DEPRECATED(3.1): old name; use broadcast_element instead
3485+ template <int i> OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a) {
3486+ return broadcast_element<i>(a);
3487+ }
3488+
34763489
34773490// / Helper: as rapid as possible extraction of one component, when the
34783491// / index is fixed.
@@ -3796,10 +3809,15 @@ OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a) {
37963809#endif
37973810}
37983811
3799- template <int i> OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a) {
3812+ template <int i> OIIO_FORCEINLINE vbool8 broadcast_element (const vbool8& a) {
38003813 return shuffle<i,i,i,i,i,i,i,i>(a);
38013814}
38023815
3816+ // DEPRECATED(3.1): old name; use broadcast_element instead
3817+ template <int i> OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a) {
3818+ return broadcast_element<i>(a);
3819+ }
3820+
38033821
38043822template <int i>
38053823OIIO_FORCEINLINE bool extract (const vbool8& a) {
@@ -4739,7 +4757,14 @@ OIIO_FORCEINLINE vint4 shuffle (const vint4& a) {
47394757#endif
47404758}
47414759
4742- template <int i> OIIO_FORCEINLINE vint4 shuffle (const vint4& a) { return shuffle<i,i,i,i>(a); }
4760+ template <int i> OIIO_FORCEINLINE vint4 broadcast_element (const vint4& a) {
4761+ return shuffle<i,i,i,i>(a);
4762+ }
4763+
4764+ // DEPRECATED(3.1): old name; use broadcast_element instead
4765+ template <int i> OIIO_FORCEINLINE vint4 shuffle (const vint4& a) {
4766+ return broadcast_element<i>(a);
4767+ }
47434768
47444769
47454770template <int i>
@@ -5579,10 +5604,15 @@ OIIO_FORCEINLINE vint8 shuffle (const vint8& a) {
55795604#endif
55805605}
55815606
5582- template <int i> OIIO_FORCEINLINE vint8 shuffle (const vint8& a) {
5607+ template <int i> OIIO_FORCEINLINE vint8 broadcast_element (const vint8& a) {
55835608 return shuffle<i,i,i,i,i,i,i,i>(a);
55845609}
55855610
5611+ // DEPRECATED(3.1): old name; use broadcast_element instead
5612+ template <int i> OIIO_FORCEINLINE vint8 shuffle (const vint8& a) {
5613+ return broadcast_element<i>(a);
5614+ }
5615+
55865616
55875617template <int i>
55885618OIIO_FORCEINLINE int extract (const vint8& v) {
@@ -6390,8 +6420,15 @@ vint16 shuffle (const vint16& a) {
63906420#endif
63916421}
63926422
6393- template <int i> vint16 shuffle (const vint16& a) {
6394- return shuffle<i,i,i,i> (a);
6423+ template <int i> vint16 broadcast_element (const vint16& a) {
6424+ return a[i];  // a[i] is one lane; the vint16 return type converts it back to a vector (presumably the scalar-broadcast constructor -- confirm)
6425+ }
6426+
6427+ // DEPRECATED(3.1): old name and nonstandard use. Keeps the legacy result shuffle<i,i,i,i>(a), which is not necessarily what broadcast_element<i>(a) returns.
6428+ template <int i>
6429+ OIIO_DEPRECATED (" Use broadcast_element (3.1)" )
6430+ vint16 shuffle (const vint16& a) {
6431+ return shuffle<i,i,i,i> (a);
63956432}
63966433
63976434
@@ -7248,19 +7285,26 @@ OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a) {
72487285#endif
72497286}
72507287
7251- template <int i> OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a) { return shuffle<i,i,i,i>(a); }
7288+ template <int i> OIIO_FORCEINLINE vfloat4 broadcast_element (const vfloat4& a) {
7289+ return shuffle<i,i,i,i>(a);
7290+ }
7291+
7292+ // DEPRECATED(3.1): old name; use broadcast_element instead
7293+ template <int i> OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a) {
7294+ return broadcast_element<i>(a);
7295+ }
72527296
72537297#if OIIO_SIMD_NEON
7254- template <> OIIO_FORCEINLINE vfloat4 shuffle <0 > (const vfloat4& a) {
7298+ template <> OIIO_FORCEINLINE vfloat4 broadcast_element <0 > (const vfloat4& a) {
72557299 float32x2_t t = vget_low_f32 (a.simd ()); return vdupq_lane_f32 (t,0 );
72567300}
7257- template <> OIIO_FORCEINLINE vfloat4 shuffle <1 > (const vfloat4& a) {
7301+ template <> OIIO_FORCEINLINE vfloat4 broadcast_element <1 > (const vfloat4& a) {
72587302 float32x2_t t = vget_low_f32 (a.simd ()); return vdupq_lane_f32 (t,1 );
72597303}
7260- template <> OIIO_FORCEINLINE vfloat4 shuffle <2 > (const vfloat4& a) {
7304+ template <> OIIO_FORCEINLINE vfloat4 broadcast_element <2 > (const vfloat4& a) {
72617305 float32x2_t t = vget_high_f32 (a.simd ()); return vdupq_lane_f32 (t,0 );
72627306}
7263- template <> OIIO_FORCEINLINE vfloat4 shuffle <3 > (const vfloat4& a) {
7307+ template <> OIIO_FORCEINLINE vfloat4 broadcast_element <3 > (const vfloat4& a) {
72647308 float32x2_t t = vget_high_f32 (a.simd ()); return vdupq_lane_f32 (t,1 );
72657309}
72667310#endif
@@ -8260,9 +8304,9 @@ OIIO_FORCEINLINE matrix44 matrix44::transposed () const {
82608304
82618305OIIO_FORCEINLINE vfloat3 matrix44::transformp (const vfloat3 &V) const {
82628306#if OIIO_SIMD_SSE
8263- vfloat4 R = shuffle <0 >(V) * m_row[0 ] + shuffle <1 >(V) * m_row[1 ] +
8264- shuffle <2 >(V) * m_row[2 ] + m_row[3 ];
8265- R = R / shuffle <3 >(R);
8307+ vfloat4 R = broadcast_element <0 >(V) * m_row[0 ] + broadcast_element <1 >(V) * m_row[1 ] +
8308+ broadcast_element <2 >(V) * m_row[2 ] + m_row[3 ];
8309+ R = R / broadcast_element <3 >(R);
82668310 return vfloat3 (R.xyz0 ());
82678311#else
82688312 value_t a, b, c, w;
@@ -8276,8 +8320,8 @@ OIIO_FORCEINLINE vfloat3 matrix44::transformp (const vfloat3 &V) const {
82768320
82778321OIIO_FORCEINLINE vfloat3 matrix44::transformv (const vfloat3 &V) const {
82788322#if OIIO_SIMD_SSE
8279- vfloat4 R = shuffle <0 >(V) * m_row[0 ] + shuffle <1 >(V) * m_row[1 ] +
8280- shuffle <2 >(V) * m_row[2 ];
8323+ vfloat4 R = broadcast_element <0 >(V) * m_row[0 ] + broadcast_element <1 >(V) * m_row[1 ] +
8324+ broadcast_element <2 >(V) * m_row[2 ];
82818325 return vfloat3 (R.xyz0 ());
82828326#else
82838327 value_t a, b, c;
@@ -8291,8 +8335,8 @@ OIIO_FORCEINLINE vfloat3 matrix44::transformv (const vfloat3 &V) const {
82918335OIIO_FORCEINLINE vfloat3 matrix44::transformvT (const vfloat3 &V) const {
82928336#if OIIO_SIMD_SSE
82938337 matrix44 T = transposed ();
8294- vfloat4 R = shuffle <0 >(V) * T[0 ] + shuffle <1 >(V) * T[1 ] +
8295- shuffle <2 >(V) * T[2 ];
8338+ vfloat4 R = broadcast_element <0 >(V) * T[0 ] + broadcast_element <1 >(V) * T[1 ] +
8339+ broadcast_element <2 >(V) * T[2 ];
82968340 return vfloat3 (R.xyz0 ());
82978341#else
82988342 value_t a, b, c;
@@ -8306,8 +8350,8 @@ OIIO_FORCEINLINE vfloat3 matrix44::transformvT (const vfloat3 &V) const {
83068350OIIO_FORCEINLINE vfloat4 operator * (const vfloat4 &V, const matrix44& M)
83078351{
83088352#if OIIO_SIMD_SSE
8309- return shuffle <0 >(V) * M[0 ] + shuffle <1 >(V) * M[1 ] +
8310- shuffle <2 >(V) * M[2 ] + shuffle <3 >(V) * M[3 ];
8353+ return broadcast_element <0 >(V) * M[0 ] + broadcast_element <1 >(V) * M[1 ] +
8354+ broadcast_element <2 >(V) * M[2 ] + broadcast_element <3 >(V) * M[3 ];
83118355#else
83128356 float a, b, c, w;
83138357 a = V[0 ] * M[0 ][0 ] + V[1 ] * M[1 ][0 ] + V[2 ] * M[2 ][0 ] + V[3 ] * M[3 ][0 ];
@@ -9029,14 +9073,19 @@ OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a) {
90299073#endif
90309074}
90319075
9032- template <int i> OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a) {
9076+ template <int i> OIIO_FORCEINLINE vfloat8 broadcast_element (const vfloat8& a) {
90339077#if OIIO_SIMD_AVX >= 2
90349078 return _mm256_permutevar8x32_ps (a, vint8 (i));
90359079#else
9036- return shuffle<i,i,i,i,i,i,i,i>(a) ;
9080+ return a[i] ;
90379081#endif
90389082}
90399083
9084+ // DEPRECATED(3.1): old name; use broadcast_element instead
9085+ template <int i> OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a) {
9086+ return broadcast_element<i>(a);
9087+ }
9088+
90409089
90419090template <int i>
90429091OIIO_FORCEINLINE float extract (const vfloat8& v) {
@@ -9099,9 +9148,9 @@ OIIO_FORCEINLINE vfloat8 vreduce_add (const vfloat8& v) {
90999148 vfloat8 ab_cd_0_0_ef_gh_0_0 = _mm256_hadd_ps (v.simd (), _mm256_setzero_ps ());
91009149 vfloat8 abcd_0_0_0_efgh_0_0_0 = _mm256_hadd_ps (ab_cd_0_0_ef_gh_0_0, _mm256_setzero_ps ());
91019150 // get efgh in the 0-idx slot
9102- vfloat8 efgh = shuffle <4 >(abcd_0_0_0_efgh_0_0_0);
9151+ vfloat8 efgh = broadcast_element <4 >(abcd_0_0_0_efgh_0_0_0);
91039152 vfloat8 final_sum = abcd_0_0_0_efgh_0_0_0 + efgh;
9104- return shuffle <0 >(final_sum);
9153+ return broadcast_element <0 >(final_sum);
91059154#else
91069155 vfloat4 hadd4 = vreduce_add (v.lo ()) + vreduce_add (v.hi ());
91079156 return vfloat8 (hadd4, hadd4);
@@ -9908,7 +9957,14 @@ vfloat16 shuffle (const vfloat16& a) {
99089957#endif
99099958}
99109959
9911- template <int i> vfloat16 shuffle (const vfloat16& a) {
9960+ template <int i> vfloat16 broadcast_element (const vfloat16& a) {
9961+ return a[i];  // a[i] is one lane; the vfloat16 return type converts it back to a vector (presumably the scalar-broadcast constructor -- confirm)
9962+ }
9963+
9964+ // DEPRECATED(3.1): old name and nonstandard use
9965+ template <int i>
9966+ OIIO_DEPRECATED (" Use broadcast_element (3.1)" )
9967+ vfloat16 shuffle (const vfloat16& a) {
99129968 return shuffle<i,i,i,i> (a);
99139969}
99149970
0 commit comments