58 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
59 #define INCLUDED_volk_32f_s32f_power_32f_a_H
66 #include <tmmintrin.h>
68 #ifdef LV_HAVE_LIB_SIMDMATH
72 static inline void volk_32f_s32f_power_32f_a_sse4_1(
float* cVector,
75 unsigned int num_points)
77 unsigned int number = 0;
79 float* cPtr = cVector;
80 const float* aPtr = aVector;
82 #ifdef LV_HAVE_LIB_SIMDMATH
83 const unsigned int quarterPoints = num_points / 4;
92 for (; number < quarterPoints; number++) {
101 cVal = powf4(aVal, vPower);
111 number = quarterPoints * 4;
114 for (; number < num_points; number++) {
115 *cPtr++ = powf((*aPtr++), power);
123 #include <xmmintrin.h>
125 #ifdef LV_HAVE_LIB_SIMDMATH
126 #include <simdmath.h>
130 const float* aVector,
132 unsigned int num_points)
134 unsigned int number = 0;
136 float* cPtr = cVector;
137 const float* aPtr = aVector;
139 #ifdef LV_HAVE_LIB_SIMDMATH
140 const unsigned int quarterPoints = num_points / 4;
149 for (; number < quarterPoints; number++) {
159 cVal = powf4(aVal, vPower);
171 number = quarterPoints * 4;
174 for (; number < num_points; number++) {
175 *cPtr++ = powf((*aPtr++), power);
182 #ifdef LV_HAVE_GENERIC
185 const float* aVector,
187 unsigned int num_points)
189 float* cPtr = cVector;
190 const float* aPtr = aVector;
191 unsigned int number = 0;
193 for (number = 0; number < num_points; number++) {
194 *cPtr++ = powf((*aPtr++), power);
FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2834
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1079
FORCE_INLINE __m128 _mm_setzero_ps(void)
Definition: sse2neon.h:2531
FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1064
FORCE_INLINE __m128 _mm_blendv_ps(__m128 _a, __m128 _b, __m128 _mask)
Definition: sse2neon.h:7458
FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1190
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_or_ps(__m128, __m128)
Definition: sse2neon.h:2237
static void volk_32f_s32f_power_32f_a_sse(float *cVector, const float *aVector, const float power, unsigned int num_points)
Definition: volk_32f_s32f_power_32f.h:129
static void volk_32f_s32f_power_32f_generic(float *cVector, const float *aVector, const float power, unsigned int num_points)
Definition: volk_32f_s32f_power_32f.h:184