86 #ifndef INCLUDED_volk_32f_exp_32f_a_H
87 #define INCLUDED_volk_32f_exp_32f_a_H
90 #include <emmintrin.h>
95 float* bPtr = bVector;
96 const float* aPtr = aVector;
98 unsigned int number = 0;
99 unsigned int quarterPoints = num_points / 4;
102 __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y;
103 __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2;
104 __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
123 for (; number < quarterPoints; number++) {
159 number = quarterPoints * 4;
160 for (; number < num_points; number++) {
161 *bPtr++ = expf(*aPtr++);
168 #ifdef LV_HAVE_GENERIC
173 float* bPtr = bVector;
174 const float* aPtr = aVector;
175 unsigned int number = 0;
177 for (number = 0; number < num_points; number++) {
178 *bPtr++ = expf(*aPtr++);
186 #ifndef INCLUDED_volk_32f_exp_32f_u_H
187 #define INCLUDED_volk_32f_exp_32f_u_H
190 #include <emmintrin.h>
195 float* bPtr = bVector;
196 const float* aPtr = aVector;
198 unsigned int number = 0;
199 unsigned int quarterPoints = num_points / 4;
202 __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y;
203 __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2;
204 __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
224 for (; number < quarterPoints; number++) {
260 number = quarterPoints * 4;
261 for (; number < num_points; number++) {
262 *bPtr++ = expf(*aPtr++);
269 #ifdef LV_HAVE_GENERIC
274 float* bPtr = bVector;
275 const float* aPtr = aVector;
276 unsigned int number = 0;
278 for (number = 0; number < num_points; number++) {
279 *bPtr++ = expf(*aPtr++);
FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, int imm)
Definition: sse2neon.h:5565
FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2834
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: sse2neon.h:2984
FORCE_INLINE __m128i _mm_set1_epi32(int)
Definition: sse2neon.h:5212
FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
Definition: sse2neon.h:2787
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a)
Definition: sse2neon.h:4324
FORCE_INLINE __m128 _mm_set1_ps(float _w)
Definition: sse2neon.h:2503
FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1154
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE __m128 _mm_setzero_ps(void)
Definition: sse2neon.h:2531
FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1064
FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a)
Definition: sse2neon.h:3250
FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1039
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2080
FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a)
Definition: sse2neon.h:3937
FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2025
static void volk_32f_exp_32f_a_sse2(float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_exp_32f.h:93
static void volk_32f_exp_32f_u_generic(float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_exp_32f.h:272
static void volk_32f_exp_32f_u_sse2(float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_exp_32f.h:193
static void volk_32f_exp_32f_a_generic(float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_exp_32f.h:171