51 #ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
52 #define INCLUDED_volk_32i_s32f_convert_32f_u_H
57 #ifdef LV_HAVE_AVX512F
58 #include <immintrin.h>
/*
 * volk_32i_s32f_convert_32f_u_avx512f: converts 32-bit signed integers to
 * floats and multiplies every converted value by 1.0 / scalar.
 *
 * The main loop handles 16 values per pass using the unaligned 512-bit
 * load/store intrinsics; a scalar loop then converts the remaining
 * num_points % 16 values.
 *
 * outputVector - receives the scaled floats
 * inputVector  - int32_t values to convert (only read)
 * num_points   - number of values to convert
 *
 * NOTE(review): this text is an extract with gaps; the leading numbers are
 * the line numbers of the original header. `scalar`, `inputVal` and `ret`
 * are used below but their declarations are not among the visible lines,
 * and no statement that advances `inputPtr` is visible either. Confirm
 * against the original header before relying on this listing.
 */
60 static inline void volk_32i_s32f_convert_32f_u_avx512f(
float* outputVector,
61 const int32_t* inputVector,
63 unsigned int num_points)
65 unsigned int number = 0;
/* Count of full 16-element groups the vector loop will process. */
66 const unsigned int onesixteenthPoints = num_points / 16;
68 float* outputVectorPtr = outputVector;
/* Reciprocal computed once so the loops multiply instead of divide. */
69 const float iScalar = 1.0 / scalar;
/* Same reciprocal replicated into every lane of a 512-bit register. */
70 __m512 invScalar = _mm512_set1_ps(iScalar);
71 int32_t* inputPtr = (int32_t*)inputVector;
/* Vector path: load 16 integers, convert to float, scale, store. */
75 for (; number < onesixteenthPoints; number++) {
77 inputVal = _mm512_loadu_si512((__m512i*)inputPtr);
79 ret = _mm512_cvtepi32_ps(inputVal);
80 ret = _mm512_mul_ps(ret, invScalar);
82 _mm512_storeu_ps(outputVectorPtr, ret);
84 outputVectorPtr += 16;
/* Scalar tail: resume at the first element the vector loop did not cover. */
88 number = onesixteenthPoints * 16;
89 for (; number < num_points; number++) {
90 outputVector[number] = ((float)(inputVector[number])) * iScalar;
97 #include <immintrin.h>
/*
 * volk_32i_s32f_convert_32f_u_avx2: converts 32-bit signed integers to
 * floats and multiplies every converted value by 1.0 / scalar.
 *
 * The main loop handles 8 values per pass using the unaligned 256-bit
 * load/store intrinsics; a scalar loop then converts the remaining
 * num_points % 8 values.
 *
 * outputVector - receives the scaled floats
 * inputVector  - int32_t values to convert (only read)
 * num_points   - number of values to convert
 *
 * NOTE(review): this text is an extract with gaps; the leading numbers are
 * the line numbers of the original header. `scalar`, `inputVal` and `ret`
 * are used below but their declarations are not among the visible lines,
 * and no statement that advances `inputPtr` is visible either. Confirm
 * against the original header before relying on this listing.
 */
99 static inline void volk_32i_s32f_convert_32f_u_avx2(
float* outputVector,
100 const int32_t* inputVector,
102 unsigned int num_points)
104 unsigned int number = 0;
/* Count of full 8-element groups the vector loop will process. */
105 const unsigned int oneEightPoints = num_points / 8;
107 float* outputVectorPtr = outputVector;
/* Reciprocal computed once so the loops multiply instead of divide. */
108 const float iScalar = 1.0 / scalar;
/* Same reciprocal replicated into every lane of a 256-bit register. */
109 __m256 invScalar = _mm256_set1_ps(iScalar);
110 int32_t* inputPtr = (int32_t*)inputVector;
/* Vector path: load 8 integers, convert to float, scale, store. */
114 for (; number < oneEightPoints; number++) {
116 inputVal = _mm256_loadu_si256((__m256i*)inputPtr);
118 ret = _mm256_cvtepi32_ps(inputVal);
119 ret = _mm256_mul_ps(ret, invScalar);
121 _mm256_storeu_ps(outputVectorPtr, ret);
123 outputVectorPtr += 8;
/* Scalar tail: resume at the first element the vector loop did not cover. */
127 number = oneEightPoints * 8;
128 for (; number < num_points; number++) {
129 outputVector[number] = ((float)(inputVector[number])) * iScalar;
136 #include <emmintrin.h>
/*
 * Integer-to-float conversion with scaling by 1.0 / scalar, processed in
 * groups of 4 values followed by a scalar loop for the remaining
 * num_points % 4 values.
 *
 * NOTE(review): the line carrying the function name is not part of this
 * extract. The symbol index at the end of the listing places
 * volk_32i_s32f_convert_32f_u_sse2 at header line 138, which falls in the
 * gap just above, so this is presumably that function -- confirm.
 * NOTE(review): the statements inside the 4-wide loop (header lines
 * 154-161) and the declaration of `scalar` are also missing here; only the
 * output pointer advance is visible.
 */
139 const int32_t* inputVector,
141 unsigned int num_points)
143 unsigned int number = 0;
/* Count of full 4-element groups the vector loop will process. */
144 const unsigned int quarterPoints = num_points / 4;
146 float* outputVectorPtr = outputVector;
/* Reciprocal computed once so the loops multiply instead of divide. */
147 const float iScalar = 1.0 / scalar;
149 int32_t* inputPtr = (int32_t*)inputVector;
/* Vector path, one pass per group of 4 (loop body not visible in this extract). */
153 for (; number < quarterPoints; number++) {
162 outputVectorPtr += 4;
/* Scalar tail: resume at the first element the vector loop did not cover. */
166 number = quarterPoints * 4;
167 for (; number < num_points; number++) {
168 outputVector[number] = ((float)(inputVector[number])) * iScalar;
174 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts 32-bit signed integers to floats and scales each result by
 *        1 / scalar (portable C implementation, no SIMD).
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer of num_points int32_t values (only read).
 * \param scalar       Divisor applied to every converted value. It is not
 *                     checked against zero.
 * \param num_points   Number of values to convert; 0 leaves outputVector
 *                     untouched.
 */
static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
                                                     const int32_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    /* Divide once up front so the loop only has to multiply. */
    const float iScalar = (float)(1.0 / scalar);

    for (unsigned int number = 0; number < num_points; number++) {
        outputVector[number] = ((float)inputVector[number]) * iScalar;
    }
}
195 #ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
196 #define INCLUDED_volk_32i_s32f_convert_32f_a_H
198 #include <inttypes.h>
201 #ifdef LV_HAVE_AVX512F
202 #include <immintrin.h>
/*
 * volk_32i_s32f_convert_32f_a_avx512f: converts 32-bit signed integers to
 * floats and multiplies every converted value by 1.0 / scalar.
 *
 * The main loop handles 16 values per pass using the aligned 512-bit
 * load/store intrinsics (_mm512_load_si512 / _mm512_store_ps), so both
 * buffers presumably have to be 64-byte aligned -- confirm with callers.
 * A scalar loop then converts the remaining num_points % 16 values.
 *
 * outputVector - receives the scaled floats
 * inputVector  - int32_t values to convert (only read)
 * num_points   - number of values to convert
 *
 * NOTE(review): this text is an extract with gaps; the leading numbers are
 * the line numbers of the original header. `scalar`, `inputVal` and `ret`
 * are used below but their declarations are not among the visible lines,
 * and no statement that advances `inputPtr` is visible either. Confirm
 * against the original header before relying on this listing.
 */
204 static inline void volk_32i_s32f_convert_32f_a_avx512f(
float* outputVector,
205 const int32_t* inputVector,
207 unsigned int num_points)
209 unsigned int number = 0;
/* Count of full 16-element groups the vector loop will process. */
210 const unsigned int onesixteenthPoints = num_points / 16;
212 float* outputVectorPtr = outputVector;
/* Reciprocal computed once so the loops multiply instead of divide. */
213 const float iScalar = 1.0 / scalar;
/* Same reciprocal replicated into every lane of a 512-bit register. */
214 __m512 invScalar = _mm512_set1_ps(iScalar);
215 int32_t* inputPtr = (int32_t*)inputVector;
/* Vector path: load 16 integers, convert to float, scale, store. */
219 for (; number < onesixteenthPoints; number++) {
221 inputVal = _mm512_load_si512((__m512i*)inputPtr);
223 ret = _mm512_cvtepi32_ps(inputVal);
224 ret = _mm512_mul_ps(ret, invScalar);
226 _mm512_store_ps(outputVectorPtr, ret);
228 outputVectorPtr += 16;
/* Scalar tail: resume at the first element the vector loop did not cover. */
232 number = onesixteenthPoints * 16;
233 for (; number < num_points; number++) {
234 outputVector[number] = ((float)(inputVector[number])) * iScalar;
240 #include <immintrin.h>
/*
 * volk_32i_s32f_convert_32f_a_avx2: converts 32-bit signed integers to
 * floats and multiplies every converted value by 1.0 / scalar.
 *
 * The main loop handles 8 values per pass using the aligned 256-bit
 * load/store intrinsics (_mm256_load_si256 / _mm256_store_ps), so both
 * buffers presumably have to be 32-byte aligned -- confirm with callers.
 * A scalar loop then converts the remaining num_points % 8 values.
 *
 * outputVector - receives the scaled floats
 * inputVector  - int32_t values to convert (only read)
 * num_points   - number of values to convert
 *
 * NOTE(review): this text is an extract with gaps; the leading numbers are
 * the line numbers of the original header. `scalar`, `inputVal` and `ret`
 * are used below but their declarations are not among the visible lines,
 * and no statement that advances `inputPtr` is visible either. Confirm
 * against the original header before relying on this listing.
 */
242 static inline void volk_32i_s32f_convert_32f_a_avx2(
float* outputVector,
243 const int32_t* inputVector,
245 unsigned int num_points)
247 unsigned int number = 0;
/* Count of full 8-element groups the vector loop will process. */
248 const unsigned int oneEightPoints = num_points / 8;
250 float* outputVectorPtr = outputVector;
/* Reciprocal computed once so the loops multiply instead of divide. */
251 const float iScalar = 1.0 / scalar;
/* Same reciprocal replicated into every lane of a 256-bit register. */
252 __m256 invScalar = _mm256_set1_ps(iScalar);
253 int32_t* inputPtr = (int32_t*)inputVector;
/* Vector path: load 8 integers, convert to float, scale, store. */
257 for (; number < oneEightPoints; number++) {
259 inputVal = _mm256_load_si256((__m256i*)inputPtr);
261 ret = _mm256_cvtepi32_ps(inputVal);
262 ret = _mm256_mul_ps(ret, invScalar);
264 _mm256_store_ps(outputVectorPtr, ret);
266 outputVectorPtr += 8;
/* Scalar tail: resume at the first element the vector loop did not cover. */
270 number = oneEightPoints * 8;
271 for (; number < num_points; number++) {
272 outputVector[number] = ((float)(inputVector[number])) * iScalar;
279 #include <emmintrin.h>
/*
 * Integer-to-float conversion with scaling by 1.0 / scalar, processed in
 * groups of 4 values followed by a scalar loop for the remaining
 * num_points % 4 values.
 *
 * NOTE(review): the line carrying the function name is not part of this
 * extract. The symbol index at the end of the listing places
 * volk_32i_s32f_convert_32f_a_sse2 at header line 281, which falls in the
 * gap just above, so this is presumably that function -- confirm.
 * NOTE(review): the statements inside the 4-wide loop (header lines
 * 297-304) and the declaration of `scalar` are also missing here; only the
 * output pointer advance is visible.
 */
282 const int32_t* inputVector,
284 unsigned int num_points)
286 unsigned int number = 0;
/* Count of full 4-element groups the vector loop will process. */
287 const unsigned int quarterPoints = num_points / 4;
289 float* outputVectorPtr = outputVector;
/* Reciprocal computed once so the loops multiply instead of divide. */
290 const float iScalar = 1.0 / scalar;
292 int32_t* inputPtr = (int32_t*)inputVector;
/* Vector path, one pass per group of 4 (loop body not visible in this extract). */
296 for (; number < quarterPoints; number++) {
305 outputVectorPtr += 4;
/* Scalar tail: resume at the first element the vector loop did not cover. */
309 number = quarterPoints * 4;
310 for (; number < num_points; number++) {
311 outputVector[number] = ((float)(inputVector[number])) * iScalar;
317 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts 32-bit signed integers to floats and scales each result by
 *        1 / scalar (portable C implementation, no SIMD).
 *
 * The loop uses plain element-by-element accesses and places no alignment
 * requirement of its own on either buffer.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer of num_points int32_t values (only read).
 * \param scalar       Divisor applied to every converted value. It is not
 *                     checked against zero.
 * \param num_points   Number of values to convert; 0 leaves outputVector
 *                     untouched.
 */
static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    /* Divide once up front so the loop only has to multiply. */
    const float iScalar = (float)(1.0 / scalar);

    for (unsigned int number = 0; number < num_points; number++) {
        outputVector[number] = ((float)inputVector[number]) * iScalar;
    }
}
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128i _mm_loadu_si128(const __m128i *p)
Definition: sse2neon.h:4570
FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
Definition: sse2neon.h:2787
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a)
Definition: sse2neon.h:3937
static void volk_32i_s32f_convert_32f_a_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:319
static void volk_32i_s32f_convert_32f_u_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:138
static void volk_32i_s32f_convert_32f_a_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:281
static void volk_32i_s32f_convert_32f_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:176