57 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
58 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
65 #include <immintrin.h>
/* Listing fragment of volk_32f_s32f_convert_32i_u_avx (the index at the end of
 * this listing places its definition at line 67).  Several original lines,
 * including the signature line and some declarations, are absent from this
 * listing; the comments below describe only the statements that are visible. */
68 const float* inputVector,
70 unsigned int num_points)
72 unsigned int number = 0;
/* Number of complete groups of eight floats; bounds the vector loop below. */
74 const unsigned int eighthPoints = num_points / 8;
76 const float* inputVectorPtr = (
const float*)inputVector;
77 int32_t* outputVectorPtr = outputVector;
/* Clamp limits, held as float.
 * NOTE(review): with a 32-bit int and IEEE-754 single precision, INT_MAX is not
 * exactly representable and converts to 2^31, one above the largest int32_t.
 * A value clamped to max_val may therefore still be out of range for the
 * float -> int32 conversions below -- confirm the intended saturation. */
79 float min_val = INT_MIN;
80 float max_val = INT_MAX;
/* Broadcast the scale factor and both limits into all eight lanes. */
83 __m256 vScalar = _mm256_set1_ps(scalar);
86 __m256 vmin_val = _mm256_set1_ps(min_val);
87 __m256 vmax_val = _mm256_set1_ps(max_val);
89 for (; number < eighthPoints; number++) {
/* Unaligned load of eight input floats. */
90 inputVal1 = _mm256_loadu_ps(inputVectorPtr);
/* Scale, then clamp: min() against the upper limit, max() against the lower. */
93 inputVal1 = _mm256_max_ps(
94 _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* Packed float -> 32-bit integer conversion. */
95 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
/* Unaligned store of the eight converted values. */
97 _mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal1);
/* Resume at the first element the vector loop did not cover and finish the
 * remainder one value at a time. */
101 number = eighthPoints * 8;
102 for (; number < num_points; number++) {
103 r = inputVector[number] * scalar;
/* Lower-bound branch of the scalar clamp; the matching upper-bound test is not
 * visible in this listing. */
106 else if (r < min_val)
/* Round with rintf() and store the result as int32_t. */
108 outputVector[number] = (int32_t)
rintf(r);
115 #include <emmintrin.h>
/* Listing fragment of volk_32f_s32f_convert_32i_u_sse2 (the index at the end of
 * this listing places its definition at line 117).  The signature line and most
 * of the vector-loop body are absent from this listing. */
118 const float* inputVector,
120 unsigned int num_points)
122 unsigned int number = 0;
/* Number of complete groups of four floats; bounds the vector loop below. */
124 const unsigned int quarterPoints = num_points / 4;
126 const float* inputVectorPtr = (
const float*)inputVector;
127 int32_t* outputVectorPtr = outputVector;
/* Clamp limits, held as float.
 * NOTE(review): with a 32-bit int and IEEE-754 single precision, INT_MAX
 * converts to 2^31, one above the largest int32_t -- confirm that clamping to
 * max_val is enough to keep the later int32_t conversion in range. */
129 float min_val = INT_MIN;
130 float max_val = INT_MAX;
/* Vector loop over groups of four; only the output-pointer advance survives in
 * this listing. */
139 for (; number < quarterPoints; number++) {
148 outputVectorPtr += 4;
/* Resume at the first element the vector loop did not cover and finish the
 * remainder one value at a time. */
151 number = quarterPoints * 4;
152 for (; number < num_points; number++) {
153 r = inputVector[number] * scalar;
/* Lower-bound branch of the scalar clamp; the matching upper-bound test is not
 * visible in this listing. */
156 else if (r < min_val)
/* Round with rintf() and store the result as int32_t. */
158 outputVector[number] = (int32_t)
rintf(r);
166 #include <xmmintrin.h>
/* Listing fragment of volk_32f_s32f_convert_32i_u_sse (the index at the end of
 * this listing places its definition at line 168).  The signature line and the
 * statements that fill outputFloatBuffer are absent from this listing. */
169 const float* inputVector,
171 unsigned int num_points)
173 unsigned int number = 0;
/* Number of complete groups of four floats; bounds the vector loop below. */
175 const unsigned int quarterPoints = num_points / 4;
177 const float* inputVectorPtr = (
const float*)inputVector;
178 int32_t* outputVectorPtr = outputVector;
/* Clamp limits, held as float.
 * NOTE(review): with a 32-bit int and IEEE-754 single precision, INT_MAX
 * converts to 2^31, one above the largest int32_t -- confirm that clamping to
 * max_val is enough to keep the (int32_t) casts below in range. */
180 float min_val = INT_MIN;
181 float max_val = INT_MAX;
191 for (; number < quarterPoints; number++) {
/* Each of the four values in outputFloatBuffer is rounded with rintf() and
 * written out individually, advancing the output pointer by one each time. */
198 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[0]);
199 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[1]);
200 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[2]);
201 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[3]);
/* Resume at the first element the vector loop did not cover and finish the
 * remainder one value at a time. */
204 number = quarterPoints * 4;
205 for (; number < num_points; number++) {
206 r = inputVector[number] * scalar;
/* Lower-bound branch of the scalar clamp; the matching upper-bound test is not
 * visible in this listing. */
209 else if (r < min_val)
/* Round with rintf() and store the result as int32_t. */
211 outputVector[number] = (int32_t)
rintf(r);
218 #ifdef LV_HAVE_GENERIC
/* Listing fragment of volk_32f_s32f_convert_32i_generic (the index at the end
 * of this listing places its definition at line 220).  The signature line, the
 * declaration of s and the upper-bound branch are absent from this listing. */
221 const float* inputVector,
223 unsigned int num_points)
/* Running cursors over the output and input arrays. */
225 int32_t* outputVectorPtr = outputVector;
226 const float* inputVectorPtr = inputVector;
/* Range limits converted explicitly to float for the comparisons below. */
227 const float min_val = (float)INT_MIN;
228 const float max_val = (float)INT_MAX;
230 for (
unsigned int number = 0; number < num_points; number++) {
/* Read the next input, advance the input cursor, and apply the scale factor. */
231 const float r = *inputVectorPtr++ * scalar;
/* Lower-bound test; what is assigned on the out-of-range branches is not
 * visible in this listing. */
235 else if (r < min_val)
/* Rounded conversion via rintf(), kept in s. */
238 s = (int32_t)
rintf(r);
/* Write s to the output and advance the output cursor. */
239 *outputVectorPtr++ = s;
247 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
248 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
250 #include <inttypes.h>
255 #include <immintrin.h>
/* Listing fragment of volk_32f_s32f_convert_32i_a_avx (the index at the end of
 * this listing places its definition at line 257).  The signature line and some
 * declarations are absent from this listing; the comments below describe only
 * the statements that are visible. */
258 const float* inputVector,
260 unsigned int num_points)
262 unsigned int number = 0;
/* Number of complete groups of eight floats; bounds the vector loop below. */
264 const unsigned int eighthPoints = num_points / 8;
266 const float* inputVectorPtr = (
const float*)inputVector;
267 int32_t* outputVectorPtr = outputVector;
/* Clamp limits, held as float.
 * NOTE(review): with a 32-bit int and IEEE-754 single precision, INT_MAX is not
 * exactly representable and converts to 2^31, one above the largest int32_t.
 * A value clamped to max_val may therefore still be out of range for the
 * float -> int32 conversions below -- confirm the intended saturation. */
269 float min_val = INT_MIN;
270 float max_val = INT_MAX;
/* Broadcast the scale factor and both limits into all eight lanes. */
273 __m256 vScalar = _mm256_set1_ps(scalar);
275 __m256i intInputVal1;
276 __m256 vmin_val = _mm256_set1_ps(min_val);
277 __m256 vmax_val = _mm256_set1_ps(max_val);
279 for (; number < eighthPoints; number++) {
/* Aligned load of eight input floats (_mm256_load_ps expects a 32-byte-aligned
 * address). */
280 inputVal1 = _mm256_load_ps(inputVectorPtr);
/* Scale, then clamp: min() against the upper limit, max() against the lower. */
283 inputVal1 = _mm256_max_ps(
284 _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* Packed float -> 32-bit integer conversion. */
285 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
/* Aligned store of the eight converted values, then advance the output
 * pointer past them. */
287 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
288 outputVectorPtr += 8;
/* Resume at the first element the vector loop did not cover and finish the
 * remainder one value at a time. */
291 number = eighthPoints * 8;
292 for (; number < num_points; number++) {
293 r = inputVector[number] * scalar;
/* Lower-bound branch of the scalar clamp; the matching upper-bound test is not
 * visible in this listing. */
296 else if (r < min_val)
/* Round with rintf() and store the result as int32_t. */
298 outputVector[number] = (int32_t)
rintf(r);
306 #include <emmintrin.h>
/* Listing fragment of volk_32f_s32f_convert_32i_a_sse2 (the index at the end of
 * this listing places its definition at line 308).  The signature line and most
 * of the vector-loop body are absent from this listing. */
309 const float* inputVector,
311 unsigned int num_points)
313 unsigned int number = 0;
/* Number of complete groups of four floats; bounds the vector loop below. */
315 const unsigned int quarterPoints = num_points / 4;
317 const float* inputVectorPtr = (
const float*)inputVector;
318 int32_t* outputVectorPtr = outputVector;
/* Clamp limits, held as float.
 * NOTE(review): with a 32-bit int and IEEE-754 single precision, INT_MAX
 * converts to 2^31, one above the largest int32_t -- confirm that clamping to
 * max_val is enough to keep the later int32_t conversion in range. */
320 float min_val = INT_MIN;
321 float max_val = INT_MAX;
/* Vector loop over groups of four; only the output-pointer advance survives in
 * this listing. */
330 for (; number < quarterPoints; number++) {
339 outputVectorPtr += 4;
/* Resume at the first element the vector loop did not cover and finish the
 * remainder one value at a time. */
342 number = quarterPoints * 4;
343 for (; number < num_points; number++) {
344 r = inputVector[number] * scalar;
/* Lower-bound branch of the scalar clamp; the matching upper-bound test is not
 * visible in this listing. */
347 else if (r < min_val)
/* Round with rintf() and store the result as int32_t. */
349 outputVector[number] = (int32_t)
rintf(r);
357 #include <xmmintrin.h>
/* Listing fragment of volk_32f_s32f_convert_32i_a_sse (the index at the end of
 * this listing places its definition at line 359).  The signature line and the
 * statements that fill outputFloatBuffer are absent from this listing. */
360 const float* inputVector,
362 unsigned int num_points)
364 unsigned int number = 0;
/* Number of complete groups of four floats; bounds the vector loop below. */
366 const unsigned int quarterPoints = num_points / 4;
368 const float* inputVectorPtr = (
const float*)inputVector;
369 int32_t* outputVectorPtr = outputVector;
/* Clamp limits, held as float.
 * NOTE(review): with a 32-bit int and IEEE-754 single precision, INT_MAX
 * converts to 2^31, one above the largest int32_t -- confirm that clamping to
 * max_val is enough to keep the (int32_t) casts below in range. */
371 float min_val = INT_MIN;
372 float max_val = INT_MAX;
382 for (; number < quarterPoints; number++) {
/* Each of the four values in outputFloatBuffer is rounded with rintf() and
 * written out individually, advancing the output pointer by one each time. */
389 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[0]);
390 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[1]);
391 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[2]);
392 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[3]);
/* Resume at the first element the vector loop did not cover and finish the
 * remainder one value at a time. */
395 number = quarterPoints * 4;
396 for (; number < num_points; number++) {
397 r = inputVector[number] * scalar;
/* Lower-bound branch of the scalar clamp; the matching upper-bound test is not
 * visible in this listing. */
400 else if (r < min_val)
/* Round with rintf() and store the result as int32_t. */
402 outputVector[number] = (int32_t)
rintf(r);
409 #ifdef LV_HAVE_GENERIC
412 const float* inputVector,
414 unsigned int num_points)
static float rintf(float x)
Definition: config.h:45
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128i _mm_cvtps_epi32(__m128)
Definition: sse2neon.h:4036
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:6010
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2080
FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2025
static void volk_32f_s32f_convert_32i_a_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:359
static void volk_32f_s32f_convert_32i_a_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:257
static void volk_32f_s32f_convert_32i_a_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:411
static void volk_32f_s32f_convert_32i_a_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:308
static void volk_32f_s32f_convert_32i_u_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:168
static void volk_32f_s32f_convert_32i_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:220
static void volk_32f_s32f_convert_32i_u_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:67
static void volk_32f_s32f_convert_32i_u_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:117
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65