43 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
44 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
51 #include <immintrin.h>
/*
 * AVX2 kernel using aligned load/store intrinsics (fragment).
 *
 * Writes one float per complex input point to iBuffer, scaled by 1/scalar.
 * NOTE(review): this listing has gaps. The return type, the complexVector
 * and scalar parameters, the braces, the full moveMask initializer and the
 * declarations of complexVal128 / iFloatValue are not visible here.
 */
54 volk_16ic_s32f_deinterleave_real_32f_a_avx2(
float* iBuffer,
57 unsigned int num_points)
59 float* iBufferPtr = iBuffer;
61 unsigned int number = 0;
/* Number of full 8-point iterations handled by the vector loop. */
62 const unsigned int eighthPoints = num_points / 8;
/* Reciprocal is computed once so the loops multiply instead of divide. */
66 const float iScalar = 1.0 / scalar;
67 __m256 invScalar = _mm256_set1_ps(iScalar);
68 __m256i complexVal, iIntVal;
/* Byte-wise view of the input so each iteration can advance by 32 bytes. */
70 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Shuffle mask; only its first byte (0x80) is visible in this listing. */
72 __m256i moveMask = _mm256_set_epi8(0x80,
105 for (; number < eighthPoints; number++) {
/* Aligned 256-bit load: complexVectorPtr must be 32-byte aligned. */
106 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
107 complexVectorPtr += 32;
/*
 * Byte shuffle within each 128-bit lane, then reorder the 64-bit quarters
 * as 0,2,1,3 (0xd8) and keep the low 128 bits. Presumably this packs the
 * eight wanted 16-bit values together; confirm against the full mask.
 */
108 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
109 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
110 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
/* Sign-extend eight int16 values to int32, then convert to float. */
112 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
113 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
115 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
/*
 * Aligned store of eight floats. Listing lines 118-121 are not visible;
 * presumably they advance iBufferPtr and close the loop.
 */
117 _mm256_store_ps(iBufferPtr, iFloatValue);
/* Scalar tail: resume at the first point not covered by the vector loop. */
122 number = eighthPoints * 8;
123 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
124 for (; number < num_points; number++) {
125 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
/* Skip the second 16-bit component of the sample (presumably the imaginary part). */
126 sixteenTComplexVectorPtr++;
131 #ifdef LV_HAVE_SSE4_1
132 #include <smmintrin.h>
/*
 * SSE4.1 kernel (fragment).
 *
 * Writes one float per complex input point to iBuffer, scaled by 1/scalar.
 * NOTE(review): this listing has gaps. The return type, the complexVector
 * and scalar parameters, the braces, the invScalar / iFloatValue
 * declarations and the load, shuffle, convert and store statements of the
 * vector loop are not visible here.
 */
135 volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(
float* iBuffer,
138 unsigned int num_points)
140 float* iBufferPtr = iBuffer;
142 unsigned int number = 0;
/* Number of full 4-point iterations handled by the vector loop. */
143 const unsigned int quarterPoints = num_points / 4;
/* Reciprocal is computed once so the loops multiply instead of divide. */
147 const float iScalar = 1.0 / scalar;
/* Byte-wise view of the input so each iteration can advance by 16 bytes. */
150 int8_t* complexVectorPtr = (int8_t*)complexVector;
/*
 * Tail of a 16-byte mask initializer whose opening is not visible.
 * Presumably these are _mm_set_epi8 arguments (highest byte first) for a
 * byte shuffle: source bytes 0,1,4,5,8,9,12,13 go to the low eight bytes
 * and 0x80 zeroes the upper eight. Confirm against the full source.
 */
153 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
155 for (; number < quarterPoints; number++) {
157 complexVectorPtr += 16;
/* Apply the 1/scalar factor to the converted values. */
163 iFloatValue =
_mm_mul_ps(iFloatValue, invScalar);
/* Scalar tail: resume at the first point not covered by the vector loop. */
170 number = quarterPoints * 4;
171 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
172 for (; number < num_points; number++) {
173 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
/* Skip the second 16-bit component of the sample (presumably the imaginary part). */
174 sixteenTComplexVectorPtr++;
180 #include <xmmintrin.h>
/*
 * SSE kernel (fragment; presumably volk_16ic_s32f_deinterleave_real_32f_a_sse,
 * whose function header is not visible in this listing).
 *
 * NOTE(review): the start of the signature, the braces, the floatBuffer
 * declaration and the vector load, multiply and store statements (listing
 * lines 209-218) are not visible here.
 */
186 unsigned int num_points)
188 float* iBufferPtr = iBuffer;
190 unsigned int number = 0;
/* Number of full 4-point iterations handled by the main loop. */
191 const unsigned int quarterPoints = num_points / 4;
/* Reciprocal is computed once so the loops multiply instead of divide. */
194 const float iScalar = 1.0 / scalar;
196 int16_t* complexVectorPtr = (int16_t*)complexVector;
200 for (; number < quarterPoints; number++) {
/*
 * Stage four values as floats. The pointer advances by two int16 per
 * value, so only every other 16-bit element is read.
 */
201 floatBuffer[0] = (float)(*complexVectorPtr);
202 complexVectorPtr += 2;
203 floatBuffer[1] = (float)(*complexVectorPtr);
204 complexVectorPtr += 2;
205 floatBuffer[2] = (float)(*complexVectorPtr);
206 complexVectorPtr += 2;
207 floatBuffer[3] = (float)(*complexVectorPtr);
208 complexVectorPtr += 2;
/* Scalar tail: resume at the first point not covered by the main loop. */
219 number = quarterPoints * 4;
220 complexVectorPtr = (int16_t*)&complexVector[number];
221 for (; number < num_points; number++) {
/*
 * NOTE(review): only one pointer increment is visible; listing line 223
 * is missing. Confirm that the tail also skips the second 16-bit component,
 * matching the stride of 2 used in the main loop.
 */
222 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
228 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel (fragment; presumably
 * volk_16ic_s32f_deinterleave_real_32f_generic, whose function header is not
 * visible in this listing).
 *
 * Converts int16 input values to float and multiplies them by 1/scalar,
 * writing one float per point to iBuffer.
 */
233 unsigned int num_points)
235 unsigned int number = 0;
/* View the complex input as a flat sequence of int16 values. */
236 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
237 float* iBufferPtr = iBuffer;
/* Reciprocal is computed once so the loop multiplies instead of divides. */
238 const float invScalar = 1.0 / scalar;
239 for (number = 0; number < num_points; number++) {
/*
 * NOTE(review): only one pointer increment is visible; listing line 241
 * is missing. Confirm that the loop also skips the second 16-bit component
 * of each sample.
 */
240 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
249 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
250 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
252 #include <inttypes.h>
257 #include <immintrin.h>
/*
 * AVX2 kernel using unaligned load/store intrinsics (fragment).
 *
 * Writes one float per complex input point to iBuffer, scaled by 1/scalar.
 * NOTE(review): this listing has gaps. The return type, the complexVector
 * and scalar parameters, the braces, the full moveMask initializer and the
 * declarations of complexVal128 / iFloatValue are not visible here.
 */
260 volk_16ic_s32f_deinterleave_real_32f_u_avx2(
float* iBuffer,
263 unsigned int num_points)
265 float* iBufferPtr = iBuffer;
267 unsigned int number = 0;
/* Number of full 8-point iterations handled by the vector loop. */
268 const unsigned int eighthPoints = num_points / 8;
/* Reciprocal is computed once so the loops multiply instead of divide. */
272 const float iScalar = 1.0 / scalar;
273 __m256 invScalar = _mm256_set1_ps(iScalar);
274 __m256i complexVal, iIntVal;
/* Byte-wise view of the input so each iteration can advance by 32 bytes. */
276 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Shuffle mask; only its first byte (0x80) is visible in this listing. */
278 __m256i moveMask = _mm256_set_epi8(0x80,
311 for (; number < eighthPoints; number++) {
/* Unaligned 256-bit load: no alignment requirement on complexVectorPtr. */
312 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
313 complexVectorPtr += 32;
/*
 * Byte shuffle within each 128-bit lane, then reorder the 64-bit quarters
 * as 0,2,1,3 (0xd8) and keep the low 128 bits. Presumably this packs the
 * eight wanted 16-bit values together; confirm against the full mask.
 */
314 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
315 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
316 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
/* Sign-extend eight int16 values to int32, then convert to float. */
318 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
319 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
321 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
/*
 * Unaligned store of eight floats. Listing lines 324-327 are not visible;
 * presumably they advance iBufferPtr and close the loop.
 */
323 _mm256_storeu_ps(iBufferPtr, iFloatValue);
/* Scalar tail: resume at the first point not covered by the vector loop. */
328 number = eighthPoints * 8;
329 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
330 for (; number < num_points; number++) {
331 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
/* Skip the second 16-bit component of the sample (presumably the imaginary part). */
332 sixteenTComplexVectorPtr++;
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:7069
FORCE_INLINE __m128i _mm_cvtepi16_epi32(__m128i a)
Definition: sse2neon.h:7539
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a)
Definition: sse2neon.h:3937
static void volk_16ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:230
static void volk_16ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:183
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65
short complex lv_16sc_t
Definition: volk_complex.h:71