43 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
44 #define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
52 #include <smmintrin.h>
/*
 * SSE4.1 variant (fragment).
 *
 * This listing is incomplete: the return type, several parameters, the
 * opening brace and most statements of the vector loop are not visible, so
 * the notes below only describe the lines that are.
 *
 * Visible behaviour: signed 8-bit samples are read in pairs through
 * complexVectorPtr, converted to float, scaled by 1/scalar, and the first
 * sample of each pair is written to iBuffer, the second to qBuffer.
 */
55 volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(
float* iBuffer,
59 unsigned int num_points)
61 float* iBufferPtr = iBuffer;
62 float* qBufferPtr = qBuffer;
64 unsigned int number = 0;
// Number of full 8-point groups handled by the vector loop below.
65 const unsigned int eighthPoints = num_points / 8;
66 __m128 iFloatValue, qFloatValue;
// Reciprocal of scalar, so each sample can be scaled with a multiply.
68 const float iScalar = 1.0 / scalar;
70 __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
// Walk the input one signed byte at a time.
71 int8_t* complexVectorPtr = (int8_t*)complexVector;
// The next two lines look like the argument lists of byte-shuffle masks; the
// lines that start the calls are not visible here — TODO confirm. The first
// list names the even source bytes (0, 2, ..., 14), the second the odd ones
// (1, 3, ..., 15); the upper eight entries are 0x80.
74 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
76 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
// Vector loop: one iteration per group of 8 points.
78 for (; number < eighthPoints; number++) {
// 16 input bytes per iteration: two bytes per point, 8 points.
80 complexVectorPtr += 16;
// invScalar is defined on a line not visible in this excerpt; presumably it
// is iScalar broadcast to all four lanes — TODO confirm. The loads, integer
// conversions and stores around these multiplies are likewise not shown.
86 iFloatValue =
_mm_mul_ps(iFloatValue, invScalar);
94 iFloatValue =
_mm_mul_ps(iFloatValue, invScalar);
100 qFloatValue =
_mm_mul_ps(qFloatValue, invScalar);
108 qFloatValue =
_mm_mul_ps(qFloatValue, invScalar);
// Scalar tail: handle the num_points % 8 points left over by the vector loop.
114 number = eighthPoints * 8;
115 for (; number < num_points; number++) {
// First byte of each pair goes to the I output, second byte to the Q output.
116 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
117 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
124 #include <xmmintrin.h>
/*
 * SSE variant (fragment).
 *
 * Only the last parameter of the signature is visible; judging by the
 * cross-reference at the end of this listing this is presumably
 * volk_8ic_s32f_deinterleave_32f_x2_a_sse — TODO confirm. The declarations
 * of floatBuffer and invScalar, the vector loads, shuffles and stores, and
 * the closing braces are not visible either.
 *
 * Visible behaviour: bytes are converted to float one at a time into
 * floatBuffer, scaled four lanes at a time, and the leftover points are
 * handled by a scalar loop.
 */
130 unsigned int num_points)
132 float* iBufferPtr = iBuffer;
133 float* qBufferPtr = qBuffer;
135 unsigned int number = 0;
// Number of full 4-point groups handled by the vector loop below.
136 const unsigned int quarterPoints = num_points / 4;
137 __m128 cplxValue1, cplxValue2, iValue, qValue;
// Walk the input one signed byte at a time.
140 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Vector loop: one iteration per group of 4 points (8 input bytes).
144 for (; number < quarterPoints; number++) {
// Convert the 8 bytes to float individually via scalar casts.
145 floatBuffer[0] = (float)(complexVectorPtr[0]);
146 floatBuffer[1] = (float)(complexVectorPtr[1]);
147 floatBuffer[2] = (float)(complexVectorPtr[2]);
148 floatBuffer[3] = (float)(complexVectorPtr[3]);
150 floatBuffer[4] = (float)(complexVectorPtr[4]);
151 floatBuffer[5] = (float)(complexVectorPtr[5]);
152 floatBuffer[6] = (float)(complexVectorPtr[6]);
153 floatBuffer[7] = (float)(complexVectorPtr[7]);
// 8 input bytes consumed per iteration.
158 complexVectorPtr += 8;
// Scale both 4-float vectors; invScalar is defined on a line not visible
// here — presumably a broadcast of 1/scalar, TODO confirm.
160 cplxValue1 =
_mm_mul_ps(cplxValue1, invScalar);
161 cplxValue2 =
_mm_mul_ps(cplxValue2, invScalar);
// Scalar tail: handle the num_points % 4 points left over by the vector loop.
174 number = quarterPoints * 4;
// Re-derive the byte pointer from the index of the first unprocessed point.
175 complexVectorPtr = (int8_t*)&complexVector[number];
176 for (; number < num_points; number++) {
// NOTE(review): the tail divides by scalar, whereas the vector loop above
// multiplies by invScalar; results may differ in the last bit — confirm
// whether that is intended.
177 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
178 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
185 #include <immintrin.h>
/*
 * AVX2 variant using aligned loads and stores (fragment).
 *
 * Several parameters, the opening brace, the bodies of both shuffle masks,
 * the output-pointer advances and the closing braces are not visible in
 * this listing.
 *
 * Visible behaviour: 32 input bytes (16 points) are loaded per iteration,
 * split into I and Q bytes with two byte shuffles, sign-extended to 32-bit
 * integers eight at a time, converted to float, multiplied by 1/scalar and
 * stored. Leftover points are handled by a scalar loop.
 *
 * _mm256_load_si256 and _mm256_store_ps require 32-byte-aligned addresses.
 */
187 static inline void volk_8ic_s32f_deinterleave_32f_x2_a_avx2(
float* iBuffer,
191 unsigned int num_points)
193 float* iBufferPtr = iBuffer;
194 float* qBufferPtr = qBuffer;
196 unsigned int number = 0;
// Number of full 16-point groups handled by the vector loop below.
197 const unsigned int sixteenthPoints = num_points / 16;
198 __m256 iFloatValue, qFloatValue;
// Reciprocal of scalar, broadcast to all eight lanes for the vector multiply.
200 const float iScalar = 1.0 / scalar;
201 __m256 invScalar = _mm256_set1_ps(iScalar);
202 __m256i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
// Walk the input one signed byte at a time.
203 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Byte-shuffle masks for the I and Q samples; only the first argument of
// each call is visible in this excerpt.
205 __m256i iMoveMask = _mm256_set_epi8(0x80,
237 __m256i qMoveMask = _mm256_set_epi8(0x80,
// Vector loop: one iteration per group of 16 points.
270 for (; number < sixteenthPoints; number++) {
// Aligned 32-byte load, then advance past the 16 points just read.
271 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
272 complexVectorPtr += 32;
273 iComplexVal = _mm256_shuffle_epi8(complexVal, iMoveMask);
274 qComplexVal = _mm256_shuffle_epi8(complexVal, qMoveMask);
// I samples, first half: sign-extend the low 8 bytes of the low 128 bits to
// eight int32, convert to float, scale, store.
276 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
277 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
278 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
279 _mm256_store_ps(iBufferPtr, iFloatValue);
// NOTE(review): no advance of iBufferPtr/qBufferPtr is visible between the
// stores in this excerpt; the listing appears to have dropped those lines —
// confirm against the full file.
// I samples, second half: the immediate swaps 64-bit elements 0 and 2, so the
// low 8 bytes now hold what was at the bottom of the upper 128-bit half.
282 iComplexVal = _mm256_permute4x64_epi64(iComplexVal, 0b11000110);
283 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
284 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
285 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
286 _mm256_store_ps(iBufferPtr, iFloatValue);
// Q samples, first half: same sequence as for I.
289 qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
290 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
291 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
292 _mm256_store_ps(qBufferPtr, qFloatValue);
// Q samples, second half: same 64-bit element swap as for I.
295 qComplexVal = _mm256_permute4x64_epi64(qComplexVal, 0b11000110);
296 qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
297 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
298 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
299 _mm256_store_ps(qBufferPtr, qFloatValue);
// Scalar tail: handle the num_points % 16 points left over by the vector loop.
303 number = sixteenthPoints * 16;
304 for (; number < num_points; number++) {
// First byte of each pair goes to the I output, second byte to the Q output.
305 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
306 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
312 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar variant (fragment).
 *
 * Only the last parameter of the signature is visible; judging by the
 * cross-reference at the end of this listing this is presumably
 * volk_8ic_s32f_deinterleave_32f_x2_generic — TODO confirm. The declaration
 * of number and the closing braces are not visible either.
 *
 * Visible behaviour: for each of num_points points, two consecutive signed
 * bytes are read; the first is written to iBuffer and the second to
 * qBuffer, each converted to float and multiplied by 1/scalar.
 */
319 unsigned int num_points)
// Walk the input one signed byte at a time.
321 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
322 float* iBufferPtr = iBuffer;
323 float* qBufferPtr = qBuffer;
// Reciprocal computed once so the loop scales with a multiply.
325 const float invScalar = 1.0 / scalar;
326 for (number = 0; number < num_points; number++) {
327 *iBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
328 *qBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
337 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
338 #define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
340 #include <inttypes.h>
345 #include <immintrin.h>
/*
 * AVX2 variant using unaligned loads and stores (fragment).
 *
 * Several parameters, the opening brace, the body of the shuffle mask, the
 * output-pointer advances and the closing braces are not visible in this
 * listing.
 *
 * Visible behaviour: 32 input bytes (16 points) are loaded per iteration,
 * rearranged with one byte shuffle plus a 64-bit permute, split into two
 * 128-bit halves used as the I and Q sources, sign-extended to 32-bit
 * integers eight at a time, converted to float, multiplied by 1/scalar and
 * stored. Leftover points are handled by a scalar loop.
 */
347 static inline void volk_8ic_s32f_deinterleave_32f_x2_u_avx2(
float* iBuffer,
351 unsigned int num_points)
353 float* iBufferPtr = iBuffer;
354 float* qBufferPtr = qBuffer;
356 unsigned int number = 0;
// Number of full 16-point groups handled by the vector loop below.
357 const unsigned int sixteenthPoints = num_points / 16;
358 __m256 iFloatValue, qFloatValue;
// Reciprocal of scalar, broadcast to all eight lanes for the vector multiply.
360 const float iScalar = 1.0 / scalar;
361 __m256 invScalar = _mm256_set1_ps(iScalar);
362 __m256i complexVal, iIntVal, qIntVal;
363 __m128i iComplexVal, qComplexVal;
// Walk the input one signed byte at a time.
364 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Byte-shuffle mask; only its first argument is visible in this excerpt.
// Presumably it groups I bytes and Q bytes within each 128-bit lane — TODO
// confirm against the full mask.
366 __m256i MoveMask = _mm256_set_epi8(15,
// Vector loop: one iteration per group of 16 points.
399 for (; number < sixteenthPoints; number++) {
// Unaligned 32-byte load, then advance past the 16 points just read.
400 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
401 complexVectorPtr += 32;
402 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
// 0xd8 swaps the two middle 64-bit elements, so the low 128 bits feed the I
// path and the high 128 bits feed the Q path below.
403 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
404 iComplexVal = _mm256_extractf128_si256(complexVal, 0);
405 qComplexVal = _mm256_extractf128_si256(complexVal, 1);
// First half: sign-extend the low 8 bytes of each half to eight int32,
// convert to float, scale, store.
407 iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
408 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
409 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
410 _mm256_storeu_ps(iBufferPtr, iFloatValue);
// NOTE(review): no advance of iBufferPtr/qBufferPtr is visible between the
// stores in this excerpt; the listing appears to have dropped those lines —
// confirm against the full file.
413 qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
414 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
415 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
416 _mm256_storeu_ps(qBufferPtr, qFloatValue);
// Second half: shift each 128-bit lane right by 8 bytes so the next 8 bytes
// of each half land in the low position, then repeat the conversion.
419 complexVal = _mm256_srli_si256(complexVal, 8);
420 iComplexVal = _mm256_extractf128_si256(complexVal, 0);
421 qComplexVal = _mm256_extractf128_si256(complexVal, 1);
423 iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
424 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
425 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
426 _mm256_storeu_ps(iBufferPtr, iFloatValue);
429 qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
430 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
431 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
432 _mm256_storeu_ps(qBufferPtr, qFloatValue);
// Scalar tail: handle the num_points % 16 points left over by the vector loop.
436 number = sixteenthPoints * 16;
437 for (; number < num_points; number++) {
// First byte of each pair goes to the I output, second byte to the Q output.
438 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
439 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:7069
FORCE_INLINE __m128i _mm_cvtepi8_epi32(__m128i a)
Definition: sse2neon.h:7574
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
FORCE_INLINE __m128i _mm_srli_si128(__m128i a, int imm)
Definition: sse2neon.h:5885
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a)
Definition: sse2neon.h:3937
static void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_32f_x2.h:126
static void volk_8ic_s32f_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_32f_x2.h:315
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:70