42 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
43 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
50 #include <immintrin.h>
/*
 * AVX2 kernel, aligned variant.
 *
 * This is a listing excerpt: the return type, the `complexVector` and
 * `scalar` parameters, the declaration of iFloatValue, the braces and some
 * statements are not visible here.
 *
 * Converts signed 8-bit input values to float, multiplies each by 1/scalar
 * and writes the results to iBuffer.
 *
 * iBuffer     destination for the float results; written with the aligned
 *             store _mm256_store_ps.
 * num_points  number of output values to produce.
 *
 * NOTE(review): presumably complexVector holds interleaved 8-bit I/Q pairs and
 * only the real byte of each pair is kept -- confirm against the full header.
 */
53 volk_8ic_s32f_deinterleave_real_32f_a_avx2(
float* iBuffer,
56 unsigned int num_points)
58 float* iBufferPtr = iBuffer;
60 unsigned int number = 0;
// Number of full 16-point iterations handled by the vector loop.
61 const unsigned int sixteenthPoints = num_points / 16;
// Scale by the reciprocal so the loops multiply instead of divide.
64 const float iScalar = 1.0 / scalar;
65 __m256 invScalar = _mm256_set1_ps(iScalar);
66 __m256i complexVal, iIntVal;
// Walk the input as raw signed bytes.
67 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Control vector for _mm256_shuffle_epi8 below; only its first argument is
// visible in this excerpt.
69 __m256i moveMask = _mm256_set_epi8(0x80,
// Vector loop: each iteration consumes 32 input bytes via an aligned load.
101 for (; number < sixteenthPoints; number++) {
102 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
103 complexVectorPtr += 32;
104 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
// Sign-extend the 8 lowest bytes to 32-bit ints, convert to float, scale,
// and store 8 floats.
106 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
107 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
108 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
109 _mm256_store_ps(iBufferPtr, iFloatValue);
// Immediate 0b11000110 swaps 64-bit elements 0 and 2, so the same
// low-8-byte conversion now reads data that came from the upper 128-bit lane.
112 complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
113 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
114 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
115 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
// NOTE(review): no advance of iBufferPtr is visible between or after the two
// stores in this excerpt; as shown, both stores target the same address.
// Probably dropped from the listing -- confirm against the full file.
116 _mm256_store_ps(iBufferPtr, iFloatValue);
// Scalar tail for the points left over after the 16-point iterations.
120 number = sixteenthPoints * 16;
121 for (; number < num_points; number++) {
// NOTE(review): as shown, the input pointer advances one byte per output;
// a second increment (to skip the other byte of a pair) is not visible here.
122 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
129 #ifdef LV_HAVE_SSE4_1
130 #include <smmintrin.h>
/*
 * SSE4.1 kernel, aligned variant.
 *
 * This is a listing excerpt: the return type, the `complexVector` and
 * `scalar` parameters, the declarations of invScalar and iFloatValue, the
 * load/shuffle/convert/store statements and the braces are not visible here.
 *
 * Converts signed 8-bit input values to float and multiplies each by
 * 1/scalar, writing to iBuffer.
 *
 * iBuffer     destination for the float results.
 * num_points  number of output values to produce.
 */
133 volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(
float* iBuffer,
136 unsigned int num_points)
138 float* iBufferPtr = iBuffer;
140 unsigned int number = 0;
// Number of full 8-point iterations handled by the vector loop.
141 const unsigned int eighthPoints = num_points / 8;
// Scale by the reciprocal so the loops multiply instead of divide.
144 const float iScalar = 1.0 / scalar;
// Walk the input as raw signed bytes.
147 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Tail of an argument list whose call is not visible; presumably the byte
// shuffle mask. If so, the indices 14,12,...,0 pick every second input byte
// into the low 8 bytes and the 0x80 entries zero the high 8 bytes -- confirm.
150 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
// Vector loop: each iteration advances the input by 16 bytes and scales two
// groups of four floats.
152 for (; number < eighthPoints; number++) {
154 complexVectorPtr += 16;
160 iFloatValue =
_mm_mul_ps(iFloatValue, invScalar);
170 iFloatValue =
_mm_mul_ps(iFloatValue, invScalar);
// Scalar tail for the points left over after the 8-point iterations.
177 number = eighthPoints * 8;
178 for (; number < num_points; number++) {
// NOTE(review): as shown, the input pointer advances one byte per output;
// a second increment (to skip the other byte of a pair) is not visible here.
179 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
187 #include <xmmintrin.h>
/*
 * Tail of a kernel signature plus its body (listing excerpt).
 *
 * NOTE(review): the function name line is not visible here; presumably this
 * is volk_8ic_s32f_deinterleave_real_32f_a_sse -- confirm. The declarations
 * of iBuffer, complexVector, scalar and floatBuffer, the vector
 * load/multiply/store statements and the braces are also not visible.
 *
 * Converts signed 8-bit input values to float; the visible tail loop
 * multiplies each by 1/scalar and writes it through iBufferPtr.
 */
193 unsigned int num_points)
195 float* iBufferPtr = iBuffer;
197 unsigned int number = 0;
// Number of full 4-point iterations handled by the main loop.
198 const unsigned int quarterPoints = num_points / 4;
// Scale by the reciprocal so the loops multiply instead of divide.
201 const float iScalar = 1.0 / scalar;
// Walk the input as raw signed bytes.
203 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Main loop: stage four values in floatBuffer, taking every second input byte
// (the pointer advances by 2 after each read).
207 for (; number < quarterPoints; number++) {
208 floatBuffer[0] = (float)(*complexVectorPtr);
209 complexVectorPtr += 2;
210 floatBuffer[1] = (float)(*complexVectorPtr);
211 complexVectorPtr += 2;
212 floatBuffer[2] = (float)(*complexVectorPtr);
213 complexVectorPtr += 2;
214 floatBuffer[3] = (float)(*complexVectorPtr);
215 complexVectorPtr += 2;
// Scalar tail for the points left over after the 4-point iterations.
226 number = quarterPoints * 4;
227 for (; number < num_points; number++) {
// NOTE(review): as shown, the input pointer advances one byte per output here,
// unlike the stride of 2 in the main loop; a second increment is not visible.
228 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
235 #ifdef LV_HAVE_GENERIC
/*
 * Tail of a kernel signature plus its body (listing excerpt).
 *
 * NOTE(review): the function name line is not visible here; presumably this
 * is volk_8ic_s32f_deinterleave_real_32f_generic -- confirm. The declarations
 * of iBuffer, complexVector and scalar and the braces are also not visible.
 *
 * Plain C loop: converts signed 8-bit input values to float, multiplies each
 * by 1/scalar, and writes num_points results through iBufferPtr.
 */
241 unsigned int num_points)
243 unsigned int number = 0;
// Walk the input as raw signed bytes (read-only).
244 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
245 float* iBufferPtr = iBuffer;
// Scale by the reciprocal so the loop multiplies instead of divides.
246 const float invScalar = 1.0 / scalar;
247 for (number = 0; number < num_points; number++) {
// NOTE(review): as shown, the input pointer advances one byte per output;
// a second increment (to skip the other byte of a pair) is not visible here.
248 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
257 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
258 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
260 #include <inttypes.h>
265 #include <immintrin.h>
/*
 * AVX2 kernel, unaligned variant.
 *
 * This is a listing excerpt: the return type, the `complexVector` and
 * `scalar` parameters, the declarations of iFloatValue and hcomplexVal, the
 * braces and some statements are not visible here.
 *
 * Converts signed 8-bit input values to float, multiplies each by 1/scalar
 * and writes the results to iBuffer. Uses the unaligned load/store intrinsics
 * (_mm256_loadu_si256 / _mm256_storeu_ps).
 *
 * iBuffer     destination for the float results.
 * num_points  number of output values to produce.
 */
268 volk_8ic_s32f_deinterleave_real_32f_u_avx2(
float* iBuffer,
271 unsigned int num_points)
273 float* iBufferPtr = iBuffer;
275 unsigned int number = 0;
// Number of full 16-point iterations handled by the vector loop.
276 const unsigned int sixteenthPoints = num_points / 16;
// Scale by the reciprocal so the loops multiply instead of divide.
279 const float iScalar = 1.0 / scalar;
280 __m256 invScalar = _mm256_set1_ps(iScalar);
281 __m256i complexVal, iIntVal;
// Walk the input as raw signed bytes.
283 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Control vector for _mm256_shuffle_epi8 below; only its first argument is
// visible in this excerpt.
285 __m256i moveMask = _mm256_set_epi8(0x80,
// Vector loop: each iteration consumes 32 input bytes via an unaligned load.
318 for (; number < sixteenthPoints; number++) {
319 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
320 complexVectorPtr += 32;
321 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
// Low 128-bit half: sign-extend its 8 lowest bytes to 32-bit ints, convert to
// float, scale, and store 8 floats.
323 hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
324 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
325 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
327 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
329 _mm256_storeu_ps(iBufferPtr, iFloatValue);
// High 128-bit half: same conversion and scaling.
333 hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
334 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
335 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
337 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
// NOTE(review): no advance of iBufferPtr is visible between or after the two
// stores in this excerpt; as shown, both stores target the same address.
// Probably dropped from the listing -- confirm against the full file.
339 _mm256_storeu_ps(iBufferPtr, iFloatValue);
// Scalar tail for the points left over after the 16-point iterations.
344 number = sixteenthPoints * 16;
345 for (; number < num_points; number++) {
// NOTE(review): as shown, the input pointer advances one byte per output;
// a second increment (to skip the other byte of a pair) is not visible here.
346 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
float32x4_t __m128
Definition: sse2neon.h:235
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:7069
FORCE_INLINE __m128i _mm_cvtepi8_epi32(__m128i a)
Definition: sse2neon.h:7574
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
FORCE_INLINE __m128i _mm_srli_si128(__m128i a, int imm)
Definition: sse2neon.h:5885
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a)
Definition: sse2neon.h:3937
static void volk_8ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:238
static void volk_8ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:190
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:70