41 #ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
42 #define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
48 #include <immintrin.h>
/*
 * AVX2 kernel, aligned variant (it uses the aligned 256-bit load/store
 * intrinsics below).
 * Reads interleaved signed 8-bit bytes from complexVector and writes each
 * component, widened to 16 bits and scaled by 256, through iBuffer / qBuffer.
 * NOTE(review): the embedded line numbers jump (50 -> 53 -> 55 ...), so this
 * listing omits some source lines; only the visible statements are described.
 */
50 static inline void volk_8ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
53 unsigned int num_points)
55 unsigned int number = 0;
/* Walk the complex input as a flat stream of signed bytes. */
56 const int8_t* complexVectorPtr = (int8_t*)complexVector;
57 int16_t* iBufferPtr = iBuffer;
58 int16_t* qBufferPtr = qBuffer;
/* Byte-shuffle control; only its first argument (15) is visible in this listing. */
59 __m256i MoveMask = _mm256_set_epi8(15,
91 __m256i complexVal, iOutputVal, qOutputVal;
92 __m128i iOutputVal0, qOutputVal0;
/* Number of full 16-point groups handled by the vector loop. */
94 unsigned int sixteenthPoints = num_points / 16;
96 for (number = 0; number < sixteenthPoints; number++) {
/* Aligned 256-bit load: 32 bytes = 16 points of two bytes each. */
97 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
98 complexVectorPtr += 32;
/*
 * Reorder bytes inside each 128-bit lane according to MoveMask, then reorder
 * the 64-bit quadwords as 0,2,1,3 (0xd8). Presumably this gathers all I bytes
 * in the low half and all Q bytes in the high half -- confirm against the
 * full MoveMask, which is not shown here.
 */
100 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
101 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Low 128 bits feed the I output, high 128 bits feed the Q output. */
103 iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
104 qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);
/* Sign-extend 16 bytes to 16-bit elements, then shift left by 8 (x256),
 * the same scale factor the scalar tail loop below applies. */
106 iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
107 iOutputVal = _mm256_slli_epi16(iOutputVal, 8);
109 qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
110 qOutputVal = _mm256_slli_epi16(qOutputVal, 8);
/* Aligned 256-bit stores of the widened results. */
112 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
113 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: resume at the first point the vector loop did not cover. */
119 number = sixteenthPoints * 16;
120 for (; number < num_points; number++) {
/* NOTE(review): the assignment target and right-hand operand of this
 * multiplication are on lines missing from the listing. */
122 ((int16_t)*complexVectorPtr++) *
/* Next input byte, widened and scaled by 256, goes to the Q output. */
124 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
129 #ifdef LV_HAVE_SSE4_1
130 #include <smmintrin.h>
/*
 * SSE4.1 kernel, compiled only when LV_HAVE_SSE4_1 is defined.
 * Consumes the interleaved 8-bit input 8 points (16 bytes) per vector
 * iteration and finishes the remainder with a scalar loop.
 * NOTE(review): the embedded line numbers jump, so most of the vector loop
 * body and the shuffle-mask declarations are missing from this listing; only
 * the visible statements are described.
 */
132 static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer,
135 unsigned int num_points)
137 unsigned int number = 0;
/* Walk the complex input as a flat stream of signed bytes. */
138 const int8_t* complexVectorPtr = (int8_t*)complexVector;
139 int16_t* iBufferPtr = iBuffer;
140 int16_t* qBufferPtr = qBuffer;
/*
 * Tail of a 16-value byte-selector list; the call it belongs to is not shown.
 * Presumably a shuffle mask: 0x80 entries zero the destination byte and
 * 15,13,...,1 pick the odd-numbered source bytes -- confirm against the
 * full source.
 */
158 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
159 __m128i complexVal, iOutputVal, qOutputVal;
/* Number of full 8-point groups handled by the vector loop. */
161 unsigned int eighthPoints = num_points / 8;
163 for (number = 0; number < eighthPoints; number++) {
/* Advance by 16 input bytes = 8 points of two bytes each. */
165 complexVectorPtr += 16;
/* Scalar tail: resume at the first point the vector loop did not cover. */
187 number = eighthPoints * 8;
188 for (; number < num_points; number++) {
/* NOTE(review): the assignment target and right-hand operand of this
 * multiplication are on lines missing from the listing. */
190 ((int16_t)*complexVectorPtr++) *
/* Next input byte, widened and scaled by 256, goes to the Q output. */
192 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
199 #include <immintrin.h>
/*
 * Remainder of a kernel whose opening signature line is missing from this
 * listing (the cross-reference text further down names
 * volk_8ic_deinterleave_16i_x2_a_avx at line 201).
 * Loads 32 input bytes per iteration with an aligned 256-bit load, splits
 * them into two 128-bit halves, and reassembles two 256-bit results that are
 * stored through iBufferPtr / qBufferPtr.
 * NOTE(review): the shuffle/convert statements between the split and the
 * reassembly are not shown here.
 */
204 unsigned int num_points)
206 unsigned int number = 0;
/* Walk the complex input as a flat stream of signed bytes. */
207 const int8_t* complexVectorPtr = (int8_t*)complexVector;
208 int16_t* iBufferPtr = iBuffer;
209 int16_t* qBufferPtr = qBuffer;
/*
 * Tail of a 16-value byte-selector list; the call it belongs to is not shown.
 * Presumably a shuffle mask: 0x80 entries zero the destination byte and
 * 15,13,...,1 pick the odd-numbered source bytes -- confirm against the
 * full source.
 */
227 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
228 __m256i complexVal, iOutputVal, qOutputVal;
229 __m128i complexVal1, complexVal0;
230 __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;
/* Number of full 16-point groups handled by the vector loop. */
232 unsigned int sixteenthPoints = num_points / 16;
234 for (number = 0; number < sixteenthPoints; number++) {
/* Aligned 256-bit load: 32 bytes = 16 points of two bytes each. */
235 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
236 complexVectorPtr += 32;
/* Split into upper (1) and lower (0) 128-bit halves for 128-bit processing. */
239 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
240 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Tail of a call taking the upper half and iMoveMask; the callee name is on
 * a line missing from the listing. */
243 complexVal1, iMoveMask);
/* Zeroed register used as the base for assembling the 256-bit outputs. */
263 __m256i dummy = _mm256_setzero_si256();
/* Place the *0 result in the lower 128 bits and the *1 result in the upper. */
264 iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
265 iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
266 qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
267 qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);
/* Aligned 256-bit stores of the assembled results. */
269 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
270 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: resume at the first point the vector loop did not cover. */
276 number = sixteenthPoints * 16;
277 for (; number < num_points; number++) {
/* NOTE(review): the assignment target and right-hand operand of this
 * multiplication are on lines missing from the listing. */
279 ((int16_t)*complexVectorPtr++) *
/* Next input byte, widened and scaled by 256, goes to the Q output. */
281 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
287 #ifdef LV_HAVE_GENERIC
/*
 * Remainder of the portable scalar kernel (guarded by LV_HAVE_GENERIC; the
 * cross-reference text further down names
 * volk_8ic_deinterleave_16i_x2_generic at line 289).
 * For each of num_points points it reads two consecutive signed bytes and
 * writes each one, converted to int16_t and multiplied by 256, to the I and
 * Q outputs respectively.
 * NOTE(review): the signature head and the declaration of `number` are on
 * lines missing from this listing.
 */
292 unsigned int num_points)
/* Walk the complex input as a flat stream of signed bytes. */
294 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
295 int16_t* iBufferPtr = iBuffer;
296 int16_t* qBufferPtr = qBuffer;
298 for (number = 0; number < num_points; number++) {
/* First byte of the pair goes to the I output, second to the Q output. */
299 *iBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
300 *qBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
308 #ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
309 #define INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
311 #include <inttypes.h>
315 #include <immintrin.h>
/*
 * AVX2 kernel, unaligned variant (it uses the unaligned 256-bit load/store
 * intrinsics below).
 * Reads interleaved signed 8-bit bytes from complexVector and writes each
 * component, widened to 16 bits and scaled by 256, through iBuffer / qBuffer.
 * NOTE(review): the embedded line numbers jump (317 -> 320 -> 322 ...), so
 * this listing omits some source lines; only the visible statements are
 * described.
 */
317 static inline void volk_8ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
320 unsigned int num_points)
322 unsigned int number = 0;
/* Walk the complex input as a flat stream of signed bytes. */
323 const int8_t* complexVectorPtr = (int8_t*)complexVector;
324 int16_t* iBufferPtr = iBuffer;
325 int16_t* qBufferPtr = qBuffer;
/* Byte-shuffle control; only its first argument (15) is visible in this listing. */
326 __m256i MoveMask = _mm256_set_epi8(15,
358 __m256i complexVal, iOutputVal, qOutputVal;
359 __m128i iOutputVal0, qOutputVal0;
/* Number of full 16-point groups handled by the vector loop. */
361 unsigned int sixteenthPoints = num_points / 16;
363 for (number = 0; number < sixteenthPoints; number++) {
/* Unaligned 256-bit load: 32 bytes = 16 points of two bytes each. */
364 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
365 complexVectorPtr += 32;
/*
 * Reorder bytes inside each 128-bit lane according to MoveMask, then reorder
 * the 64-bit quadwords as 0,2,1,3 (0xd8). Presumably this gathers all I bytes
 * in the low half and all Q bytes in the high half -- confirm against the
 * full MoveMask, which is not shown here.
 */
367 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
368 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Low 128 bits feed the I output, high 128 bits feed the Q output. */
370 iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
371 qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);
/* Sign-extend 16 bytes to 16-bit elements, then shift left by 8 (x256),
 * the same scale factor the scalar tail loop below applies. */
373 iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
374 iOutputVal = _mm256_slli_epi16(iOutputVal, 8);
376 qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
377 qOutputVal = _mm256_slli_epi16(qOutputVal, 8);
/* Unaligned 256-bit stores of the widened results. */
379 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
380 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: resume at the first point the vector loop did not cover. */
386 number = sixteenthPoints * 16;
387 for (; number < num_points; number++) {
/* NOTE(review): the assignment target and right-hand operand of this
 * multiplication are on lines missing from the listing. */
389 ((int16_t)*complexVectorPtr++) *
/* Next input byte, widened and scaled by 256, goes to the Q output. */
391 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:7069
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
FORCE_INLINE __m128i _mm_slli_epi16(__m128i a, int imm)
Definition: sse2neon.h:5544
FORCE_INLINE __m128i _mm_cvtepi8_epi16(__m128i a)
Definition: sse2neon.h:7565
int64x2_t __m128i
Definition: sse2neon.h:244
static void volk_8ic_deinterleave_16i_x2_generic(int16_t *iBuffer, int16_t *qBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_16i_x2.h:289
static void volk_8ic_deinterleave_16i_x2_a_avx(int16_t *iBuffer, int16_t *qBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_16i_x2.h:201
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:70