41 #ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
42 #define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
47 #include <immintrin.h>
/* AVX2 kernel, aligned flavour: splits interleaved 16-bit complex samples into
 * a separate I stream (iBuffer) and Q stream (qBuffer).
 * It uses _mm256_load_si256 / _mm256_store_si256, which require 32-byte
 * aligned addresses for the input and both outputs.
 * NOTE(review): this excerpt is not contiguous (the embedded numbering skips
 * lines), so the middle parameters, the braces and some statements are not
 * visible here. */
49 static inline void volk_16ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
/* num_points: number of complex samples to process; it bounds both the vector
 * loop and the scalar tail below. */
52 unsigned int num_points)
54 unsigned int number = 0;
/* Byte-addressed cursor over the input; the vector loop advances it in
 * 32-byte steps. */
55 const int8_t* complexVectorPtr = (int8_t*)complexVector;
56 int16_t* iBufferPtr = iBuffer;
57 int16_t* qBufferPtr = qBuffer;
/* Byte shuffle control applied to both loaded vectors. Only its first argument
 * is visible in this excerpt, so the full pattern cannot be confirmed here. */
59 __m256i MoveMask = _mm256_set_epi8(15,
92 __m256i iMove2, iMove1;
93 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* One iteration consumes two 32-byte loads = 64 input bytes. */
95 unsigned int sixteenthPoints = num_points / 16;
97 for (number = 0; number < sixteenthPoints; number++) {
98 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
99 complexVectorPtr += 32;
100 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
101 complexVectorPtr += 32;
103 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
104 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
/* 0x08 puts 64-bit lanes 0 and 2 of iMove1 into the low 128 bits; 0x80 puts
 * lanes 0 and 2 of iMove2 into the high 128 bits.
 * NOTE(review): the selector argument of _mm256_permute2x128_si256 is not
 * visible in this excerpt - presumably it keeps the low half of the first
 * operand and the high half of the second; confirm. */
106 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
107 _mm256_permute4x64_epi64(iMove2, 0x80),
/* Same construction for the Q output, using lanes 1 and 3 (0x0d / 0xd0). */
109 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
110 _mm256_permute4x64_epi64(iMove2, 0xd0),
/* Aligned 32-byte stores of the I and Q results.
 * NOTE(review): the advance of iBufferPtr / qBufferPtr and the closing brace
 * of the loop are not visible in this excerpt. */
113 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
114 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: handles the num_points % 16 samples left after the vector loop. */
120 number = sixteenthPoints * 16;
/* Switch to a 16-bit view of the remaining input.
 * NOTE(review): this cast drops the const qualifier of complexVectorPtr. */
121 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
122 for (; number < num_points; number++) {
/* First 16-bit value of each pair goes to the I output, the second to Q. */
123 *iBufferPtr++ = *int16ComplexVectorPtr++;
124 *qBufferPtr++ = *int16ComplexVectorPtr++;
130 #include <tmmintrin.h>
/* num_points: number of complex samples to process; it bounds both the vector
 * loop and the scalar tail below.
 * NOTE(review): the head of this signature, the braces and most of the vector
 * loop body are not visible in this excerpt. */
135 unsigned int num_points)
137 unsigned int number = 0;
/* Byte-addressed cursor over the input; the vector loop advances it in
 * 16-byte steps. */
138 const int8_t* complexVectorPtr = (int8_t*)complexVector;
139 int16_t* iBufferPtr = iBuffer;
140 int16_t* qBufferPtr = qBuffer;
/* Four 16-byte argument lists, listed from byte 15 down to byte 0; presumably
 * shuffle controls passed to _mm_set_epi8 (the call lines and variable names
 * are not visible here - confirm). 0x80 entries produce zero bytes in a byte
 * shuffle.
 * This one gathers source bytes 0,1,4,5,8,9,12,13 (the first 16-bit value of
 * each pair) into the low 8 bytes. */
143 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
/* Same source bytes, placed in the high 8 bytes. */
145 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
/* Source bytes 2,3,6,7,10,11,14,15 (the second 16-bit value of each pair)
 * into the low 8 bytes. */
148 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 14, 11, 10, 7, 6, 3, 2);
/* Same source bytes, placed in the high 8 bytes. */
150 15, 14, 11, 10, 7, 6, 3, 2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
152 __m128i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* One iteration advances the input cursor by two 16-byte steps = 32 bytes. */
154 unsigned int eighthPoints = num_points / 8;
156 for (number = 0; number < eighthPoints; number++) {
/* NOTE(review): the loads, shuffles, stores and output pointer advances of
 * this loop are not visible in this excerpt; only the input cursor steps are. */
158 complexVectorPtr += 16;
160 complexVectorPtr += 16;
/* Scalar tail: handles the num_points % 8 samples left after the vector loop. */
174 number = eighthPoints * 8;
/* Switch to a 16-bit view of the remaining input.
 * NOTE(review): this cast drops the const qualifier of complexVectorPtr. */
175 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
176 for (; number < num_points; number++) {
/* First 16-bit value of each pair goes to the I output, the second to Q. */
177 *iBufferPtr++ = *int16ComplexVectorPtr++;
178 *qBufferPtr++ = *int16ComplexVectorPtr++;
184 #include <emmintrin.h>
/* num_points: number of complex samples to process; it bounds both the vector
 * loop and the scalar tail below.
 * NOTE(review): the head of this signature, the braces and almost all of the
 * vector loop body are not visible in this excerpt. */
189 unsigned int num_points)
191 unsigned int number = 0;
/* 16-bit cursor over the input; unlike the byte cursors used by the other
 * SIMD variants in this file, it is also read directly by the scalar tail. */
192 const int16_t* complexVectorPtr = (int16_t*)complexVector;
193 int16_t* iBufferPtr = iBuffer;
194 int16_t* qBufferPtr = qBuffer;
195 __m128i complexVal1, complexVal2, iComplexVal1, iComplexVal2, qComplexVal1,
196 qComplexVal2, iOutputVal, qOutputVal;
/* One iteration advances the cursor by two steps of 8 int16_t values, i.e.
 * 16 values = 8 I/Q pairs. */
200 unsigned int eighthPoints = num_points / 8;
202 for (number = 0; number < eighthPoints; number++) {
/* NOTE(review): the loads, the lane rearrangement and the stores of this loop
 * are not visible in this excerpt; only the input cursor steps are. */
204 complexVectorPtr += 8;
206 complexVectorPtr += 8;
/* Scalar tail: handles the num_points % 8 samples left after the vector loop. */
246 number = eighthPoints * 8;
247 for (; number < num_points; number++) {
/* First 16-bit value of each pair goes to the I output, the second to Q. */
248 *iBufferPtr++ = *complexVectorPtr++;
249 *qBufferPtr++ = *complexVectorPtr++;
254 #ifdef LV_HAVE_GENERIC
/* num_points: number of complex samples to process (one I and one Q value are
 * written per sample).
 * NOTE(review): the head of this signature, the braces and the declaration of
 * `number` are not visible in this excerpt. */
259 unsigned int num_points)
/* View the complex input as a flat sequence of 16-bit values. */
261 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
262 int16_t* iBufferPtr = iBuffer;
263 int16_t* qBufferPtr = qBuffer;
/* Plain scalar loop with no SIMD intrinsics: two reads and two writes per
 * sample. */
265 for (number = 0; number < num_points; number++) {
/* First 16-bit value of each pair goes to the I output, the second to Q. */
266 *iBufferPtr++ = *complexVectorPtr++;
267 *qBufferPtr++ = *complexVectorPtr++;
/* Externally defined implementation; only its declaration appears here.
 * NOTE(review): the middle parameters of both signatures and the braces of
 * the wrapper are not visible in this excerpt. */
274 extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer,
277 unsigned int num_points);
/* Inline wrapper that forwards all four arguments unchanged to the extern
 * implementation above.
 * NOTE(review): the wrapper is named _u_orc but forwards to the _a_orc_impl
 * symbol - confirm that the naming mismatch is intentional. */
278 static inline void volk_16ic_deinterleave_16i_x2_u_orc(int16_t* iBuffer,
281 unsigned int num_points)
283 volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points);
290 #ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
291 #define INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
293 #include <inttypes.h>
296 #include <immintrin.h>
/* AVX2 kernel, unaligned flavour: splits interleaved 16-bit complex samples
 * into a separate I stream (iBuffer) and Q stream (qBuffer).
 * It uses _mm256_loadu_si256 / _mm256_storeu_si256, so the vector accesses
 * carry no alignment requirement.
 * NOTE(review): this excerpt is not contiguous (the embedded numbering skips
 * lines), so the middle parameters, the braces and some statements are not
 * visible here. */
298 static inline void volk_16ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
/* num_points: number of complex samples to process; it bounds both the vector
 * loop and the scalar tail below. */
301 unsigned int num_points)
303 unsigned int number = 0;
/* Byte-addressed cursor over the input; the vector loop advances it in
 * 32-byte steps. */
304 const int8_t* complexVectorPtr = (int8_t*)complexVector;
305 int16_t* iBufferPtr = iBuffer;
306 int16_t* qBufferPtr = qBuffer;
/* Byte shuffle control applied to both loaded vectors. Only its first argument
 * is visible in this excerpt, so the full pattern cannot be confirmed here. */
308 __m256i MoveMask = _mm256_set_epi8(15,
341 __m256i iMove2, iMove1;
342 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* One iteration consumes two 32-byte loads = 64 input bytes. */
344 unsigned int sixteenthPoints = num_points / 16;
346 for (number = 0; number < sixteenthPoints; number++) {
347 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
348 complexVectorPtr += 32;
349 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
350 complexVectorPtr += 32;
352 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
353 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
/* 0x08 puts 64-bit lanes 0 and 2 of iMove1 into the low 128 bits; 0x80 puts
 * lanes 0 and 2 of iMove2 into the high 128 bits.
 * NOTE(review): the selector argument of _mm256_permute2x128_si256 is not
 * visible in this excerpt - presumably it keeps the low half of the first
 * operand and the high half of the second; confirm. */
355 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
356 _mm256_permute4x64_epi64(iMove2, 0x80),
/* Same construction for the Q output, using lanes 1 and 3 (0x0d / 0xd0). */
358 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
359 _mm256_permute4x64_epi64(iMove2, 0xd0),
/* Unaligned 32-byte stores of the I and Q results.
 * NOTE(review): the advance of iBufferPtr / qBufferPtr and the closing brace
 * of the loop are not visible in this excerpt. */
362 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
363 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: handles the num_points % 16 samples left after the vector loop. */
369 number = sixteenthPoints * 16;
/* Switch to a 16-bit view of the remaining input.
 * NOTE(review): this cast drops the const qualifier of complexVectorPtr. */
370 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
371 for (; number < num_points; number++) {
/* First 16-bit value of each pair goes to the I output, the second to Q. */
372 *iBufferPtr++ = *int16ComplexVectorPtr++;
373 *qBufferPtr++ = *int16ComplexVectorPtr++;
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128i _mm_set_epi32(int, int, int, int)
Definition: sse2neon.h:5115
FORCE_INLINE __m128i _mm_and_si128(__m128i, __m128i)
Definition: sse2neon.h:3128
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:7069
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
#define _mm_shufflelo_epi16(a, imm)
Definition: sse2neon.h:5459
FORCE_INLINE __m128i _mm_or_si128(__m128i, __m128i)
Definition: sse2neon.h:5021
#define _mm_shufflehi_epi16(a, imm)
Definition: sse2neon.h:5444
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
int64x2_t __m128i
Definition: sse2neon.h:244
#define _mm_shuffle_epi32(a, imm)
Definition: sse2neon.h:5358
static void volk_16ic_deinterleave_16i_x2_generic(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:256
static void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:186
static void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:132
short complex lv_16sc_t
Definition: volk_complex.h:71