41 #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H
42 #define INCLUDED_volk_16ic_deinterleave_real_16i_a_H
49 #include <immintrin.h>
/*
 * AVX2 kernel, aligned variant.
 *
 * NOTE(review): this listing is an excerpt -- the embedded line numbers skip
 * values, so part of the parameter list, the braces and most shuffle-mask
 * arguments are not shown here.
 *
 * Visible behaviour: the input is walked as a flat int16_t stream, two
 * 256-bit vectors are loaded per iteration, byte-shuffled, OR-ed together,
 * reordered and stored to iBuffer. Points left over after the vector loop
 * are handled by a scalar tail loop. Presumably the values kept are the real
 * (first) component of each complex sample, per the function name -- confirm
 * against the full masks.
 *
 * Uses _mm256_load_si256 / _mm256_store_si256, which require 32-byte-aligned
 * addresses.
 */
51 static inline void volk_16ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
53 unsigned int num_points)
55 unsigned int number = 0;
/* Reinterpret the complex input as consecutive int16_t values. */
56 const int16_t* complexVectorPtr = (int16_t*)complexVector;
57 int16_t* iBufferPtr = iBuffer;
/* Shuffle control for the first loaded vector; only the first argument (0x80,
 * which makes _mm256_shuffle_epi8 write a zero byte) is visible in this excerpt. */
59 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
/* Shuffle control for the second loaded vector; remaining arguments not shown. */
91 __m256i iMoveMask2 = _mm256_set_epi8(13,
124 __m256i complexVal1, complexVal2, iOutputVal;
/* Number of full vector iterations; each one covers 16 points. */
126 unsigned int sixteenthPoints = num_points / 16;
128 for (number = 0; number < sixteenthPoints; number++) {
/* Two aligned 256-bit loads; each advances the int16_t pointer by 16
 * elements (32 bytes). */
129 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
130 complexVectorPtr += 16;
131 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
132 complexVectorPtr += 16;
/* Byte shuffles act independently within each 128-bit lane. */
134 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
135 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
/* Merge the two shuffled vectors; presumably the masks zero complementary
 * halves so the OR does not mix data -- confirm against the full masks. */
137 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 picks the 64-bit elements in the order 0, 2, 1, 3. */
138 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
/* Aligned 256-bit store of 16 output values.
 * NOTE(review): the advance of iBufferPtr is not visible in this excerpt
 * (listing jumps from 140 to 145) -- confirm in the full source. */
140 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
/* Resume at the first point not covered by the vector loop. */
145 number = sixteenthPoints * 16;
146 for (; number < num_points; number++) {
/* Scalar tail: copy one int16_t per remaining point.
 * NOTE(review): the listing skips original line 148, which presumably steps
 * the input pointer past the imaginary component -- confirm. */
147 *iBufferPtr++ = *complexVectorPtr++;
154 #include <tmmintrin.h>
/*
 * Fragment of an SSSE3 kernel (this section follows #include <tmmintrin.h>).
 *
 * NOTE(review): the first line(s) of the signature, the braces, the mask
 * declarations and the load/shuffle/store statements of the vector loop are
 * not part of this excerpt (the embedded line numbers skip values). The
 * cross-reference list at the end of the listing places
 * volk_16ic_deinterleave_real_16i_a_ssse3 at line 156, which is presumably
 * this function -- confirm.
 *
 * Visible behaviour: the input is walked as a flat int16_t stream in groups
 * of 8 points (two pointer advances of 8 int16_t each per iteration),
 * followed by a scalar tail loop for the remaining points.
 */
158 unsigned int num_points)
160 unsigned int number = 0;
/* Reinterpret the complex input as consecutive int16_t values. */
161 const int16_t* complexVectorPtr = (int16_t*)complexVector;
162 int16_t* iBufferPtr = iBuffer;
/* Byte-index lists, presumably the arguments of two _mm_set_epi8 calls whose
 * opening lines are omitted (arguments run from byte 15 down to byte 0).
 * As a byte-shuffle control, the first list gathers source bytes
 * 0,1,4,5,8,9,12,13 (the first int16_t of every 4-byte pair) into the low
 * 8 output bytes and zeroes the high 8 (0x80 entries); the second list places
 * the same source bytes into the high 8 output bytes and zeroes the low 8. */
165 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
167 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
169 __m128i complexVal1, complexVal2, iOutputVal;
/* Number of full vector iterations; each one covers 8 points. */
171 unsigned int eighthPoints = num_points / 8;
173 for (number = 0; number < eighthPoints; number++) {
/* Each advance skips 8 int16_t values (16 bytes); the loads that precede
 * these advances are not shown in this excerpt. */
175 complexVectorPtr += 8;
177 complexVectorPtr += 8;
/* Resume at the first point not covered by the vector loop. */
189 number = eighthPoints * 8;
190 for (; number < num_points; number++) {
/* Scalar tail: copy one int16_t per remaining point.
 * NOTE(review): the following original line is omitted from the listing and
 * presumably steps past the imaginary component -- confirm. */
191 *iBufferPtr++ = *complexVectorPtr++;
199 #include <emmintrin.h>
/*
 * Fragment of an SSE2 kernel (this section follows #include <emmintrin.h>).
 *
 * NOTE(review): the first line(s) of the signature, the braces and every
 * load/shuffle/store statement of the vector loop are missing from this
 * excerpt (the embedded line numbers skip values). The cross-reference list
 * at the end of the listing places volk_16ic_deinterleave_real_16i_a_sse2 at
 * line 201, which is presumably this function -- confirm.
 *
 * Visible behaviour: the input is walked as a flat int16_t stream in groups
 * of 8 points (two pointer advances of 8 int16_t each per iteration),
 * followed by a scalar tail loop for the remaining points.
 */
203 unsigned int num_points)
205 unsigned int number = 0;
/* Reinterpret the complex input as consecutive int16_t values. */
206 const int16_t* complexVectorPtr = (int16_t*)complexVector;
207 int16_t* iBufferPtr = iBuffer;
208 __m128i complexVal1, complexVal2, iOutputVal;
/* Number of full vector iterations; each one covers 8 points. */
212 unsigned int eighthPoints = num_points / 8;
214 for (number = 0; number < eighthPoints; number++) {
/* Each advance skips 8 int16_t values (16 bytes); the loads that precede
 * these advances are not shown in this excerpt. */
216 complexVectorPtr += 8;
218 complexVectorPtr += 8;
/* Resume at the first point not covered by the vector loop. */
240 number = eighthPoints * 8;
241 for (; number < num_points; number++) {
/* Scalar tail: copy one int16_t per remaining point.
 * NOTE(review): the following original line is omitted from the listing and
 * presumably steps past the imaginary component -- confirm. */
242 *iBufferPtr++ = *complexVectorPtr++;
248 #ifdef LV_HAVE_GENERIC
/*
 * Fragment of the portable kernel guarded by LV_HAVE_GENERIC.
 *
 * NOTE(review): the first line(s) of the signature and the braces are not
 * part of this excerpt. The cross-reference list at the end of the listing
 * places volk_16ic_deinterleave_real_16i_generic at line 250, which is
 * presumably this function -- confirm.
 *
 * Visible behaviour: a single scalar loop over num_points that copies one
 * int16_t from the input stream to iBuffer per point.
 */
252 unsigned int num_points)
254 unsigned int number = 0;
/* Reinterpret the complex input as consecutive int16_t values. */
255 const int16_t* complexVectorPtr = (int16_t*)complexVector;
256 int16_t* iBufferPtr = iBuffer;
257 for (number = 0; number < num_points; number++) {
/* Copy one int16_t and advance both pointers by one element.
 * NOTE(review): the following original line is omitted from the listing and
 * presumably steps past the imaginary component -- confirm. */
258 *iBufferPtr++ = *complexVectorPtr++;
268 #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_u_H
269 #define INCLUDED_volk_16ic_deinterleave_real_16i_u_H
271 #include <inttypes.h>
276 #include <immintrin.h>
/*
 * AVX2 kernel, unaligned variant.
 *
 * NOTE(review): this listing is an excerpt -- the embedded line numbers skip
 * values, so part of the parameter list, the braces and most shuffle-mask
 * arguments are not shown here.
 *
 * Visible behaviour: the input is walked as a flat int16_t stream, two
 * 256-bit vectors are loaded per iteration, byte-shuffled, OR-ed together,
 * reordered and stored to iBuffer. Points left over after the vector loop
 * are handled by a scalar tail loop. Presumably the values kept are the real
 * (first) component of each complex sample, per the function name -- confirm
 * against the full masks.
 *
 * Uses _mm256_loadu_si256 / _mm256_storeu_si256, which place no alignment
 * requirement on the addresses.
 */
278 static inline void volk_16ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
280 unsigned int num_points)
282 unsigned int number = 0;
/* Reinterpret the complex input as consecutive int16_t values. */
283 const int16_t* complexVectorPtr = (int16_t*)complexVector;
284 int16_t* iBufferPtr = iBuffer;
/* Shuffle control for the first loaded vector; only the first argument (0x80,
 * which makes _mm256_shuffle_epi8 write a zero byte) is visible in this excerpt. */
286 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
/* Shuffle control for the second loaded vector; remaining arguments not shown. */
318 __m256i iMoveMask2 = _mm256_set_epi8(13,
351 __m256i complexVal1, complexVal2, iOutputVal;
/* Number of full vector iterations; each one covers 16 points. */
353 unsigned int sixteenthPoints = num_points / 16;
355 for (number = 0; number < sixteenthPoints; number++) {
/* Two unaligned 256-bit loads; each advances the int16_t pointer by 16
 * elements (32 bytes). */
356 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
357 complexVectorPtr += 16;
358 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
359 complexVectorPtr += 16;
/* Byte shuffles act independently within each 128-bit lane. */
361 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
362 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
/* Merge the two shuffled vectors; presumably the masks zero complementary
 * halves so the OR does not mix data -- confirm against the full masks. */
364 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 picks the 64-bit elements in the order 0, 2, 1, 3. */
365 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
/* Unaligned 256-bit store of 16 output values.
 * NOTE(review): the advance of iBufferPtr is not visible in this excerpt
 * (listing jumps from 367 to 372) -- confirm in the full source. */
367 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
/* Resume at the first point not covered by the vector loop. */
372 number = sixteenthPoints * 16;
373 for (; number < num_points; number++) {
/* Scalar tail: copy one int16_t per remaining point.
 * NOTE(review): the following original line is omitted from the listing and
 * presumably steps past the imaginary component -- confirm. */
374 *iBufferPtr++ = *complexVectorPtr++;
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128i _mm_set_epi32(int, int, int, int)
Definition: sse2neon.h:5115
FORCE_INLINE __m128i _mm_and_si128(__m128i, __m128i)
Definition: sse2neon.h:3128
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:7069
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
#define _mm_shufflelo_epi16(a, imm)
Definition: sse2neon.h:5459
FORCE_INLINE __m128i _mm_or_si128(__m128i, __m128i)
Definition: sse2neon.h:5021
#define _mm_shufflehi_epi16(a, imm)
Definition: sse2neon.h:5444
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
int64x2_t __m128i
Definition: sse2neon.h:244
#define _mm_shuffle_epi32(a, imm)
Definition: sse2neon.h:5358
static void volk_16ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_real_16i.h:250
static void volk_16ic_deinterleave_real_16i_a_sse2(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_real_16i.h:201
static void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_real_16i.h:156
short complex lv_16sc_t
Definition: volk_complex.h:71