40 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
41 #define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
48 #include <immintrin.h>
/*
 * AVX2 kernel (aligned load/store variant).
 *
 * Each main-loop iteration reads 32 bytes from the input, rearranges them
 * with a byte shuffle and a 64-bit lane permute, sign-extends the low 16
 * bytes to 16-bit and shifts them left by 7 (i.e. multiplies by 128) before
 * storing 16 int16_t values to iBuffer. Points left over after the last
 * full group of 16 are converted one at a time by the scalar loop.
 *
 * iBuffer    - output buffer of int16_t samples.
 * num_points - number of output samples to produce.
 *
 * _mm256_load_si256 / _mm256_store_si256 are the aligned forms, so both
 * buffers must be 32-byte aligned.
 *
 * NOTE(review): this listing omits several source lines (for example the
 * complexVector parameter, the braces, most of the moveMask initializer and
 * the declaration of outputVal0), so the block is not complete as shown.
 */
50 static inline void volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
52 unsigned int num_points)
54 unsigned int number = 0;
/* View the input as raw signed bytes.
 * NOTE(review): this cast would drop a const qualifier on complexVector
 * (its declaration is not visible here); the generic kernel in this file
 * casts to const int8_t* instead. */
55 const int8_t* complexVectorPtr = (int8_t*)complexVector;
56 int16_t* iBufferPtr = iBuffer;
/* Control vector for _mm256_shuffle_epi8; a control byte of 0x80 zeroes the
 * corresponding output byte. Only the first argument is visible here. */
57 __m256i moveMask = _mm256_set_epi8(0x80,
89 __m256i complexVal, outputVal;
/* Number of full 16-sample groups handled by the vector loop. */
92 unsigned int sixteenthPoints = num_points / 16;
94 for (number = 0; number < sixteenthPoints; number++) {
95 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
/* 32 input bytes are consumed per iteration. */
96 complexVectorPtr += 32;
98 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
/* 0xd8 selects the 64-bit lanes in the order 0, 2, 1, 3; presumably this
 * gathers the per-128-bit-lane shuffle results into the low half (the full
 * mask is not visible in this listing - confirm). */
99 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Keep only the low 128 bits. */
101 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Sign-extend 16 x int8 to 16 x int16. */
103 outputVal = _mm256_cvtepi8_epi16(outputVal0);
/* Left shift by 7 == multiply by 128, matching the scalar tail loop. */
104 outputVal = _mm256_slli_epi16(outputVal, 7);
/* NOTE(review): the advance of iBufferPtr after this store is not visible
 * in this listing. */
106 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/* Scalar tail for the points that do not fill a whole group of 16.
 * NOTE(review): as shown, the input pointer advances one byte per sample;
 * any extra step over the interleaved byte is presumably on a line omitted
 * from this listing - confirm. */
111 number = sixteenthPoints * 16;
112 for (; number < num_points; number++) {
113 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
119 #ifdef LV_HAVE_SSE4_1
120 #include <smmintrin.h>
/*
 * SSE4.1 kernel (aligned variant by name).
 *
 * The vector loop runs once per group of 8 output samples and consumes 16
 * input bytes per iteration; remaining samples are converted by the scalar
 * loop, which scales each byte by 128.
 *
 * iBuffer    - output buffer of int16_t samples.
 * num_points - number of output samples to produce.
 *
 * NOTE(review): this listing omits several source lines (the complexVector
 * parameter, the braces, the start of the mask initializer and most of the
 * vector loop body), so the load/convert/store steps are not visible here.
 */
122 static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer,
124 unsigned int num_points)
126 unsigned int number = 0;
/* View the input as raw signed bytes.
 * NOTE(review): this cast would drop a const qualifier on complexVector
 * (its declaration is not visible here); the generic kernel in this file
 * casts to const int8_t* instead. */
127 const int8_t* complexVectorPtr = (int8_t*)complexVector;
128 int16_t* iBufferPtr = iBuffer;
130 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
/* The arguments above are listed from byte 15 down to byte 0: the low eight
 * control bytes pick the even-indexed source bytes 0, 2, ..., 14 and the
 * high eight (0x80) zero their output bytes. The call these arguments
 * belong to starts on a line omitted from this listing. */
/* Number of full 8-sample groups handled by the vector loop. */
133 unsigned int eighthPoints = num_points / 8;
135 for (number = 0; number < eighthPoints; number++) {
/* 16 input bytes are consumed per iteration. */
137 complexVectorPtr += 16;
/* Scalar tail for the points that do not fill a whole group of 8.
 * NOTE(review): as shown, the input pointer advances one byte per sample;
 * any extra step over the interleaved byte is presumably on a line omitted
 * from this listing - confirm. */
148 number = eighthPoints * 8;
149 for (; number < num_points; number++) {
150 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
158 #include <immintrin.h>
162 unsigned int num_points)
/*
 * AVX kernel (aligned load/store variant).
 *
 * Each main-loop iteration loads 32 input bytes, splits them into two
 * 128-bit halves, and finally recombines two 128-bit results
 * (outputVal0 low, outputVal1 high) into one 256-bit vector that is stored
 * to iBuffer. Points left over after the last full group of 16 are
 * converted by the scalar loop, which scales each byte by 128.
 *
 * _mm256_load_si256 / _mm256_store_si256 are the aligned forms, so both
 * buffers must be 32-byte aligned.
 *
 * NOTE(review): this listing omits several source lines (the function name
 * line, the braces, the start of the mask initializer and the per-half
 * processing between the extract and insert steps), so how outputVal0 and
 * outputVal1 are computed is not visible here.
 */
164 unsigned int number = 0;
/* View the input as raw signed bytes.
 * NOTE(review): this cast appears to drop the const qualifier on
 * complexVector; the generic kernel in this file casts to const int8_t*
 * instead. */
165 const int8_t* complexVectorPtr = (int8_t*)complexVector;
166 int16_t* iBufferPtr = iBuffer;
168 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
/* The arguments above are listed from byte 15 down to byte 0: the low eight
 * control bytes pick the even-indexed source bytes 0, 2, ..., 14 and the
 * high eight (0x80) zero their output bytes. The call these arguments
 * belong to starts on a line omitted from this listing. */
169 __m256i complexVal, outputVal;
170 __m128i complexVal1, complexVal0, outputVal1, outputVal0;
/* Number of full 16-sample groups handled by the vector loop. */
172 unsigned int sixteenthPoints = num_points / 16;
174 for (number = 0; number < sixteenthPoints; number++) {
175 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
/* 32 input bytes are consumed per iteration. */
176 complexVectorPtr += 32;
/* Split the 256-bit load into its high (1) and low (0) 128-bit halves. */
178 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
179 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Rebuild a 256-bit result: start from a zero vector, then place
 * outputVal0 in the low half and outputVal1 in the high half. */
189 __m256i dummy = _mm256_setzero_si256();
190 outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
191 outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
/* NOTE(review): the advance of iBufferPtr after this store is not visible
 * in this listing. */
192 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/* Scalar tail for the points that do not fill a whole group of 16.
 * NOTE(review): as shown, the input pointer advances one byte per sample;
 * any extra step over the interleaved byte is presumably on a line omitted
 * from this listing - confirm. */
197 number = sixteenthPoints * 16;
198 for (; number < num_points; number++) {
199 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
206 #ifdef LV_HAVE_GENERIC
210 unsigned int num_points)
/*
 * Portable scalar kernel: for each of num_points output samples, reads one
 * signed byte from the input, widens it to int16_t, multiplies it by 128
 * and writes it to iBuffer.
 *
 * NOTE(review): this listing omits several source lines (the function name
 * line, the braces and whatever follows the assignment inside the loop).
 * As shown, the input pointer advances one byte per sample; any extra step
 * over the interleaved byte is presumably on an omitted line - confirm.
 */
212 unsigned int number = 0;
/* View the input as raw signed bytes, keeping it const. */
213 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
214 int16_t* iBufferPtr = iBuffer;
215 for (number = 0; number < num_points; number++) {
216 *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
225 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
226 #define INCLUDED_volk_8ic_deinterleave_real_16i_u_H
228 #include <inttypes.h>
233 #include <immintrin.h>
/*
 * AVX2 kernel (unaligned load/store variant).
 *
 * Each main-loop iteration reads 32 bytes from the input, rearranges them
 * with a byte shuffle and a 64-bit lane permute, sign-extends the low 16
 * bytes to 16-bit and shifts them left by 7 (i.e. multiplies by 128) before
 * storing 16 int16_t values to iBuffer. Points left over after the last
 * full group of 16 are converted one at a time by the scalar loop.
 *
 * iBuffer    - output buffer of int16_t samples.
 * num_points - number of output samples to produce.
 *
 * _mm256_loadu_si256 / _mm256_storeu_si256 are the unaligned forms, so the
 * buffers need no particular alignment for the vector accesses.
 *
 * NOTE(review): this listing omits several source lines (for example the
 * complexVector parameter, the braces, most of the moveMask initializer and
 * the declaration of outputVal0), so the block is not complete as shown.
 */
235 static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
237 unsigned int num_points)
239 unsigned int number = 0;
/* View the input as raw signed bytes.
 * NOTE(review): this cast would drop a const qualifier on complexVector
 * (its declaration is not visible here); the generic kernel in this file
 * casts to const int8_t* instead. */
240 const int8_t* complexVectorPtr = (int8_t*)complexVector;
241 int16_t* iBufferPtr = iBuffer;
/* Control vector for _mm256_shuffle_epi8; a control byte of 0x80 zeroes the
 * corresponding output byte. Only the first argument is visible here. */
242 __m256i moveMask = _mm256_set_epi8(0x80,
274 __m256i complexVal, outputVal;
/* Number of full 16-sample groups handled by the vector loop. */
277 unsigned int sixteenthPoints = num_points / 16;
279 for (number = 0; number < sixteenthPoints; number++) {
280 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
/* 32 input bytes are consumed per iteration. */
281 complexVectorPtr += 32;
283 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
/* 0xd8 selects the 64-bit lanes in the order 0, 2, 1, 3; presumably this
 * gathers the per-128-bit-lane shuffle results into the low half (the full
 * mask is not visible in this listing - confirm). */
284 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Keep only the low 128 bits. */
286 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Sign-extend 16 x int8 to 16 x int16. */
288 outputVal = _mm256_cvtepi8_epi16(outputVal0);
/* Left shift by 7 == multiply by 128, matching the scalar tail loop. */
289 outputVal = _mm256_slli_epi16(outputVal, 7);
/* NOTE(review): the advance of iBufferPtr after this store is not visible
 * in this listing. */
291 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
/* Scalar tail for the points that do not fill a whole group of 16.
 * NOTE(review): as shown, the input pointer advances one byte per sample;
 * any extra step over the interleaved byte is presumably on a line omitted
 * from this listing - confirm. */
296 number = sixteenthPoints * 16;
297 for (; number < num_points; number++) {
298 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:7069
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
FORCE_INLINE __m128i _mm_slli_epi16(__m128i a, int imm)
Definition: sse2neon.h:5544
FORCE_INLINE __m128i _mm_cvtepi8_epi16(__m128i a)
Definition: sse2neon.h:7565
int64x2_t __m128i
Definition: sse2neon.h:244
static void volk_8ic_deinterleave_real_16i_a_avx(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:160
static void volk_8ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:208
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:70