40 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
41 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
47 #include <immintrin.h>
/*
 * volk_8ic_deinterleave_real_8i_a_avx2: 256-bit kernel using aligned loads and stores.
 * NOTE(review): the leading numbers are the original header's line numbers and they
 * jump (49 to 51, 56 to 88 to 120, 136 to 140, 142 to 150), so part of the signature,
 * both mask initialisers, the braces and possibly some pointer updates are not shown
 * in this listing. Confirm details against the full header.
 */
49 static inline void volk_8ic_deinterleave_real_8i_a_avx2(int8_t* iBuffer,
51 unsigned int num_points)
53 unsigned int number = 0;
/* Byte-wise cursors over the input and output buffers. */
54 const int8_t* complexVectorPtr = (int8_t*)complexVector;
55 int8_t* iBufferPtr = iBuffer;
/* Shuffle masks; only the first argument of each initialiser is visible here. */
56 __m256i moveMask1 = _mm256_set_epi8(0x80,
88 __m256i moveMask2 = _mm256_set_epi8(14,
120 __m256i complexVal1, complexVal2, outputVal;
/* Number of iterations of the 256-bit loop (one per 32 points). */
122 unsigned int thirtysecondPoints = num_points / 32;
124 for (number = 0; number < thirtysecondPoints; number++) {
/* Aligned 32-byte loads of two consecutive input vectors (64 input bytes). */
126 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
127 complexVectorPtr += 32;
128 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
129 complexVectorPtr += 32;
/* Shuffle each vector with its mask, then OR the two results into one vector. */
131 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
132 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
133 outputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 reorders the four 64-bit lanes as 0, 2, 1, 3. */
134 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
/* Aligned 32-byte store of the result at the output cursor. */
136 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/*
 * Scalar tail for the points the 256-bit loop did not cover.
 * NOTE(review): as listed, each iteration copies one byte and advances both
 * cursors by one; original line 143 is elided, so verify whether the input
 * cursor is advanced a second time there.
 */
140 number = thirtysecondPoints * 32;
141 for (; number < num_points; number++) {
142 *iBufferPtr++ = *complexVectorPtr++;
150 #include <tmmintrin.h>
/*
 * Last parameter and body of a 128-bit kernel. Presumably this is
 * volk_8ic_deinterleave_real_8i_a_ssse3: the cross-reference index at the end of
 * this listing places that definition at original line 152 (TODO confirm).
 * NOTE(review): the line numbers jump (158 to 160, 171 to 182), so the mask
 * declarations and the loads, shuffles and store inside the loop are not shown.
 */
154 unsigned int num_points)
156 unsigned int number = 0;
/* Byte-wise cursors over the input and output buffers. */
157 const int8_t* complexVectorPtr = (int8_t*)complexVector;
158 int8_t* iBufferPtr = iBuffer;
/*
 * Argument lists of two 16-byte shuffle masks; the declaring lines are elided.
 * Assuming they feed _mm_set_epi8 (arguments run from byte 15 down to byte 0),
 * the first list puts indices 0, 2, ..., 14 in the low eight bytes and the second
 * puts them in the high eight bytes; 0x80 entries yield zero bytes in a byte
 * shuffle. TODO confirm against the full header.
 */
160 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
162 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
163 __m128i complexVal1, complexVal2, outputVal;
/* Number of iterations of the 128-bit loop (one per 16 points). */
165 unsigned int sixteenthPoints = num_points / 16;
167 for (number = 0; number < sixteenthPoints; number++) {
/* Only the two 16-byte input cursor advances are visible in this listing. */
169 complexVectorPtr += 16;
171 complexVectorPtr += 16;
/*
 * Scalar tail for the points the 128-bit loop did not cover.
 * NOTE(review): as listed, each iteration copies one byte and advances both
 * cursors by one; original line 185 is elided, so verify whether the input
 * cursor is advanced a second time there.
 */
182 number = sixteenthPoints * 16;
183 for (; number < num_points; number++) {
184 *iBufferPtr++ = *complexVectorPtr++;
192 #include <immintrin.h>
/*
 * Last parameter and body of a kernel that loads 256 bits at a time but splits
 * the data into 128-bit halves. Presumably this is
 * volk_8ic_deinterleave_real_8i_a_avx: the cross-reference index at the end of
 * this listing places that definition at original line 194 (TODO confirm).
 * NOTE(review): the line numbers jump (200 to 202, 206 to 209, 221 to 232,
 * 237 to 241), so the mask declarations, the 128-bit shuffle/merge steps and
 * possibly some pointer updates are not shown.
 */
196 unsigned int num_points)
198 unsigned int number = 0;
/* Byte-wise cursors over the input and output buffers. */
199 const int8_t* complexVectorPtr = (int8_t*)complexVector;
200 int8_t* iBufferPtr = iBuffer;
/*
 * Argument lists of two 16-byte shuffle masks; the declaring lines are elided.
 * Assuming they feed _mm_set_epi8 (arguments run from byte 15 down to byte 0),
 * the first list puts indices 0, 2, ..., 14 in the low eight bytes and the second
 * puts them in the high eight bytes. TODO confirm against the full header.
 */
202 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
204 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
205 __m256i complexVal1, complexVal2, outputVal;
/* 128-bit temporaries; the end of this declaration is on an elided line. */
206 __m128i complexVal1H, complexVal1L, complexVal2H, complexVal2L, outputVal1,
/* Number of iterations of the 256-bit loop (one per 32 points). */
209 unsigned int thirtysecondPoints = num_points / 32;
211 for (number = 0; number < thirtysecondPoints; number++) {
/* Aligned 32-byte loads of two consecutive input vectors (64 input bytes). */
213 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
214 complexVectorPtr += 32;
215 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
216 complexVectorPtr += 32;
/* Split each 256-bit vector into its high (index 1) and low (index 0) 128-bit halves. */
218 complexVal1H = _mm256_extractf128_si256(complexVal1, 1);
219 complexVal1L = _mm256_extractf128_si256(complexVal1, 0);
220 complexVal2H = _mm256_extractf128_si256(complexVal2, 1);
221 complexVal2L = _mm256_extractf128_si256(complexVal2, 0);
/*
 * Rebuild one 256-bit result from a zeroed vector: outputVal1 becomes the low
 * half and outputVal2 the high half. How outputVal1 and outputVal2 are computed
 * is on elided lines (222-231).
 */
232 __m256i dummy = _mm256_setzero_si256();
233 outputVal = _mm256_insertf128_si256(dummy, outputVal1, 0);
234 outputVal = _mm256_insertf128_si256(outputVal, outputVal2, 1);
/* Aligned 32-byte store of the result at the output cursor. */
237 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/*
 * Scalar tail for the points the 256-bit loop did not cover.
 * NOTE(review): as listed, each iteration copies one byte and advances both
 * cursors by one; original line 244 is elided, so verify whether the input
 * cursor is advanced a second time there.
 */
241 number = thirtysecondPoints * 32;
242 for (; number < num_points; number++) {
243 *iBufferPtr++ = *complexVectorPtr++;
250 #ifdef LV_HAVE_GENERIC
/*
 * Last parameter and body of the plain C fallback. Presumably this is
 * volk_8ic_deinterleave_real_8i_generic: the cross-reference index at the end of
 * this listing places that definition at original line 252 (TODO confirm).
 */
254 unsigned int num_points)
256 unsigned int number = 0;
/* Byte-wise cursors over the input and output buffers. */
257 const int8_t* complexVectorPtr = (int8_t*)complexVector;
258 int8_t* iBufferPtr = iBuffer;
/*
 * One output byte is written per point.
 * NOTE(review): as listed, each iteration advances both cursors by one byte;
 * original line 261 is elided, so verify whether the input cursor is advanced
 * a second time there.
 */
259 for (number = 0; number < num_points; number++) {
260 *iBufferPtr++ = *complexVectorPtr++;
268 #include <arm_neon.h>
/*
 * Last parameter and body of the ARM NEON kernel. Presumably this is
 * volk_8ic_deinterleave_real_8i_neon: the cross-reference index at the end of
 * this listing places that definition at original line 270 (TODO confirm).
 * NOTE(review): the line numbers jump (272 to 275, 280 to 285), so the
 * declaration of number and any pointer advances inside the loop are not shown.
 */
272 unsigned int num_points)
/* Number of iterations of the NEON loop (one per 16 points). */
275 unsigned int sixteenth_points = num_points / 16;
277 int8x16x2_t input_vector;
278 for (number = 0; number < sixteenth_points; ++number) {
/*
 * vld2q_s8 reads 32 bytes and de-interleaves them into two 16-byte vectors;
 * val[0] holds the even-indexed bytes and is stored to the output.
 */
279 input_vector = vld2q_s8((int8_t*)complexVector);
280 vst1q_s8(iBuffer, input_vector.val[0]);
/*
 * Scalar tail for the points the NEON loop did not cover.
 * NOTE(review): as listed, each iteration copies one byte and advances both
 * cursors by one; original line 289 is elided, so verify whether the input
 * cursor is advanced a second time there.
 */
285 const int8_t* complexVectorPtr = (int8_t*)complexVector;
286 int8_t* iBufferPtr = iBuffer;
287 for (number = sixteenth_points * 16; number < num_points; number++) {
288 *iBufferPtr++ = *complexVectorPtr++;
297 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H
298 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H
300 #include <inttypes.h>
304 #include <immintrin.h>
/*
 * volk_8ic_deinterleave_real_8i_u_avx2: 256-bit kernel using unaligned loads
 * (_mm256_loadu_si256) and stores (_mm256_storeu_si256).
 * NOTE(review): the leading numbers are the original header's line numbers and they
 * jump (306 to 308, 313 to 345 to 377, 393 to 397), so part of the signature, both
 * mask initialisers, the braces and possibly some pointer updates are not shown in
 * this listing. Confirm details against the full header.
 */
306 static inline void volk_8ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer,
308 unsigned int num_points)
310 unsigned int number = 0;
/* Byte-wise cursors over the input and output buffers. */
311 const int8_t* complexVectorPtr = (int8_t*)complexVector;
312 int8_t* iBufferPtr = iBuffer;
/* Shuffle masks; only the first argument of each initialiser is visible here. */
313 __m256i moveMask1 = _mm256_set_epi8(0x80,
345 __m256i moveMask2 = _mm256_set_epi8(14,
377 __m256i complexVal1, complexVal2, outputVal;
/* Number of iterations of the 256-bit loop (one per 32 points). */
379 unsigned int thirtysecondPoints = num_points / 32;
381 for (number = 0; number < thirtysecondPoints; number++) {
/* Unaligned 32-byte loads of two consecutive input vectors (64 input bytes). */
383 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
384 complexVectorPtr += 32;
385 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
386 complexVectorPtr += 32;
/* Shuffle each vector with its mask, then OR the two results into one vector. */
388 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
389 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
390 outputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 reorders the four 64-bit lanes as 0, 2, 1, 3. */
391 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
/* Unaligned 32-byte store of the result at the output cursor. */
393 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
/*
 * Scalar tail for the points the 256-bit loop did not cover.
 * NOTE(review): as listed, each iteration copies one byte and advances both
 * cursors by one; original line 400 is elided, so verify whether the input
 * cursor is advanced a second time there.
 */
397 number = thirtysecondPoints * 32;
398 for (; number < num_points; number++) {
399 *iBufferPtr++ = *complexVectorPtr++;
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:7069
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
FORCE_INLINE __m128i _mm_or_si128(__m128i, __m128i)
Definition: sse2neon.h:5021
int64x2_t __m128i
Definition: sse2neon.h:244
static void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:152
static void volk_8ic_deinterleave_real_8i_a_avx(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:194
static void volk_8ic_deinterleave_real_8i_neon(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:270
static void volk_8ic_deinterleave_real_8i_generic(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:252
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:70