43 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H 
   44 #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H 
   51 #include <immintrin.h> 
   /*
    * Aligned AVX2 kernel: splits interleaved 16-bit complex samples into two
    * float arrays (iBuffer / qBuffer), multiplying each value by 1/scalar.
    *
    * NOTE(review): this listing omits several original lines (return type,
    * the qBuffer/complexVector/scalar parameters, the declarations of
    * `number` and `cplxValue128`, braces, and whatever advances
    * iBufferPtr/qBufferPtr inside the vector loop). Comments below describe
    * only the statements that are visible.
    */
   54 volk_16ic_s32f_deinterleave_32f_x2_a_avx2(
float* iBuffer,
 
   58                                           unsigned int num_points)
 
   60     float* iBufferPtr = iBuffer;
 
   61     float* qBufferPtr = qBuffer;
 
   // Number of full 8-sample groups handled by the vector loop.
   64     const uint64_t eighthPoints = num_points / 8;
 
   65     __m256 cplxValue1, cplxValue2, iValue, qValue;
 
   66     __m256i cplxValueA, cplxValueB;
 
   // Broadcast the reciprocal so the loop can multiply instead of divide.
   // `1.0` is a double literal, so the division is done in double and then
   // narrowed to float.
   69     __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
 
   70     int16_t* complexVectorPtr = (int16_t*)complexVector;
 
   // Permutation used after the per-lane shuffles below: it moves 32-bit
   // elements 4,5 into slots 2,3 and elements 2,3 into slots 4,5, restoring
   // sample order across the two 128-bit lanes.
   71     __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
 
   73     for (; number < eighthPoints; number++) {
 
   // Aligned 256-bit load: 16 int16 values = 8 interleaved I/Q pairs.
   // _mm256_load_si256 requires a 32-byte aligned address.
   75         cplxValueA = _mm256_load_si256((__m256i*)complexVectorPtr);
 
   76         complexVectorPtr += 16;
 
   // Low 128 bits (samples 0..3): sign-extend int16 -> int32, then to float.
   79         cplxValue128 = _mm256_extracti128_si256(cplxValueA, 0);
 
   80         cplxValueB = _mm256_cvtepi16_epi32(cplxValue128);
 
   81         cplxValue1 = _mm256_cvtepi32_ps(cplxValueB);
 
   // High 128 bits (samples 4..7): same widening and conversion.
   82         cplxValue128 = _mm256_extracti128_si256(cplxValueA, 1);
 
   83         cplxValueB = _mm256_cvtepi16_epi32(cplxValue128);
 
   84         cplxValue2 = _mm256_cvtepi32_ps(cplxValueB);
 
   // Scale by 1/scalar.
   86         cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
 
   87         cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
 
   // Pick the even-indexed (I) floats of each 128-bit lane from both
   // vectors; the result is ordered I0 I1 I4 I5 | I2 I3 I6 I7 and is put
   // back in sample order by the permute.
   90         iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, 
_MM_SHUFFLE(2, 0, 2, 0));
 
   91         iValue = _mm256_permutevar8x32_ps(iValue, idx);
 
   // Same for the odd-indexed (Q) floats.
   93         qValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, 
_MM_SHUFFLE(3, 1, 3, 1));
 
   94         qValue = _mm256_permutevar8x32_ps(qValue, idx);
 
   // Aligned stores: both output pointers must be 32-byte aligned.
   96         _mm256_store_ps(iBufferPtr, iValue);
 
   97         _mm256_store_ps(qBufferPtr, qValue);
 
   // Scalar tail for the remaining num_points % 8 samples. The input
   // pointer is re-derived from the sample index.
  103     number = eighthPoints * 8;
 
  104     complexVectorPtr = (int16_t*)&complexVector[number];
 
  105     for (; number < num_points; number++) {
 
   // NOTE(review): the tail divides by `scalar` while the vector loop
   // multiplies by a precomputed reciprocal, so the two paths may differ in
   // the last bit of the result.
  106         *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  107         *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  113 #include <xmmintrin.h> 
  /*
   * SSE kernel (fragment): converts interleaved 16-bit complex samples to
   * float, scales them by invScalar and writes I and Q to separate buffers.
   *
   * NOTE(review): only the last parameter of the signature is visible here,
   * and the declarations of `number`, `floatBuffer` and `invScalar`, the
   * loads into cplxValue1/cplxValue2, and the shuffle/store steps are not
   * part of this listing. Presumably floatBuffer is an aligned 8-float
   * scratch array that is loaded into the two __m128 values - confirm
   * against the full source.
   */
  120                                          unsigned int num_points)
 
  122     float* iBufferPtr = iBuffer;
 
  123     float* qBufferPtr = qBuffer;
 
  // Number of full 4-sample groups handled by the vector loop.
  126     const uint64_t quarterPoints = num_points / 4;
 
  127     __m128 cplxValue1, cplxValue2, iValue, qValue;
 
  130     int16_t* complexVectorPtr = (int16_t*)complexVector;
 
  134     for (; number < quarterPoints; number++) {
 
  // Convert 8 int16 values (4 interleaved I/Q pairs) to float one at a
  // time into the scratch buffer; first the values for cplxValue1 ...
  136         floatBuffer[0] = (float)(complexVectorPtr[0]);
 
  137         floatBuffer[1] = (float)(complexVectorPtr[1]);
 
  138         floatBuffer[2] = (float)(complexVectorPtr[2]);
 
  139         floatBuffer[3] = (float)(complexVectorPtr[3]);
 
  // ... then the next four (presumably for cplxValue2 - TODO confirm).
  141         floatBuffer[4] = (float)(complexVectorPtr[4]);
 
  142         floatBuffer[5] = (float)(complexVectorPtr[5]);
 
  143         floatBuffer[6] = (float)(complexVectorPtr[6]);
 
  144         floatBuffer[7] = (float)(complexVectorPtr[7]);
 
  149         complexVectorPtr += 8;
 
  // Scale by the reciprocal instead of dividing.
  151         cplxValue1 = 
_mm_mul_ps(cplxValue1, invScalar);
 
  152         cplxValue2 = 
_mm_mul_ps(cplxValue2, invScalar);
 
  // Scalar tail for the remaining num_points % 4 samples. The input
  // pointer is re-derived from the sample index.
  166     number = quarterPoints * 4;
 
  167     complexVectorPtr = (int16_t*)&complexVector[number];
 
  168     for (; number < num_points; number++) {
 
  // The tail divides by `scalar`; the vector loop multiplies by invScalar.
  169         *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  170         *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  175 #ifdef LV_HAVE_GENERIC 
  /*
   * Portable scalar kernel (fragment): for each of num_points complex
   * samples, reads two consecutive int16 values and writes the first,
   * divided by scalar, to iBuffer and the second, divided by scalar, to
   * qBuffer.
   *
   * NOTE(review): the start of the signature and the declaration of
   * `number` are not part of this listing.
   */
  182                                            unsigned int num_points)
 
  // View the complex input as a flat run of int16 values (I, Q, I, Q, ...).
  184     const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
 
  185     float* iBufferPtr = iBuffer;
 
  186     float* qBufferPtr = qBuffer;
 
  188     for (number = 0; number < num_points; number++) {
 
  // Each iteration consumes two int16 values and produces one float per
  // output buffer.
  189         *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  190         *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  196 #include <arm_neon.h> 
  /*
   * NEON kernel (fragment): de-interleaves 16-bit complex samples four at a
   * time, widens them to 32-bit, converts to float, scales by 1/scalar and
   * stores I and Q to separate buffers. Leftover samples are handled by a
   * scalar loop.
   *
   * NOTE(review): the start of the signature, the declaration of `number`
   * and the statements that advance iBufferPtr/qBufferPtr inside the vector
   * loop are not part of this listing.
   */
  201                                                            unsigned int num_points)
 
  203     const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
 
  204     float* iBufferPtr = iBuffer;
 
  205     float* qBufferPtr = qBuffer;
 
  // NOTE(review): despite its name this holds num_points / 4 - the vector
  // loop processes 4 complex samples per iteration, and the tail loop below
  // correctly restarts at eighth_points * 4. The name is misleading.
  206     unsigned int eighth_points = num_points / 4;
 
  // Reciprocal of the scale factor, broadcast to all four lanes.
  208     float iScalar = 1.f / scalar;
 
  209     float32x4_t invScalar;
 
  210     invScalar = vld1q_dup_f32(&iScalar);
 
  212     int16x4x2_t complexInput_s16;
 
  213     int32x4x2_t complexInput_s32;
 
  214     float32x4x2_t complexFloat;
 
  216     for (number = 0; number < eighth_points; number++) {
 
  // De-interleaving load: val[0] receives the four even-indexed int16
  // values (I), val[1] the four odd-indexed ones (Q).
  217         complexInput_s16 = vld2_s16(complexVectorPtr);
 
  // Widen int16 -> int32.
  218         complexInput_s32.val[0] = vmovl_s16(complexInput_s16.val[0]);
 
  219         complexInput_s32.val[1] = vmovl_s16(complexInput_s16.val[1]);
 
  // Convert int32 -> float.
  220         complexFloat.val[0] = vcvtq_f32_s32(complexInput_s32.val[0]);
 
  221         complexFloat.val[1] = vcvtq_f32_s32(complexInput_s32.val[1]);
 
  // Scale by 1/scalar.
  222         complexFloat.val[0] = vmulq_f32(complexFloat.val[0], invScalar);
 
  223         complexFloat.val[1] = vmulq_f32(complexFloat.val[1], invScalar);
 
  224         vst1q_f32(iBufferPtr, complexFloat.val[0]);
 
  225         vst1q_f32(qBufferPtr, complexFloat.val[1]);
 
  // 8 int16 values = 4 complex samples consumed per iteration.
  226         complexVectorPtr += 8;
 
  // Scalar tail for the remaining num_points % 4 samples; it divides by
  // `scalar` rather than multiplying by the reciprocal.
  231     for (number = eighth_points * 4; number < num_points; number++) {
 
  232         *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  233         *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  /*
   * Declaration of the externally defined ORC implementation; its body is
   * not in this file. NOTE(review): the middle parameters are elided in
   * this listing - the call site in the u_orc wrapper passes
   * (iBuffer, qBuffer, complexVector, scalar, num_points).
   */
  239 extern void volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(
float* iBuffer,
 
  243                                                           unsigned int num_points);
 
  /*
   * ORC wrapper (fragment): forwards all five arguments unchanged to
   * volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl.
   *
   * NOTE(review): this wrapper is named `u_` (unaligned) but calls the
   * `a_`-named implementation; whether the ORC code tolerates unaligned
   * buffers cannot be determined from here - confirm. The return type,
   * middle parameters and braces are not part of this listing.
   */
  246 volk_16ic_s32f_deinterleave_32f_x2_u_orc(
float* iBuffer,
 
  250                                          unsigned int num_points)
 
  252     volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(
 
  253         iBuffer, qBuffer, complexVector, scalar, num_points);
 
  261 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_u_H 
  262 #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_u_H 
  264 #include <inttypes.h> 
  269 #include <immintrin.h> 
  /*
   * Unaligned AVX2 kernel: splits interleaved 16-bit complex samples into
   * two float arrays (iBuffer / qBuffer), multiplying each value by
   * 1/scalar. Uses unaligned load/store intrinsics, so the buffers need no
   * particular alignment for the vector loop.
   *
   * NOTE(review): this listing omits several original lines (return type,
   * the qBuffer/complexVector/scalar parameters, the declarations of
   * `number` and `cplxValue128`, braces, and whatever advances
   * iBufferPtr/qBufferPtr inside the vector loop). Comments below describe
   * only the statements that are visible.
   */
  272 volk_16ic_s32f_deinterleave_32f_x2_u_avx2(
float* iBuffer,
 
  276                                           unsigned int num_points)
 
  278     float* iBufferPtr = iBuffer;
 
  279     float* qBufferPtr = qBuffer;
 
  // Number of full 8-sample groups handled by the vector loop.
  282     const uint64_t eighthPoints = num_points / 8;
 
  283     __m256 cplxValue1, cplxValue2, iValue, qValue;
 
  284     __m256i cplxValueA, cplxValueB;
 
  // Broadcast the reciprocal so the loop can multiply instead of divide.
  // `1.0` is a double literal, so the division is done in double and then
  // narrowed to float.
  287     __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
 
  288     int16_t* complexVectorPtr = (int16_t*)complexVector;
 
  // Permutation used after the per-lane shuffles below: it moves 32-bit
  // elements 4,5 into slots 2,3 and elements 2,3 into slots 4,5, restoring
  // sample order across the two 128-bit lanes.
  289     __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
 
  291     for (; number < eighthPoints; number++) {
 
  // Unaligned 256-bit load: 16 int16 values = 8 interleaved I/Q pairs.
  293         cplxValueA = _mm256_loadu_si256((__m256i*)complexVectorPtr);
 
  294         complexVectorPtr += 16;
 
  // Low 128 bits (samples 0..3): sign-extend int16 -> int32, then to float.
  297         cplxValue128 = _mm256_extracti128_si256(cplxValueA, 0);
 
  298         cplxValueB = _mm256_cvtepi16_epi32(cplxValue128);
 
  299         cplxValue1 = _mm256_cvtepi32_ps(cplxValueB);
 
  // High 128 bits (samples 4..7): same widening and conversion.
  300         cplxValue128 = _mm256_extracti128_si256(cplxValueA, 1);
 
  301         cplxValueB = _mm256_cvtepi16_epi32(cplxValue128);
 
  302         cplxValue2 = _mm256_cvtepi32_ps(cplxValueB);
 
  // Scale by 1/scalar.
  304         cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
 
  305         cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
 
  // Pick the even-indexed (I) floats of each 128-bit lane from both
  // vectors; the result is ordered I0 I1 I4 I5 | I2 I3 I6 I7 and is put
  // back in sample order by the permute.
  308         iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, 
_MM_SHUFFLE(2, 0, 2, 0));
 
  309         iValue = _mm256_permutevar8x32_ps(iValue, idx);
 
  // Same for the odd-indexed (Q) floats.
  311         qValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, 
_MM_SHUFFLE(3, 1, 3, 1));
 
  312         qValue = _mm256_permutevar8x32_ps(qValue, idx);
 
  // Unaligned stores of 8 I values and 8 Q values.
  314         _mm256_storeu_ps(iBufferPtr, iValue);
 
  315         _mm256_storeu_ps(qBufferPtr, qValue);
 
  // Scalar tail for the remaining num_points % 8 samples. The input
  // pointer is re-derived from the sample index.
  321     number = eighthPoints * 8;
 
  322     complexVectorPtr = (int16_t*)&complexVector[number];
 
  323     for (; number < num_points; number++) {
 
  // NOTE(review): the tail divides by `scalar` while the vector loop
  // multiplies by a precomputed reciprocal, so the two paths may differ in
  // the last bit of the result.
  324         *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
  325         *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
 
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition: sse2neon.h:2437
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2704
static void volk_16ic_s32f_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_32f_x2.h:197
static void volk_16ic_s32f_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_32f_x2.h:178
static void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_32f_x2.h:116
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:65
short complex lv_16sc_t
Definition: volk_complex.h:71