doxygen/volk__32fc__deinterleave__64f__x2_8h_source.html

 /* -*- c++ -*- */

 /*

  * Copyright 2012, 2014 Free Software Foundation, Inc.

  *

  * This file is part of VOLK

  *

  * SPDX-License-Identifier: LGPL-3.0-or-later

  */


 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H

 #define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H


 #include <inttypes.h>

 #include <stdio.h>


 #ifdef LV_HAVE_AVX

 #include <immintrin.h>


 /*
  * Splits an interleaved complex-float vector into separate real (I) and
  * imaginary (Q) streams, widening every sample to double precision.
  * AVX variant using unaligned loads and stores.
  *
  * iBuffer       - destination for the real parts (num_points doubles)
  * qBuffer       - destination for the imaginary parts (num_points doubles)
  * complexVector - interleaved source samples (num_points complex floats)
  * num_points    - number of complex samples to process
  */
 static inline void volk_32fc_deinterleave_64f_x2_u_avx(double* iBuffer,
                                                        double* qBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
 {
     /* Walk the complex input as a flat run of floats: re0 im0 re1 im1 ... */
     const float* src = (const float*)complexVector;
     double* realDst = iBuffer;
     double* imagDst = qBuffer;

     /* One vector iteration consumes four complex samples (eight floats). */
     const unsigned int vecIters = num_points / 4;
     unsigned int n;

     for (n = 0; n < vecIters; n++) {
         /* packed = re0 im0 re1 im1 | re2 im2 re3 im3 */
         const __m256 packed = _mm256_loadu_ps(src);
         const __m128 upper = _mm256_extractf128_ps(packed, 1);
         const __m128 lower = _mm256_extractf128_ps(packed, 0);

         /* Even lanes of both halves: re0 re1 re2 re3 */
         const __m128 reals = _mm_shuffle_ps(lower, upper, _MM_SHUFFLE(2, 0, 2, 0));
         /* Odd lanes of both halves: im0 im1 im2 im3 */
         const __m128 imags = _mm_shuffle_ps(lower, upper, _MM_SHUFFLE(3, 1, 3, 1));

         /* Widen four floats to four doubles and write them out. */
         _mm256_storeu_pd(realDst, _mm256_cvtps_pd(reals));
         _mm256_storeu_pd(imagDst, _mm256_cvtps_pd(imags));

         src += 8;
         realDst += 4;
         imagDst += 4;
     }

     /* Scalar clean-up for the 0..3 samples the vector loop did not cover. */
     for (n = vecIters * 4; n < num_points; n++) {
         *realDst++ = (double)*src++;
         *imagDst++ = (double)*src++;
     }
 }

 #endif /* LV_HAVE_AVX */


 #ifdef LV_HAVE_SSE2

 #include <emmintrin.h>


 /*
  * Splits an interleaved complex-float vector into separate real (I) and
  * imaginary (Q) streams, widening every sample to double precision.
  * SSE2 variant using unaligned loads and stores.
  *
  * iBuffer       - destination for the real parts (num_points doubles)
  * qBuffer       - destination for the imaginary parts (num_points doubles)
  * complexVector - interleaved source samples (num_points complex floats)
  * num_points    - number of complex samples to process
  */
 static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer,
                                                         double* qBuffer,
                                                         const lv_32fc_t* complexVector,
                                                         unsigned int num_points)
 {
     /* Walk the complex input as a flat run of floats: re0 im0 re1 im1 ... */
     const float* src = (const float*)complexVector;
     double* realDst = iBuffer;
     double* imagDst = qBuffer;

     /* One vector iteration consumes two complex samples (four floats). */
     const unsigned int pairCount = num_points / 2;
     unsigned int k;

     for (k = 0; k < pairCount; k++) {
         /* pair = re0 im0 re1 im1 */
         const __m128 pair = _mm_loadu_ps(src);

         /* re0 re1 re0 re1 -- only the two low lanes are converted below. */
         const __m128 reals = _mm_shuffle_ps(pair, pair, _MM_SHUFFLE(2, 0, 2, 0));
         /* im0 im1 im0 im1 -- only the two low lanes are converted below. */
         const __m128 imags = _mm_shuffle_ps(pair, pair, _MM_SHUFFLE(3, 1, 3, 1));

         _mm_storeu_pd(realDst, _mm_cvtps_pd(reals));
         _mm_storeu_pd(imagDst, _mm_cvtps_pd(imags));

         src += 4;
         realDst += 2;
         imagDst += 2;
     }

     /* Scalar clean-up for a possible odd trailing sample. */
     for (k = pairCount * 2; k < num_points; k++) {
         *realDst++ = (double)*src++;
         *imagDst++ = (double)*src++;
     }
 }

 #endif /* LV_HAVE_SSE2 */


 #ifdef LV_HAVE_GENERIC


 /*
  * Portable scalar implementation: splits an interleaved complex-float vector
  * into separate real (I) and imaginary (Q) streams, widening every sample
  * to double precision.
  *
  * iBuffer       - destination for the real parts (num_points doubles)
  * qBuffer       - destination for the imaginary parts (num_points doubles)
  * complexVector - interleaved source samples (num_points complex floats)
  * num_points    - number of complex samples to process
  */
 static inline void volk_32fc_deinterleave_64f_x2_generic(double* iBuffer,
                                                          double* qBuffer,
                                                          const lv_32fc_t* complexVector,
                                                          unsigned int num_points)
 {
     /* Walk the complex input as a flat run of floats: re0 im0 re1 im1 ... */
     const float* src = (const float*)complexVector;
     double* realDst = iBuffer;
     double* imagDst = qBuffer;
     unsigned int remaining = num_points;

     while (remaining > 0) {
         *realDst++ = (double)*src++;
         *imagDst++ = (double)*src++;
         remaining--;
     }
 }

 #endif /* LV_HAVE_GENERIC */


 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */

 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H

 #define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H


 #include <inttypes.h>

 #include <stdio.h>


 #ifdef LV_HAVE_AVX

 #include <immintrin.h>


 /*
  * Splits an interleaved complex-float vector into separate real (I) and
  * imaginary (Q) streams, widening every sample to double precision.
  * AVX variant using the aligned load/store intrinsics, so all three buffers
  * must satisfy the 32-byte alignment those intrinsics require.
  *
  * iBuffer       - destination for the real parts (num_points doubles)
  * qBuffer       - destination for the imaginary parts (num_points doubles)
  * complexVector - interleaved source samples (num_points complex floats)
  * num_points    - number of complex samples to process
  */
 static inline void volk_32fc_deinterleave_64f_x2_a_avx(double* iBuffer,
                                                        double* qBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
 {
     /* Walk the complex input as a flat run of floats: re0 im0 re1 im1 ... */
     const float* src = (const float*)complexVector;
     double* realDst = iBuffer;
     double* imagDst = qBuffer;

     /* One vector iteration consumes four complex samples (eight floats). */
     const unsigned int vecIters = num_points / 4;
     unsigned int n;

     for (n = 0; n < vecIters; n++) {
         /* packed = re0 im0 re1 im1 | re2 im2 re3 im3 */
         const __m256 packed = _mm256_load_ps(src);
         const __m128 upper = _mm256_extractf128_ps(packed, 1);
         const __m128 lower = _mm256_extractf128_ps(packed, 0);

         /* Even lanes of both halves: re0 re1 re2 re3 */
         const __m128 reals = _mm_shuffle_ps(lower, upper, _MM_SHUFFLE(2, 0, 2, 0));
         /* Odd lanes of both halves: im0 im1 im2 im3 */
         const __m128 imags = _mm_shuffle_ps(lower, upper, _MM_SHUFFLE(3, 1, 3, 1));

         /* Widen four floats to four doubles and write them out. */
         _mm256_store_pd(realDst, _mm256_cvtps_pd(reals));
         _mm256_store_pd(imagDst, _mm256_cvtps_pd(imags));

         src += 8;
         realDst += 4;
         imagDst += 4;
     }

     /* Scalar clean-up for the 0..3 samples the vector loop did not cover. */
     for (n = vecIters * 4; n < num_points; n++) {
         *realDst++ = (double)*src++;
         *imagDst++ = (double)*src++;
     }
 }

 #endif /* LV_HAVE_AVX */


 #ifdef LV_HAVE_SSE2

 #include <emmintrin.h>


 /*
  * Splits an interleaved complex-float vector into separate real (I) and
  * imaginary (Q) streams, widening every sample to double precision.
  * SSE2 variant using the aligned load/store intrinsics, so all three buffers
  * must satisfy the 16-byte alignment those intrinsics require.
  *
  * iBuffer       - destination for the real parts (num_points doubles)
  * qBuffer       - destination for the imaginary parts (num_points doubles)
  * complexVector - interleaved source samples (num_points complex floats)
  * num_points    - number of complex samples to process
  */
 static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer,
                                                         double* qBuffer,
                                                         const lv_32fc_t* complexVector,
                                                         unsigned int num_points)
 {
     /* Walk the complex input as a flat run of floats: re0 im0 re1 im1 ... */
     const float* src = (const float*)complexVector;
     double* realDst = iBuffer;
     double* imagDst = qBuffer;

     /* One vector iteration consumes two complex samples (four floats). */
     const unsigned int pairCount = num_points / 2;
     unsigned int k;

     for (k = 0; k < pairCount; k++) {
         /* pair = re0 im0 re1 im1 */
         const __m128 pair = _mm_load_ps(src);

         /* re0 re1 re0 re1 -- only the two low lanes are converted below. */
         const __m128 reals = _mm_shuffle_ps(pair, pair, _MM_SHUFFLE(2, 0, 2, 0));
         /* im0 im1 im0 im1 -- only the two low lanes are converted below. */
         const __m128 imags = _mm_shuffle_ps(pair, pair, _MM_SHUFFLE(3, 1, 3, 1));

         _mm_store_pd(realDst, _mm_cvtps_pd(reals));
         _mm_store_pd(imagDst, _mm_cvtps_pd(imags));

         src += 4;
         realDst += 2;
         imagDst += 2;
     }

     /* Scalar clean-up for a possible odd trailing sample. */
     for (k = pairCount * 2; k < num_points; k++) {
         *realDst++ = (double)*src++;
         *imagDst++ = (double)*src++;
     }
 }

 #endif /* LV_HAVE_SSE2 */


 #ifdef LV_HAVE_GENERIC


 /*
  * Portable scalar implementation: splits an interleaved complex-float vector
  * into separate real (I) and imaginary (Q) streams, widening every sample
  * to double precision.
  *
  * iBuffer       - destination for the real parts (num_points doubles)
  * qBuffer       - destination for the imaginary parts (num_points doubles)
  * complexVector - interleaved source samples (num_points complex floats)
  * num_points    - number of complex samples to process
  */
 static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer,
                                                            double* qBuffer,
                                                            const lv_32fc_t* complexVector,
                                                            unsigned int num_points)
 {
     /* Walk the complex input as a flat run of floats: re0 im0 re1 im1 ... */
     const float* src = (const float*)complexVector;
     double* realDst = iBuffer;
     double* imagDst = qBuffer;
     unsigned int remaining = num_points;

     while (remaining > 0) {
         *realDst++ = (double)*src++;
         *imagDst++ = (double)*src++;
         remaining--;
     }
 }

 #endif /* LV_HAVE_GENERIC */


 #ifdef LV_HAVE_NEONV8

 #include <arm_neon.h>


 /*
  * Splits an interleaved complex-float vector into separate real (I) and
  * imaginary (Q) streams, widening every sample to double precision.
  * NEON variant processing two complex samples per vector iteration.
  *
  * iBuffer       - destination for the real parts (num_points doubles)
  * qBuffer       - destination for the imaginary parts (num_points doubles)
  * complexVector - interleaved source samples (num_points complex floats)
  * num_points    - number of complex samples to process
  */
 static inline void volk_32fc_deinterleave_64f_x2_neon(double* iBuffer,
                                                       double* qBuffer,
                                                       const lv_32fc_t* complexVector,
                                                       unsigned int num_points)
 {
     /* Walk the complex input as a flat run of floats: re0 im0 re1 im1 ... */
     const float* src = (const float*)complexVector;
     double* realDst = iBuffer;
     double* imagDst = qBuffer;

     const unsigned int pairCount = num_points / 2;
     unsigned int k;

     for (k = 0; k < pairCount; k++) {
         /* De-interleaving load: val[0] = {re0, re1}, val[1] = {im0, im1} */
         const float32x2x2_t split = vld2_f32(src);

         /* Widen each pair of floats to doubles and store. */
         vst1q_f64(realDst, vcvt_f64_f32(split.val[0]));
         vst1q_f64(imagDst, vcvt_f64_f32(split.val[1]));

         src += 4;
         realDst += 2;
         imagDst += 2;
     }

     /* Scalar clean-up for a possible odd trailing sample. */
     for (k = pairCount * 2; k < num_points; k++) {
         *realDst++ = (double)*src++;
         *imagDst++ = (double)*src++;
     }
 }

 #endif /* LV_HAVE_NEONV8 */


 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */

__m128
float32x4_t __m128
Definition: sse2neon.h:235

_mm_shuffle_ps
#define _mm_shuffle_ps(a, b, imm)
Definition: sse2neon.h:2586

_mm_cvtps_pd
FORCE_INLINE __m128d _mm_cvtps_pd(__m128 a)
Definition: sse2neon.h:4096

__m128d
float32x4_t __m128d
Definition: sse2neon.h:242

_mm_storeu_pd
FORCE_INLINE void _mm_storeu_pd(double *mem_addr, __m128d a)
Definition: sse2neon.h:6003

_mm_loadu_ps
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941

_mm_store_pd
FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a)
Definition: sse2neon.h:5897

_MM_SHUFFLE
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195

_mm_load_ps
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858

volk_32fc_deinterleave_64f_x2_a_avx
static void volk_32fc_deinterleave_64f_x2_a_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:189

volk_32fc_deinterleave_64f_x2_u_sse2
static void volk_32fc_deinterleave_64f_x2_u_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:118

volk_32fc_deinterleave_64f_x2_generic
static void volk_32fc_deinterleave_64f_x2_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:162

volk_32fc_deinterleave_64f_x2_a_sse2
static void volk_32fc_deinterleave_64f_x2_a_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:238

volk_32fc_deinterleave_64f_x2_a_generic
static void volk_32fc_deinterleave_64f_x2_a_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:282

volk_32fc_deinterleave_64f_x2_u_avx
static void volk_32fc_deinterleave_64f_x2_u_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:69

lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:74