15 #ifndef INCLUDE_VOLK_VOLK_AVX2_INTRINSICS_H_ 
   16 #define INCLUDE_VOLK_VOLK_AVX2_INTRINSICS_H_ 
   18 #include <immintrin.h> 
   24     const __m256i shuffle_mask = _mm256_setr_epi8(0xff,
 
   56     __m256i sign_bits = _mm256_setzero_si256();
 
   60     sign_bits = _mm256_insertf128_si256(sign_bits, fbits, 0);
 
   61     sign_bits = _mm256_insertf128_si256(sign_bits, fbits, 1);
 
   62     sign_bits = _mm256_shuffle_epi8(sign_bits, shuffle_mask);
 
   64     return _mm256_castsi256_ps(sign_bits);
 
   77     llr0 = _mm256_xor_ps(llr0, sign_mask);
 
   78     __m256 dst = _mm256_add_ps(llr0, llr1);
 
   83                                                      const __m256 cplxValue1)
 
   85     const __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
 
   86     const __m256 squared0 = _mm256_mul_ps(cplxValue0, cplxValue0); 
 
   87     const __m256 squared1 = _mm256_mul_ps(cplxValue1, cplxValue1); 
 
   88     const __m256 complex_result = _mm256_hadd_ps(squared0, squared1);
 
   89     return _mm256_permutevar8x32_ps(complex_result, idx);
 
   93                                                      const __m256 symbols1,
 
  103     const __m256 diff0 = _mm256_sub_ps(symbols0, points0);
 
  104     const __m256 diff1 = _mm256_sub_ps(symbols1, points1);
 
  106     return _mm256_mul_ps(norms, scalar);
 
  129                                                   __m256i* max_indices,
 
  130                                                   __m256i* current_indices,
 
  131                                                   __m256i indices_increment)
 
  133     in0 = _mm256_mul_ps(in0, in0);
 
  134     in1 = _mm256_mul_ps(in1, in1);
 
  154     __m256 abs_squared = _mm256_hadd_ps(in0, in1);
 
  165     __m256 compare_mask = _mm256_cmp_ps(abs_squared, *max_values, _CMP_GT_OS);
 
  168     *max_values = _mm256_blendv_ps(*max_values, abs_squared, compare_mask);
 
  179         _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(*max_indices),
 
  180                                              _mm256_castsi256_ps(*current_indices),
 
  184     *current_indices = _mm256_add_epi32(*current_indices, indices_increment);
 
  191                                                   __m256i* max_indices,
 
  192                                                   __m256i* current_indices,
 
  193                                                   __m256i indices_increment)
 
  195     in0 = _mm256_mul_ps(in0, in0);
 
  196     in1 = _mm256_mul_ps(in1, in1);
 
  198     __m256 abs_squared = _mm256_hadd_ps(in0, in1);
 
  199     __m256 compare_mask = _mm256_cmp_ps(abs_squared, *max_values, _CMP_GT_OS);
 
  211     *max_values = _mm256_max_ps(abs_squared, *max_values);
 
  214         _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(*max_indices),
 
  215                                              _mm256_castsi256_ps(*current_indices),
 
  218     *current_indices = _mm256_add_epi32(*current_indices, indices_increment);
 
  241                                                   __m256i* min_indices,
 
  242                                                   __m256i* current_indices,
 
  243                                                   __m256i indices_increment)
 
  245     in0 = _mm256_mul_ps(in0, in0);
 
  246     in1 = _mm256_mul_ps(in1, in1);
 
  266     __m256 abs_squared = _mm256_hadd_ps(in0, in1);
 
  277     __m256 compare_mask = _mm256_cmp_ps(abs_squared, *min_values, _CMP_LT_OS);
 
  280     *min_values = _mm256_blendv_ps(*min_values, abs_squared, compare_mask);
 
  291         _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(*min_indices),
 
  292                                              _mm256_castsi256_ps(*current_indices),
 
  296     *current_indices = _mm256_add_epi32(*current_indices, indices_increment);
 
  303                                                   __m256i* min_indices,
 
  304                                                   __m256i* current_indices,
 
  305                                                   __m256i indices_increment)
 
  307     in0 = _mm256_mul_ps(in0, in0);
 
  308     in1 = _mm256_mul_ps(in1, in1);
 
  310     __m256 abs_squared = _mm256_hadd_ps(in0, in1);
 
  311     __m256 compare_mask = _mm256_cmp_ps(abs_squared, *min_values, _CMP_LT_OS);
 
  323     *min_values = _mm256_min_ps(abs_squared, *min_values);
 
  326         _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(*min_indices),
 
  327                                              _mm256_castsi256_ps(*current_indices),
 
  330     *current_indices = _mm256_add_epi32(*current_indices, indices_increment);
 
FORCE_INLINE __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:3391
 
FORCE_INLINE __m128i _mm_and_si128(__m128i, __m128i)
Definition: sse2neon.h:3128
 
FORCE_INLINE __m128i _mm_set1_epi8(signed char w)
Definition: sse2neon.h:5239
 
int64x2_t __m128i
Definition: sse2neon.h:244
 
static __m256 _mm256_scaled_norm_dist_ps_avx2(const __m256 symbols0, const __m256 symbols1, const __m256 points0, const __m256 points1, const __m256 scalar)
Definition: volk_avx2_intrinsics.h:92
 
static __m256 _mm256_polar_sign_mask_avx2(__m128i fbits)
Definition: volk_avx2_intrinsics.h:20
 
static void vector_32fc_index_max_variant1(__m256 in0, __m256 in1, __m256 *max_values, __m256i *max_indices, __m256i *current_indices, __m256i indices_increment)
Definition: volk_avx2_intrinsics.h:188
 
static __m256 _mm256_magnitudesquared_ps_avx2(const __m256 cplxValue0, const __m256 cplxValue1)
Definition: volk_avx2_intrinsics.h:82
 
static void vector_32fc_index_min_variant0(__m256 in0, __m256 in1, __m256 *min_values, __m256i *min_indices, __m256i *current_indices, __m256i indices_increment)
Definition: volk_avx2_intrinsics.h:238
 
static __m256 _mm256_polar_fsign_add_llrs_avx2(__m256 src0, __m256 src1, __m128i fbits)
Definition: volk_avx2_intrinsics.h:68
 
static void vector_32fc_index_max_variant0(__m256 in0, __m256 in1, __m256 *max_values, __m256i *max_indices, __m256i *current_indices, __m256i indices_increment)
Definition: volk_avx2_intrinsics.h:126
 
static void vector_32fc_index_min_variant1(__m256 in0, __m256 in1, __m256 *min_values, __m256i *min_indices, __m256i *current_indices, __m256i indices_increment)
Definition: volk_avx2_intrinsics.h:300
 
static void _mm256_polar_deinterleave(__m256 *llr0, __m256 *llr1, __m256 src0, __m256 src1)
Definition: volk_avx_intrinsics.h:145