57 #ifndef INCLUDED_volk_32f_binary_slicer_32i_H
58 #define INCLUDED_volk_32f_binary_slicer_32i_H
61 #ifdef LV_HAVE_GENERIC
65 unsigned int num_points)
68 const float* aPtr = aVector;
69 unsigned int number = 0;
71 for (number = 0; number < num_points; number++) {
82 #ifdef LV_HAVE_GENERIC
86 unsigned int num_points)
89 const float* aPtr = aVector;
90 unsigned int number = 0;
92 for (number = 0; number < num_points; number++) {
93 *cPtr++ = (*aPtr++ >= 0);
100 #include <emmintrin.h>
103 const float* aVector,
104 unsigned int num_points)
107 const float* aPtr = aVector;
108 unsigned int number = 0;
110 unsigned int quarter_points = num_points / 4;
116 for (number = 0; number < quarter_points; number++) {
129 for (number = quarter_points * 4; number < num_points; number++) {
141 #include <immintrin.h>
144 const float* aVector,
145 unsigned int num_points)
148 const float* aPtr = aVector;
149 unsigned int number = 0;
151 unsigned int quarter_points = num_points / 8;
152 __m256 a_val, res_f, binary_f;
154 __m256 zero_val, one_val;
155 zero_val = _mm256_set1_ps(0.0f);
156 one_val = _mm256_set1_ps(1.0f);
158 for (number = 0; number < quarter_points; number++) {
159 a_val = _mm256_load_ps(aPtr);
161 res_f = _mm256_cmp_ps(a_val, zero_val, _CMP_GE_OS);
162 binary_f = _mm256_and_ps(res_f, one_val);
163 binary_i = _mm256_cvtps_epi32(binary_f);
165 _mm256_store_si256((__m256i*)cPtr, binary_i);
171 for (number = quarter_points * 8; number < num_points; number++) {
183 #include <emmintrin.h>
186 const float* aVector,
187 unsigned int num_points)
190 const float* aPtr = aVector;
191 unsigned int number = 0;
193 unsigned int quarter_points = num_points / 4;
199 for (number = 0; number < quarter_points; number++) {
212 for (number = quarter_points * 4; number < num_points; number++) {
224 #include <immintrin.h>
227 const float* aVector,
228 unsigned int num_points)
231 const float* aPtr = aVector;
232 unsigned int number = 0;
234 unsigned int quarter_points = num_points / 8;
235 __m256 a_val, res_f, binary_f;
237 __m256 zero_val, one_val;
238 zero_val = _mm256_set1_ps(0.0f);
239 one_val = _mm256_set1_ps(1.0f);
241 for (number = 0; number < quarter_points; number++) {
242 a_val = _mm256_loadu_ps(aPtr);
244 res_f = _mm256_cmp_ps(a_val, zero_val, _CMP_GE_OS);
245 binary_f = _mm256_and_ps(res_f, one_val);
246 binary_i = _mm256_cvtps_epi32(binary_f);
248 _mm256_storeu_si256((__m256i*)cPtr, binary_i);
254 for (number = quarter_points * 8; number < num_points; number++) {
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
float32x4_t __m128
Definition: sse2neon.h:235
#define _mm_srli_epi32(a, imm)
Definition: sse2neon.h:5838
FORCE_INLINE __m128i _mm_cvtps_epi32(__m128)
Definition: sse2neon.h:4036
FORCE_INLINE __m128 _mm_cmpge_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1133
FORCE_INLINE __m128 _mm_set1_ps(float _w)
Definition: sse2neon.h:2503
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1941
FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:6010
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1858
int64x2_t __m128i
Definition: sse2neon.h:244
static void volk_32f_binary_slicer_32i_generic(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:63
static void volk_32f_binary_slicer_32i_generic_branchless(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:84
static void volk_32f_binary_slicer_32i_u_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:185
static void volk_32f_binary_slicer_32i_a_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:143
static void volk_32f_binary_slicer_32i_a_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:102
static void volk_32f_binary_slicer_32i_u_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:226