40 #ifndef INCLUDED_volk_8i_convert_16i_u_H
41 #define INCLUDED_volk_8i_convert_16i_u_H
47 #include <immintrin.h>
/*
 * AVX2 kernel (unaligned-store variant): converts int8_t samples to int16_t
 * and scales each one by 256.
 *
 * outputVector - receives num_points int16_t results
 * inputVector  - num_points int8_t samples to convert
 * num_points   - number of samples to process
 *
 * NOTE(review): the embedded listing line numbers jump (e.g. 61 -> 63,
 * 65 -> 71), so some statements of this function are not shown here; the
 * comments below describe only the visible lines.
 */
49 static inline void volk_8i_convert_16i_u_avx2(int16_t* outputVector,
50 const int8_t* inputVector,
51 unsigned int num_points)
53 unsigned int number = 0;
/* Number of complete 16-sample groups processed by the vector loop. */
54 const unsigned int sixteenthPoints = num_points / 16;
/* The output buffer is written through a 256-bit vector pointer. */
57 __m256i* outputVectorPtr = (__m256i*)outputVector;
61 for (; number < sixteenthPoints; number++) {
/* Sign-extend the packed 8-bit values in inputVal to 16-bit lanes. */
63 ret = _mm256_cvtepi8_epi16(inputVal);
/* Shift every 16-bit lane left by 8 bits, i.e. multiply by 256. */
64 ret = _mm256_slli_epi16(ret, 8);
/* Unaligned 256-bit store of the converted lanes. */
65 _mm256_storeu_si256(outputVectorPtr, ret);
/* Scalar tail: convert the samples left after the last full group of 16. */
71 number = sixteenthPoints * 16;
72 for (; number < num_points; number++) {
73 outputVector[number] = (int16_t)(inputVector[number]) * 256;
80 #include <smmintrin.h>
/*
 * SSE4.1 kernel (named as the unaligned "_u" variant): converts int8_t
 * samples to int16_t scaled by 256.
 *
 * outputVector - receives num_points int16_t results
 * inputVector  - num_points int8_t samples to convert
 * num_points   - number of samples to process
 *
 * NOTE(review): the body of the vector loop (listing lines 95-111) is not
 * shown in this excerpt, so only the loop bounds and the scalar tail are
 * documented here.
 */
82 static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector,
83 const int8_t* inputVector,
84 unsigned int num_points)
86 unsigned int number = 0;
/* Number of complete 16-sample groups; the vector loop runs once per group. */
87 const unsigned int sixteenthPoints = num_points / 16;
94 for (; number < sixteenthPoints; number++) {
/* Scalar tail: convert the samples left after the last full group of 16. */
112 number = sixteenthPoints * 16;
113 for (; number < num_points; number++) {
114 outputVector[number] = (int16_t)(inputVector[number]) * 256;
120 #ifdef LV_HAVE_GENERIC
123 const int8_t* inputVector,
124 unsigned int num_points)
/*
 * Portable scalar implementation: walks both buffers with running pointers
 * and writes each int8_t input, widened to int16_t and multiplied by 256,
 * to the output. Exactly num_points samples are processed.
 *
 * NOTE(review): the first line of the signature (listing line 122) is not
 * shown in this excerpt.
 */
126 int16_t* outputVectorPtr = outputVector;
127 const int8_t* inputVectorPtr = inputVector;
128 unsigned int number = 0;
130 for (number = 0; number < num_points; number++) {
/* Read one sample, scale it, store it, and advance both pointers. */
131 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
140 #ifndef INCLUDED_volk_8i_convert_16i_a_H
141 #define INCLUDED_volk_8i_convert_16i_a_H
143 #include <inttypes.h>
147 #include <immintrin.h>
/*
 * AVX2 kernel (aligned-store variant): converts int8_t samples to int16_t
 * and scales each one by 256.
 *
 * outputVector - receives num_points int16_t results; written with an
 *                aligned 256-bit store, so it must satisfy that store's
 *                alignment requirement
 * inputVector  - num_points int8_t samples to convert
 * num_points   - number of samples to process
 *
 * NOTE(review): the embedded listing line numbers jump (e.g. 161 -> 163,
 * 165 -> 171), so some statements of this function are not shown here; the
 * comments below describe only the visible lines.
 */
149 static inline void volk_8i_convert_16i_a_avx2(int16_t* outputVector,
150 const int8_t* inputVector,
151 unsigned int num_points)
153 unsigned int number = 0;
/* Number of complete 16-sample groups processed by the vector loop. */
154 const unsigned int sixteenthPoints = num_points / 16;
/* The output buffer is written through a 256-bit vector pointer. */
157 __m256i* outputVectorPtr = (__m256i*)outputVector;
161 for (; number < sixteenthPoints; number++) {
/* Sign-extend the packed 8-bit values in inputVal to 16-bit lanes. */
163 ret = _mm256_cvtepi8_epi16(inputVal);
/* Shift every 16-bit lane left by 8 bits, i.e. multiply by 256. */
164 ret = _mm256_slli_epi16(ret, 8);
/* Aligned 256-bit store of the converted lanes. */
165 _mm256_store_si256(outputVectorPtr, ret);
/* Scalar tail: convert the samples left after the last full group of 16. */
171 number = sixteenthPoints * 16;
172 for (; number < num_points; number++) {
173 outputVector[number] = (int16_t)(inputVector[number]) * 256;
179 #ifdef LV_HAVE_SSE4_1
180 #include <smmintrin.h>
/*
 * SSE4.1 kernel (named as the aligned "_a" variant): converts int8_t
 * samples to int16_t scaled by 256.
 *
 * outputVector - receives num_points int16_t results
 * inputVector  - num_points int8_t samples to convert
 * num_points   - number of samples to process
 *
 * NOTE(review): the body of the vector loop (listing lines 195-211) is not
 * shown in this excerpt, so only the loop bounds and the scalar tail are
 * documented here; presumably it uses aligned loads/stores - confirm
 * against the full source.
 */
182 static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector,
183 const int8_t* inputVector,
184 unsigned int num_points)
186 unsigned int number = 0;
/* Number of complete 16-sample groups; the vector loop runs once per group. */
187 const unsigned int sixteenthPoints = num_points / 16;
194 for (; number < sixteenthPoints; number++) {
/* Scalar tail: convert the samples left after the last full group of 16. */
212 number = sixteenthPoints * 16;
213 for (; number < num_points; number++) {
214 outputVector[number] = (int16_t)(inputVector[number]) * 256;
220 #ifdef LV_HAVE_GENERIC
223 const int8_t* inputVector,
224 unsigned int num_points)
/*
 * Portable scalar implementation: walks both buffers with running pointers
 * and writes each int8_t input, widened to int16_t and multiplied by 256,
 * to the output. Exactly num_points samples are processed.
 *
 * NOTE(review): the first line of the signature (listing line 222) is not
 * shown in this excerpt.
 */
226 int16_t* outputVectorPtr = outputVector;
227 const int8_t* inputVectorPtr = inputVector;
228 unsigned int number = 0;
230 for (number = 0; number < num_points; number++) {
/* Read one sample, scale it, store it, and advance both pointers. */
231 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
238 #include <arm_neon.h>
241 const int8_t* inputVector,
242 unsigned int num_points)
/*
 * ARM NEON implementation: converts int8_t samples to int16_t scaled by 256,
 * eight samples per vector iteration, followed by a scalar tail.
 *
 * NOTE(review): the first line of the signature (listing line 240) and a few
 * body lines (e.g. the declarations of number and input_vec, listing line
 * 262) are not shown in this excerpt; comments cover only the visible lines.
 */
244 int16_t* outputVectorPtr = outputVector;
245 const int8_t* inputVectorPtr = inputVector;
/* Number of complete 8-sample groups processed by the vector loop. */
247 const unsigned int eighth_points = num_points / 8;
250 int16x8_t converted_vec;
255 for (number = 0; number < eighth_points; ++number) {
/* Load eight int8_t samples. */
256 input_vec = vld1_s8(inputVectorPtr);
/* Widen the eight signed 8-bit lanes to signed 16-bit lanes. */
257 converted_vec = vmovl_s8(input_vec);
/* Shift every 16-bit lane left by 8 bits, i.e. multiply by 256. */
259 converted_vec = vshlq_n_s16(converted_vec, 8);
/* Store the eight converted int16_t values. */
260 vst1q_s16(outputVectorPtr, converted_vec);
263 outputVectorPtr += 8;
/* Scalar tail: continue from the running pointers for the leftover samples. */
266 for (number = eighth_points * 8; number < num_points; number++) {
267 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
/*
 * Declaration of the ORC-backed implementation; its definition is not part
 * of this listing.
 */
274 extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector,
275 const int8_t* inputVector,
276 unsigned int num_points);
/*
 * ORC entry point: a thin wrapper that forwards outputVector, inputVector
 * and num_points unchanged to volk_8i_convert_16i_a_orc_impl.
 */
278 static inline void volk_8i_convert_16i_u_orc(int16_t* outputVector,
279 const int8_t* inputVector,
280 unsigned int num_points)
282 volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:5937
FORCE_INLINE __m128i _mm_loadu_si128(const __m128i *p)
Definition: sse2neon.h:4570
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition: sse2neon.h:4471
FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a)
Definition: sse2neon.h:6010
FORCE_INLINE __m128i _mm_slli_epi16(__m128i a, int imm)
Definition: sse2neon.h:5544
FORCE_INLINE __m128i _mm_cvtepi8_epi16(__m128i a)
Definition: sse2neon.h:7565
FORCE_INLINE __m128i _mm_srli_si128(__m128i a, int imm)
Definition: sse2neon.h:5885
int64x2_t __m128i
Definition: sse2neon.h:244
static void volk_8i_convert_16i_generic(int16_t *outputVector, const int8_t *inputVector, unsigned int num_points)
Definition: volk_8i_convert_16i.h:122
static void volk_8i_convert_16i_a_generic(int16_t *outputVector, const int8_t *inputVector, unsigned int num_points)
Definition: volk_8i_convert_16i.h:222
static void volk_8i_convert_16i_neon(int16_t *outputVector, const int8_t *inputVector, unsigned int num_points)
Definition: volk_8i_convert_16i.h:240