47 #ifndef INCLUDED_volk_64u_popcnt_a_H
48 #define INCLUDED_volk_64u_popcnt_a_H
54 #ifdef LV_HAVE_GENERIC
/* Portable bit count of the 64-bit input `value`, done as two independent
 * 32-bit passes using the parallel (SWAR) bit-summing technique.
 * NOTE(review): this excerpt appears to be the body of
 * volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value); the function
 * header and the statements that combine the two counts and store the result
 * are not visible here. */
/* Pass 1: isolate the low 32 bits of the input. */
63 uint32_t retVal = (uint32_t)(value & 0x00000000FFFFFFFFull);
/* Add each pair of adjacent bits into a 2-bit field. `>>` binds tighter
 * than `&`, so the second operand is (retVal >> 1) & 0x55555555. */
65 retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
/* Add adjacent 2-bit sums into 4-bit fields. */
66 retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
/* Add adjacent 4-bit sums; the mask leaves one count (0..8) per byte. */
67 retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
/* Fold neighbouring byte counts together. No mask is applied here: the
 * upper bytes hold partial sums that the final mask discards. */
68 retVal = (retVal + (retVal >> 8));
/* Fold the upper half into the lower half and keep the low 6 bits, which
 * are enough to hold a total in the range 0..32. */
69 retVal = (retVal + (retVal >> 16)) & 0x0000003F;
/* Save the low-half count in a 64-bit variable before retVal is reused. */
70 uint64_t retVal64 = retVal;
/* Pass 2: isolate the high 32 bits of the input and repeat the same
 * reduction; afterwards retVal holds the bit count of the high half. */
73 retVal = (uint32_t)((value & 0xFFFFFFFF00000000ull) >> 32);
74 retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
75 retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
76 retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
77 retVal = (retVal + (retVal >> 8));
78 retVal = (retVal + (retVal >> 16)) & 0x0000003F;
87 #if LV_HAVE_SSE4_2 && LV_HAVE_64
89 #include <nmmintrin.h>
91 static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret,
const uint64_t value)
100 #include <arm_neon.h>
/* NEON bit count of the 64-bit input `value`: count bits per byte, then
 * widen and pairwise-add the counts until a single 64-bit total remains.
 * NOTE(review): this excerpt appears to be the body of
 * volk_64u_popcnt_neon(uint64_t *ret, const uint64_t value); the function
 * header and braces are not visible here. */
/* One vector per stage of the reduction: 8x8-bit, 4x16-bit, 2x32-bit and
 * 1x64-bit lanes. */
103 uint8x8_t input_val, count8x8_val;
104 uint16x4_t count16x4_val;
105 uint32x2_t count32x2_val;
106 uint64x1_t count64x1_val;
/* Load the eight bytes of `value` as a vector of 8-bit lanes. The pointer
 * cast reinterprets the address of `value` as a byte pointer. */
108 input_val = vld1_u8((
unsigned char*)&value);
/* Count the set bits in each byte lane. */
109 count8x8_val = vcnt_u8(input_val);
/* Widening pairwise adds: 8 byte counts -> 4 sums -> 2 sums -> 1 total. */
110 count16x4_val = vpaddl_u8(count8x8_val);
111 count32x2_val = vpaddl_u16(count16x4_val);
112 count64x1_val = vpaddl_u32(count32x2_val);
/* Store the single 64-bit total through the output pointer `ret`. */
113 vst1_u64(ret, count64x1_val);
FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a)
Definition: sse2neon.h:8763
static void volk_64u_popcnt_neon(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:101
static void volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:57