Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
volk_32u_popcnt.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
43 #ifndef INCLUDED_VOLK_32u_POPCNT_A16_H
44 #define INCLUDED_VOLK_32u_POPCNT_A16_H
45 
46 #include <inttypes.h>
47 #include <stdio.h>
48 
49 #ifdef LV_HAVE_GENERIC
50 
/*
 * Portable population count: stores the number of 1-bits of `value`
 * into `*ret`.
 *
 * The count is built without branches or tables by treating the word as
 * packed bit fields and adding neighbouring fields together, doubling the
 * field width at each stage (1 -> 2 -> 4 -> 8 bits, then folding the
 * bytes into the low bits).
 *
 * ret   - destination for the bit count (0..32); written unconditionally.
 * value - the 32-bit word whose set bits are counted.
 */
static inline void volk_32u_popcnt_generic(uint32_t* ret, const uint32_t value)
{
    /* Each 2-bit field holds the number of set bits in that pair (0..2). */
    const uint32_t pair_sums = (value & 0x55555555u) + ((value >> 1) & 0x55555555u);

    /* Each 4-bit field holds the number of set bits in that nibble (0..4). */
    const uint32_t nibble_sums =
        (pair_sums & 0x33333333u) + ((pair_sums >> 2) & 0x33333333u);

    /* Each byte holds the number of set bits in that byte (0..8). */
    const uint32_t byte_sums = (nibble_sums + (nibble_sums >> 4)) & 0x0F0F0F0Fu;

    /* Add adjacent bytes; per-byte totals stay <= 16, so nothing carries
     * across a byte boundary. */
    const uint32_t half_sums = byte_sums + (byte_sums >> 8);

    /* Fold the upper half onto the lower half. The total is at most 32,
     * so only the low six bits are kept; the rest is leftover partial sums. */
    *ret = (half_sums + (half_sums >> 16)) & 0x0000003Fu;
}
64 
65 #endif /*LV_HAVE_GENERIC*/
66 
67 
68 #ifdef LV_HAVE_SSE4_2
69 
70 #include <nmmintrin.h>
71 
/*
 * SSE4.2 population count: stores the result of the _mm_popcnt_u32
 * intrinsic applied to `value` into `*ret`.
 *
 * ret   - destination for the bit count; written unconditionally.
 * value - the 32-bit word passed to the intrinsic.
 */
static inline void volk_32u_popcnt_a_sse4_2(uint32_t* ret, const uint32_t value)
{
    /* The intrinsic returns a plain int; convert it to the output type. */
    const int set_bits = _mm_popcnt_u32(value);

    *ret = (uint32_t)set_bits;
}
76 
77 #endif /*LV_HAVE_SSE4_2*/
78 
79 #endif /*INCLUDED_VOLK_32u_POPCNT_A16_H*/
FORCE_INLINE int _mm_popcnt_u32(unsigned int a)
Definition: sse2neon.h:8736
static void volk_32u_popcnt_a_sse4_2(uint32_t *ret, const uint32_t value)
Definition: volk_32u_popcnt.h:72
static void volk_32u_popcnt_generic(uint32_t *ret, const uint32_t value)
Definition: volk_32u_popcnt.h:51