30 #ifndef INCLUDED_VOLK_32u_REVERSE_32u_U_H
79 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0,
80 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8,
81 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94,
82 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
83 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2,
84 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
85 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86,
86 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
87 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
88 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1,
89 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99,
90 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
91 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD,
92 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3,
93 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B,
94 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
95 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7,
96 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
97 0x3F, 0xBF, 0x7F, 0xFF
99 #ifdef LV_HAVE_GENERIC
102 unsigned int num_points)
106 unsigned int number = 0;
107 for (; number < num_points; ++number) {
108 out_ptr->b00 = in_ptr->b31;
109 out_ptr->b01 = in_ptr->b30;
110 out_ptr->b02 = in_ptr->b29;
111 out_ptr->b03 = in_ptr->b28;
112 out_ptr->b04 = in_ptr->b27;
113 out_ptr->b05 = in_ptr->b26;
114 out_ptr->b06 = in_ptr->b25;
115 out_ptr->b07 = in_ptr->b24;
116 out_ptr->b08 = in_ptr->b23;
117 out_ptr->b09 = in_ptr->b22;
118 out_ptr->b10 = in_ptr->b21;
119 out_ptr->b11 = in_ptr->b20;
120 out_ptr->b12 = in_ptr->b19;
121 out_ptr->b13 = in_ptr->b18;
122 out_ptr->b14 = in_ptr->b17;
123 out_ptr->b15 = in_ptr->b16;
124 out_ptr->b16 = in_ptr->b15;
125 out_ptr->b17 = in_ptr->b14;
126 out_ptr->b18 = in_ptr->b13;
127 out_ptr->b19 = in_ptr->b12;
128 out_ptr->b20 = in_ptr->b11;
129 out_ptr->b21 = in_ptr->b10;
130 out_ptr->b22 = in_ptr->b09;
131 out_ptr->b23 = in_ptr->b08;
132 out_ptr->b24 = in_ptr->b07;
133 out_ptr->b25 = in_ptr->b06;
134 out_ptr->b26 = in_ptr->b05;
135 out_ptr->b27 = in_ptr->b04;
136 out_ptr->b28 = in_ptr->b03;
137 out_ptr->b29 = in_ptr->b02;
138 out_ptr->b30 = in_ptr->b01;
139 out_ptr->b31 = in_ptr->b00;
146 #ifdef LV_HAVE_GENERIC
149 unsigned int num_points)
151 const uint32_t* in_ptr = in;
152 uint32_t* out_ptr = out;
153 unsigned int number = 0;
154 for (; number < num_points; ++number) {
201 #ifdef LV_HAVE_GENERIC
205 const uint32_t* in_ptr = in;
206 uint32_t* out_ptr = out;
207 unsigned int number = 0;
208 for (; number < num_points; ++number) {
221 #ifdef LV_HAVE_GENERIC
225 const uint32_t* in_ptr = in;
226 uint32_t* out_ptr = out;
229 unsigned int number = 0;
230 for (; number < num_points; ++number) {
231 in8 = (const uint8_t*)in_ptr;
232 out8 = (uint8_t*)out_ptr;
233 out8[3] = ((in8[0] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
234 out8[2] = ((in8[1] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
235 out8[1] = ((in8[2] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
236 out8[0] = ((in8[3] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
243 #ifdef LV_HAVE_GENERIC
248 const uint32_t* in_ptr = in;
249 uint32_t* out_ptr = out;
252 unsigned int number = 0;
253 for (; number < num_points; ++number) {
254 in8 = (const uint8_t*)in_ptr;
255 out8 = (uint8_t*)out_ptr;
256 out8[3] = (in8[0] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
257 out8[2] = (in8[1] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
258 out8[1] = (in8[2] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
259 out8[0] = (in8[3] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
267 #ifdef LV_HAVE_GENERIC
270 unsigned int num_points)
272 const uint32_t* in_ptr = in;
273 uint32_t* out_ptr = out;
274 unsigned int number = 0;
275 for (; number < num_points; ++number) {
276 uint32_t tmp = *in_ptr;
280 tmp = (tmp << 16) | (tmp >> 16);
285 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
289 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
290 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
295 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
300 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
308 #ifdef LV_HAVE_GENERIC
311 unsigned int num_points)
314 const uint32_t* in_ptr = in;
315 uint32_t* out_ptr = out;
316 unsigned int number = 0;
317 for (; number < num_points; ++number) {
318 uint32_t tmp = *in_ptr;
319 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
320 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
321 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
322 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
323 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
324 tmp = (tmp << 16) | (tmp >> 16);
333 #ifdef LV_HAVE_NEONV8
334 #include <arm_neon.h>
337 volk_32u_reverse_32u_neonv8(uint32_t* out, const uint32_t* in, unsigned int num_points)
339 const uint32_t* in_ptr = in;
340 uint32_t* out_ptr = out;
342 const uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
344 const unsigned int quarterPoints = num_points / 4;
345 unsigned int number = 0;
346 for (; number < quarterPoints; ++number) {
348 uint32x4_t x = vld1q_u32(in_ptr);
350 vreinterpretq_u32_u8(vqtbl1q_u8(vrbitq_u8(vreinterpretq_u8_u32(x)), idx));
351 vst1q_u32(out_ptr, z);
355 number = quarterPoints * 4;
356 for (; number < num_points; ++number) {
368 #include <arm_neon.h>
371 __VOLK_ASM("rbit %[result], %[value]" \
372 : [result] "=r"(*out_ptr) \
373 : [value] "r"(*in_ptr) \
382 const uint32_t* in_ptr = in;
383 uint32_t* out_ptr = out;
384 const unsigned int eighthPoints = num_points / 8;
385 unsigned int number = 0;
386 for (; number < eighthPoints; ++number) {
397 number = eighthPoints * 8;
398 for (; number < num_points; ++number) {
Definition: volk_32u_reverse_32u.h:65
uint8_t b02
Definition: volk_32u_reverse_32u.h:68
uint8_t b01
Definition: volk_32u_reverse_32u.h:67
uint8_t b05
Definition: volk_32u_reverse_32u.h:71
uint8_t b07
Definition: volk_32u_reverse_32u.h:73
uint8_t b04
Definition: volk_32u_reverse_32u.h:70
uint8_t b00
Definition: volk_32u_reverse_32u.h:66
uint8_t b06
Definition: volk_32u_reverse_32u.h:72
uint8_t b03
Definition: volk_32u_reverse_32u.h:69
Definition: volk_32u_reverse_32u.h:31
int b10
Definition: volk_32u_reverse_32u.h:42
int b02
Definition: volk_32u_reverse_32u.h:34
int b07
Definition: volk_32u_reverse_32u.h:39
int b16
Definition: volk_32u_reverse_32u.h:48
int b24
Definition: volk_32u_reverse_32u.h:56
int b06
Definition: volk_32u_reverse_32u.h:38
int b09
Definition: volk_32u_reverse_32u.h:41
int b28
Definition: volk_32u_reverse_32u.h:60
int b03
Definition: volk_32u_reverse_32u.h:35
int b11
Definition: volk_32u_reverse_32u.h:43
int b31
Definition: volk_32u_reverse_32u.h:63
int b23
Definition: volk_32u_reverse_32u.h:55
int b29
Definition: volk_32u_reverse_32u.h:61
int b25
Definition: volk_32u_reverse_32u.h:57
int b14
Definition: volk_32u_reverse_32u.h:46
int b15
Definition: volk_32u_reverse_32u.h:47
int b08
Definition: volk_32u_reverse_32u.h:40
int b21
Definition: volk_32u_reverse_32u.h:53
int b27
Definition: volk_32u_reverse_32u.h:59
int b19
Definition: volk_32u_reverse_32u.h:51
int b22
Definition: volk_32u_reverse_32u.h:54
int b30
Definition: volk_32u_reverse_32u.h:62
int b04
Definition: volk_32u_reverse_32u.h:36
int b18
Definition: volk_32u_reverse_32u.h:50
int b17
Definition: volk_32u_reverse_32u.h:49
int b12
Definition: volk_32u_reverse_32u.h:44
int b05
Definition: volk_32u_reverse_32u.h:37
int b00
Definition: volk_32u_reverse_32u.h:32
int b01
Definition: volk_32u_reverse_32u.h:33
int b26
Definition: volk_32u_reverse_32u.h:58
int b13
Definition: volk_32u_reverse_32u.h:45
int b20
Definition: volk_32u_reverse_32u.h:52
static void volk_32u_reverse_32u_1972magic(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:246
static void volk_32u_reverse_32u_dword_shuffle(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:100
static void volk_32u_reverse_32u_2001magic(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:223
static void volk_32u_reverse_32u_lut(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:203
#define DO_RBIT
Definition: volk_32u_reverse_32u.h:370
static const unsigned char BitReverseTable256[]
Definition: volk_32u_reverse_32u.h:78
static void volk_32u_reverse_32u_bintree_permute_bottom_up(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:309
static void volk_32u_reverse_32u_bintree_permute_top_down(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:268
static void volk_32u_reverse_32u_arm(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:379
static void volk_32u_reverse_32u_byte_shuffle(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:147
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:71