45 #ifndef INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
46 #define INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
49 unsigned char t[64 / 8 ];
50 unsigned int w[64 / 32];
51 unsigned short s[64 / 16];
52 unsigned char c[64 / 8];
60 static inline void renormalize(
unsigned char* X,
unsigned char threshold)
65 unsigned char min = X[0];
67 for (
i = 0;
i < NUMSTATES;
i++)
70 for (
i = 0;
i < NUMSTATES;
i++)
83 unsigned char* Branchtab)
85 int j, decision0, decision1;
86 unsigned char metric, m0, m1, m2, m3;
91 int PRECISIONSHIFT = 2;
94 for (j = 0; j < RATE; j++)
95 metric += (Branchtab[
i + j * NUMSTATES / 2] ^ syms[s * RATE + j]) >> METRICSHIFT;
96 metric = metric >> PRECISIONSHIFT;
98 unsigned char max = ((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
101 m1 = X[
i + NUMSTATES / 2] + (max - metric);
102 m2 = X[
i] + (max - metric);
103 m3 = X[
i + NUMSTATES / 2] + metric;
105 decision0 = (
signed int)(m0 - m1) > 0;
106 decision1 = (
signed int)(m2 - m3) > 0;
108 Y[2 *
i] = decision0 ? m1 : m0;
109 Y[2 *
i + 1] = decision1 ? m3 : m2;
111 d->
w[
i / (
sizeof(
unsigned int) * 8 / 2) +
112 s * (
sizeof(
decision_t) /
sizeof(
unsigned int))] |=
113 (decision0 | decision1 << 1) << ((2 *
i) & (
sizeof(
unsigned int) * 8 - 1));
324 #include <emmintrin.h>
325 #include <mmintrin.h>
326 #include <pmmintrin.h>
328 #include <xmmintrin.h>
334 unsigned int framebits,
336 unsigned char* Branchtab)
339 for (i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
340 unsigned char a75, a81;
342 short int s20, s21, s26, s27;
343 unsigned char *a74, *a80, *b6;
344 short int *a110, *a111, *a91, *a93, *a94;
345 __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83, *a95, *a96, *a97, *a98, *a99;
347 __m128i a100, a101, a103, a104, a107, a108, a109, a76, a78, a79, a82, a84, a85,
348 a88, a89, a90, d10, d11, d12, d9, m23, m24, m25, m26, m27, m28, m29, m30, s18,
349 s19, s22, s23, s24, s25, s28, s29, t13, t14, t15, t16, t17, t18;
374 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
376 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
387 a91 = ((
short int*)dec);
416 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
418 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
440 if ((((
unsigned char*)Y)[0] > 210)) {
461 unsigned char a188, a194;
463 short int s48, s49, s54, s55;
464 unsigned char *a187, *a193, *b15;
465 short int *a204, *a206, *a207, *a223, *a224, *b16;
466 __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210, *a211, *a212, *a215,
468 __m128i a199, a200, a218, a219;
469 __m128i a189, a191, a192, a195, a197, a198, a201, a202, a203, a213, a214, a216,
470 a217, a220, a221, a222, d17, d18, d19, d20, m39, m40, m41, m42, m43, m44, m45,
471 m46, s46, s47, s50, s51, s52, s53, s56, s57, t25, t26, t27, t28, t29, t30;
496 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
498 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
509 a204 = ((
short int*)dec);
539 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
541 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
563 if ((((
unsigned char*)X)[0] > 210)) {
596 for (j = 0; j < (framebits + excess) % 2; ++j) {
598 for (
i = 0;
i < 64 / 2;
i++) {
600 (((framebits + excess) >> 1) << 1) + j,
630 unsigned int framebits,
632 unsigned char* Branchtab)
635 for (i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
636 unsigned char a75, a81;
638 short int s20, s21, s26, s27;
639 unsigned char *a74, *a80, *b6;
640 short int *a110, *a111, *a91, *a93, *a94;
641 __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83, *a95, *a96, *a97, *a98, *a99;
643 __m128i a100, a101, a103, a104, a107, a108, a109, a76, a78, a79, a82, a84, a85,
644 a88, a89, a90, d10, d11, d12, d9, m23, m24, m25, m26, m27, m28, m29, m30, s18,
645 s19, s22, s23, s24, s25, s28, s29, t13, t14, t15, t16, t17, t18;
670 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
672 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
683 a91 = ((
short int*)dec);
712 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
714 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
736 if ((((
unsigned char*)Y)[0] > 210)) {
757 unsigned char a188, a194;
759 short int s48, s49, s54, s55;
760 unsigned char *a187, *a193, *b15;
761 short int *a204, *a206, *a207, *a223, *a224, *b16;
762 __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210, *a211, *a212, *a215,
764 __m128i a199, a200, a218, a219;
765 __m128i a189, a191, a192, a195, a197, a198, a201, a202, a203, a213, a214, a216,
766 a217, a220, a221, a222, d17, d18, d19, d20, m39, m40, m41, m42, m43, m44, m45,
767 m46, s46, s47, s50, s51, s52, s53, s56, s57, t25, t26, t27, t28, t29, t30;
792 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
794 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
805 a204 = ((
short int*)dec);
835 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
837 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
859 if ((((
unsigned char*)X)[0] > 210)) {
892 for (j = 0; j < (framebits + excess) % 2; ++j) {
894 for (
i = 0;
i < 64 / 2;
i++) {
896 (((framebits + excess) >> 1) << 1) + j,
924 unsigned int framebits,
926 unsigned char* Branchtab)
928 int nbits = framebits + excess;
930 int RENORMALIZE_THRESHOLD = 210;
933 for (s = 0; s < nbits; s++) {
935 for (
i = 0;
i < NUMSTATES / 2;
i++) {
944 Y = (
unsigned char*)tmp;
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:6405
FORCE_INLINE __m128i _mm_adds_epu8(__m128i a, __m128i b)
Definition: sse2neon.h:3101
FORCE_INLINE int _mm_movemask_epi8(__m128i a)
Definition: sse2neon.h:4776
FORCE_INLINE __m128i _mm_and_si128(__m128i, __m128i)
Definition: sse2neon.h:3128
#define _mm_srli_epi64(a, imm)
Definition: sse2neon.h:5863
FORCE_INLINE __m128i _mm_set1_epi8(signed char w)
Definition: sse2neon.h:5239
FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:6300
FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b)
Definition: sse2neon.h:6458
FORCE_INLINE __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:3284
#define _mm_shufflelo_epi16(a, imm)
Definition: sse2neon.h:5459
FORCE_INLINE __m128i _mm_min_epu8(__m128i a, __m128i b)
Definition: sse2neon.h:4696
#define _mm_srli_epi16(a, imm)
Definition: sse2neon.h:5812
FORCE_INLINE __m128i _mm_srli_si128(__m128i a, int imm)
Definition: sse2neon.h:5885
FORCE_INLINE __m128i _mm_subs_epu8(__m128i a, __m128i b)
Definition: sse2neon.h:6206
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE __m128i _mm_avg_epu8(__m128i a, __m128i b)
Definition: sse2neon.h:3187
FORCE_INLINE __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
Definition: sse2neon.h:6386
Definition: volk_8u_x4_conv_k7_r2_8u.h:48
unsigned int w[64/32]
Definition: volk_8u_x4_conv_k7_r2_8u.h:50
static void BFLY(int i, int s, unsigned char *syms, unsigned char *Y, unsigned char *X, decision_t *d, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:77
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:330
static void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:626
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:920
static void renormalize(unsigned char *X, unsigned char threshold)
Definition: volk_8u_x4_conv_k7_r2_8u.h:60
for i
Definition: volk_config_fixed.tmpl.h:13