Vector Optimized Library of Kernels  3.0.0
Architecture-tuned implementations of math kernels
sse2neon.h File Reference
#include <stdint.h>
#include <stdlib.h>
#include <arm_neon.h>
#include <math.h>
#include <sys/time.h>


Data Structures

struct  fpcr_bitfield
 

Macros

#define SSE2NEON_PRECISE_MINMAX   (0)
 
#define SSE2NEON_PRECISE_DIV   (0)
 
#define SSE2NEON_PRECISE_SQRT   (0)
 
#define SSE2NEON_PRECISE_DP   (0)
 
#define FORCE_INLINE   static inline
 
#define ALIGN_STRUCT(x)   __declspec(align(x))
 
#define _sse2neon_likely(x)   (x)
 
#define _sse2neon_unlikely(x)   (x)
 
#define _sse2neon_const   const
 
#define __has_builtin(x)   0
 
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)    (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
 
#define _MM_FROUND_TO_NEAREST_INT   0x00
 
#define _MM_FROUND_TO_NEG_INF   0x01
 
#define _MM_FROUND_TO_POS_INF   0x02
 
#define _MM_FROUND_TO_ZERO   0x03
 
#define _MM_FROUND_CUR_DIRECTION   0x04
 
#define _MM_FROUND_NO_EXC   0x08
 
#define _MM_FROUND_RAISE_EXC   0x00
 
#define _MM_FROUND_NINT   (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
 
#define _MM_FROUND_FLOOR   (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
 
#define _MM_FROUND_CEIL   (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
 
#define _MM_FROUND_TRUNC   (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
 
#define _MM_FROUND_RINT   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
 
#define _MM_FROUND_NEARBYINT   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
 
#define _MM_ROUND_NEAREST   0x0000
 
#define _MM_ROUND_DOWN   0x2000
 
#define _MM_ROUND_UP   0x4000
 
#define _MM_ROUND_TOWARD_ZERO   0x6000
 
#define _MM_FLUSH_ZERO_MASK   0x8000
 
#define _MM_FLUSH_ZERO_ON   0x8000
 
#define _MM_FLUSH_ZERO_OFF   0x0000
 
#define _MM_DENORMALS_ZERO_MASK   0x0040
 
#define _MM_DENORMALS_ZERO_ON   0x0040
 
#define _MM_DENORMALS_ZERO_OFF   0x0000
 
#define __constrange(a, b)   const
 
#define __int64   int64_t
 
#define vreinterpretq_m128_f16(x)   vreinterpretq_f32_f16(x)
 
#define vreinterpretq_m128_f32(x)   (x)
 
#define vreinterpretq_m128_f64(x)   vreinterpretq_f32_f64(x)
 
#define vreinterpretq_m128_u8(x)   vreinterpretq_f32_u8(x)
 
#define vreinterpretq_m128_u16(x)   vreinterpretq_f32_u16(x)
 
#define vreinterpretq_m128_u32(x)   vreinterpretq_f32_u32(x)
 
#define vreinterpretq_m128_u64(x)   vreinterpretq_f32_u64(x)
 
#define vreinterpretq_m128_s8(x)   vreinterpretq_f32_s8(x)
 
#define vreinterpretq_m128_s16(x)   vreinterpretq_f32_s16(x)
 
#define vreinterpretq_m128_s32(x)   vreinterpretq_f32_s32(x)
 
#define vreinterpretq_m128_s64(x)   vreinterpretq_f32_s64(x)
 
#define vreinterpretq_f16_m128(x)   vreinterpretq_f16_f32(x)
 
#define vreinterpretq_f32_m128(x)   (x)
 
#define vreinterpretq_f64_m128(x)   vreinterpretq_f64_f32(x)
 
#define vreinterpretq_u8_m128(x)   vreinterpretq_u8_f32(x)
 
#define vreinterpretq_u16_m128(x)   vreinterpretq_u16_f32(x)
 
#define vreinterpretq_u32_m128(x)   vreinterpretq_u32_f32(x)
 
#define vreinterpretq_u64_m128(x)   vreinterpretq_u64_f32(x)
 
#define vreinterpretq_s8_m128(x)   vreinterpretq_s8_f32(x)
 
#define vreinterpretq_s16_m128(x)   vreinterpretq_s16_f32(x)
 
#define vreinterpretq_s32_m128(x)   vreinterpretq_s32_f32(x)
 
#define vreinterpretq_s64_m128(x)   vreinterpretq_s64_f32(x)
 
#define vreinterpretq_m128i_s8(x)   vreinterpretq_s64_s8(x)
 
#define vreinterpretq_m128i_s16(x)   vreinterpretq_s64_s16(x)
 
#define vreinterpretq_m128i_s32(x)   vreinterpretq_s64_s32(x)
 
#define vreinterpretq_m128i_s64(x)   (x)
 
#define vreinterpretq_m128i_u8(x)   vreinterpretq_s64_u8(x)
 
#define vreinterpretq_m128i_u16(x)   vreinterpretq_s64_u16(x)
 
#define vreinterpretq_m128i_u32(x)   vreinterpretq_s64_u32(x)
 
#define vreinterpretq_m128i_u64(x)   vreinterpretq_s64_u64(x)
 
#define vreinterpretq_f32_m128i(x)   vreinterpretq_f32_s64(x)
 
#define vreinterpretq_f64_m128i(x)   vreinterpretq_f64_s64(x)
 
#define vreinterpretq_s8_m128i(x)   vreinterpretq_s8_s64(x)
 
#define vreinterpretq_s16_m128i(x)   vreinterpretq_s16_s64(x)
 
#define vreinterpretq_s32_m128i(x)   vreinterpretq_s32_s64(x)
 
#define vreinterpretq_s64_m128i(x)   (x)
 
#define vreinterpretq_u8_m128i(x)   vreinterpretq_u8_s64(x)
 
#define vreinterpretq_u16_m128i(x)   vreinterpretq_u16_s64(x)
 
#define vreinterpretq_u32_m128i(x)   vreinterpretq_u32_s64(x)
 
#define vreinterpretq_u64_m128i(x)   vreinterpretq_u64_s64(x)
 
#define vreinterpret_m64_s8(x)   vreinterpret_s64_s8(x)
 
#define vreinterpret_m64_s16(x)   vreinterpret_s64_s16(x)
 
#define vreinterpret_m64_s32(x)   vreinterpret_s64_s32(x)
 
#define vreinterpret_m64_s64(x)   (x)
 
#define vreinterpret_m64_u8(x)   vreinterpret_s64_u8(x)
 
#define vreinterpret_m64_u16(x)   vreinterpret_s64_u16(x)
 
#define vreinterpret_m64_u32(x)   vreinterpret_s64_u32(x)
 
#define vreinterpret_m64_u64(x)   vreinterpret_s64_u64(x)
 
#define vreinterpret_m64_f16(x)   vreinterpret_s64_f16(x)
 
#define vreinterpret_m64_f32(x)   vreinterpret_s64_f32(x)
 
#define vreinterpret_m64_f64(x)   vreinterpret_s64_f64(x)
 
#define vreinterpret_u8_m64(x)   vreinterpret_u8_s64(x)
 
#define vreinterpret_u16_m64(x)   vreinterpret_u16_s64(x)
 
#define vreinterpret_u32_m64(x)   vreinterpret_u32_s64(x)
 
#define vreinterpret_u64_m64(x)   vreinterpret_u64_s64(x)
 
#define vreinterpret_s8_m64(x)   vreinterpret_s8_s64(x)
 
#define vreinterpret_s16_m64(x)   vreinterpret_s16_s64(x)
 
#define vreinterpret_s32_m64(x)   vreinterpret_s32_s64(x)
 
#define vreinterpret_s64_m64(x)   (x)
 
#define vreinterpret_f32_m64(x)   vreinterpret_f32_s64(x)
 
#define vreinterpretq_m128d_s32(x)   vreinterpretq_f32_s32(x)
 
#define vreinterpretq_m128d_s64(x)   vreinterpretq_f32_s64(x)
 
#define vreinterpretq_m128d_u32(x)   vreinterpretq_f32_u32(x)
 
#define vreinterpretq_m128d_u64(x)   vreinterpretq_f32_u64(x)
 
#define vreinterpretq_m128d_f32(x)   (x)
 
#define vreinterpretq_s64_m128d(x)   vreinterpretq_s64_f32(x)
 
#define vreinterpretq_u32_m128d(x)   vreinterpretq_u32_f32(x)
 
#define vreinterpretq_u64_m128d(x)   vreinterpretq_u64_f32(x)
 
#define vreinterpretq_f32_m128d(x)   (x)
 
#define vreinterpretq_nth_u64_m128i(x, n)   (((SIMDVec *) &x)->m128_u64[n])
 
#define vreinterpretq_nth_u32_m128i(x, n)   (((SIMDVec *) &x)->m128_u32[n])
 
#define vreinterpretq_nth_u8_m128i(x, n)   (((SIMDVec *) &x)->m128_u8[n])
 
#define _MM_GET_FLUSH_ZERO_MODE   _sse2neon_mm_get_flush_zero_mode
 
#define _MM_SET_FLUSH_ZERO_MODE   _sse2neon_mm_set_flush_zero_mode
 
#define _MM_GET_DENORMALS_ZERO_MODE   _sse2neon_mm_get_denormals_zero_mode
 
#define _MM_SET_DENORMALS_ZERO_MODE   _sse2neon_mm_set_denormals_zero_mode
 
#define _mm_shuffle_epi32_default(a, imm)
 
#define _mm_shuffle_epi32_splat(a, imm)
 
#define _mm_shuffle_ps_default(a, b, imm)
 
#define _mm_shufflelo_epi16_function(a, imm)
 
#define _mm_shufflehi_epi16_function(a, imm)
 
#define _mm_cvtps_pi32(a)   _mm_cvt_ps2pi(a)
 
#define _mm_cvtsi32_ss(a, b)   _mm_cvt_si2ss(a, b)
 
#define _mm_cvtss_si32(a)   _mm_cvt_ss2si(a)
 
#define _mm_cvttps_pi32(a)   _mm_cvtt_ps2pi(a)
 
#define _mm_cvttss_si32(a)   _mm_cvtt_ss2si(a)
 
#define _mm_extract_pi16(a, imm)    (int32_t) vget_lane_u16(vreinterpret_u16_m64(a), (imm))
 
#define _mm_insert_pi16(a, b, imm)
 
#define _mm_load_ps1   _mm_load1_ps
 
#define _m_maskmovq(a, mask, mem_addr)   _mm_maskmove_si64(a, mask, mem_addr)
 
#define _m_pavgb(a, b)   _mm_avg_pu8(a, b)
 
#define _m_pavgw(a, b)   _mm_avg_pu16(a, b)
 
#define _m_pextrw(a, imm)   _mm_extract_pi16(a, imm)
 
#define _m_pinsrw(a, i, imm)   _mm_insert_pi16(a, i, imm)
 
#define _m_pmaxsw(a, b)   _mm_max_pi16(a, b)
 
#define _m_pmaxub(a, b)   _mm_max_pu8(a, b)
 
#define _m_pminsw(a, b)   _mm_min_pi16(a, b)
 
#define _m_pminub(a, b)   _mm_min_pu8(a, b)
 
#define _m_pmovmskb(a)   _mm_movemask_pi8(a)
 
#define _m_pmulhuw(a, b)   _mm_mulhi_pu16(a, b)
 
#define _m_psadbw(a, b)   _mm_sad_pu8(a, b)
 
#define _m_pshufw(a, imm)   _mm_shuffle_pi16(a, imm)
 
#define _mm_shuffle_pi16(a, imm)
 
#define _mm_shuffle_ps(a, b, imm)
 
#define _mm_store1_ps   _mm_store_ps1
 
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)
 
#define _mm_ucomieq_ss   _mm_comieq_ss
 
#define _mm_ucomige_ss   _mm_comige_ss
 
#define _mm_ucomigt_ss   _mm_comigt_ss
 
#define _mm_ucomile_ss   _mm_comile_ss
 
#define _mm_ucomilt_ss   _mm_comilt_ss
 
#define _mm_ucomineq_ss   _mm_comineq_ss
 
#define _mm_bslli_si128(a, imm)   _mm_slli_si128(a, imm)
 
#define _mm_bsrli_si128(a, imm)   _mm_srli_si128(a, imm)
 
#define _mm_cvtsd_si64x   _mm_cvtsd_si64
 
#define _mm_cvtsi128_si64x(a)   _mm_cvtsi128_si64(a)
 
#define _mm_cvtsi64x_si128(a)   _mm_cvtsi64_si128(a)
 
#define _mm_cvtsi64x_sd(a, b)   _mm_cvtsi64_sd(a, b)
 
#define _mm_cvttsd_si64x(a)   _mm_cvttsd_si64(a)
 
#define _mm_extract_epi16(a, imm)    vgetq_lane_u16(vreinterpretq_u16_m128i(a), (imm))
 
#define _mm_insert_epi16(a, b, imm)
 
#define _mm_load_pd1   _mm_load1_pd
 
#define _mm_set_pd1   _mm_set1_pd
 
#define _mm_shuffle_epi32(a, imm)
 
#define _mm_shuffle_pd(a, b, imm8)
 
#define _mm_shufflehi_epi16(a, imm)   _mm_shufflehi_epi16_function((a), (imm))
 
#define _mm_shufflelo_epi16(a, imm)   _mm_shufflelo_epi16_function((a), (imm))
 
#define _mm_srai_epi32(a, imm)
 
#define _mm_srli_epi16(a, imm)
 
#define _mm_srli_epi32(a, imm)
 
#define _mm_srli_epi64(a, imm)
 
#define _mm_store1_pd   _mm_store_pd1
 
#define _mm_ucomieq_sd   _mm_comieq_sd
 
#define _mm_ucomige_sd   _mm_comige_sd
 
#define _mm_ucomigt_sd   _mm_comigt_sd
 
#define _mm_ucomile_sd   _mm_comile_sd
 
#define _mm_ucomilt_sd   _mm_comilt_sd
 
#define _mm_ucomineq_sd   _mm_comineq_sd
 
#define _mm_lddqu_si128   _mm_loadu_si128
 
#define _mm_loaddup_pd   _mm_load1_pd
 
#define _mm_alignr_pi8(a, b, imm)
 
#define _mm_blend_epi16(a, b, imm)
 
#define _mm_blend_pd(a, b, imm)
 
#define _mm_extract_epi32(a, imm)    vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm))
 
#define _mm_extract_epi64(a, imm)    vgetq_lane_s64(vreinterpretq_s64_m128i(a), (imm))
 
#define _mm_extract_epi8(a, imm)   vgetq_lane_u8(vreinterpretq_u8_m128i(a), (imm))
 
#define _mm_extract_ps(a, imm)   vgetq_lane_s32(vreinterpretq_s32_m128(a), (imm))
 
#define _mm_insert_epi32(a, b, imm)
 
#define _mm_insert_epi64(a, b, imm)
 
#define _mm_insert_epi8(a, b, imm)
 
#define _mm_insert_ps(a, b, imm8)
 
#define _mm_testnzc_si128(a, b)   _mm_test_mix_ones_zeros(a, b)
 
#define SSE2NEON_AES_DATA(w)
 
#define SSE2NEON_AES_H0(x)   (x)
 
#define SSE2NEON_AES_B2W(b0, b1, b2, b3)
 
#define SSE2NEON_AES_F2(x)   ((x << 1) ^ (((x >> 7) & 1) * 0x011b /* WPOLY */))
 
#define SSE2NEON_AES_F3(x)   (SSE2NEON_AES_F2(x) ^ x)
 
#define SSE2NEON_AES_U0(p)    SSE2NEON_AES_B2W(SSE2NEON_AES_F2(p), p, p, SSE2NEON_AES_F3(p))
 
#define SSE2NEON_AES_U1(p)    SSE2NEON_AES_B2W(SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p, p)
 
#define SSE2NEON_AES_U2(p)    SSE2NEON_AES_B2W(p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p)
 
#define SSE2NEON_AES_U3(p)    SSE2NEON_AES_B2W(p, p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p))
 

Typedefs

typedef int64x1_t __m64
 
typedef float32x4_t __m128
 
typedef float32x4_t __m128d
 
typedef int64x2_t __m128i
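 
These typedefs alias the SSE vector types directly onto NEON register types, so values can pass between sse2neon intrinsics and hand-written NEON code without conversion. A minimal sketch of that interoperability (sum4 is an illustrative helper, not part of the header):

#include "sse2neon.h"

/* __m128 is a float32x4_t, so an SSE-style value feeds native NEON
   intrinsics directly; the reinterpret below compiles to nothing. */
static float sum4(__m128 v)
{
    float32x4_t n = vreinterpretq_f32_m128(v);
    float32x2_t s = vadd_f32(vget_low_f32(n), vget_high_f32(n));
    return vget_lane_f32(vpadd_f32(s, s), 0);
}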
 

Enumerations

enum  _mm_hint {
  _MM_HINT_NTA = 0 , _MM_HINT_T0 = 1 , _MM_HINT_T1 = 2 , _MM_HINT_T2 = 3 ,
  _MM_HINT_ENTA = 4 , _MM_HINT_ET0 = 5 , _MM_HINT_ET1 = 6 , _MM_HINT_ET2 = 7
}
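 
These hints select the cache level targeted by _mm_prefetch(); on NEON targets they are mapped onto the architecture's prefetch instructions. A minimal usage sketch (process() and the look-ahead distance are illustrative, not part of the header):

#include "sse2neon.h"

/* Request a block well ahead of the current position so it is
   cache-resident by the time the loop reaches it. */
void process(const float *data, size_t n)
{
    for (size_t i = 0; i < n; i += 16) {
        _mm_prefetch((const char *) (data + i + 64), _MM_HINT_T0);
        /* ... work on data[i] .. data[i + 15] ... */
    }
}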
 

Functions

union ALIGN_STRUCT (16) SIMDVec
 
FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE ()
 
FORCE_INLINE __m128 _mm_move_ss (__m128, __m128)
 
FORCE_INLINE __m128 _mm_or_ps (__m128, __m128)
 
FORCE_INLINE __m128 _mm_set_ps1 (float)
 
FORCE_INLINE __m128 _mm_setzero_ps (void)
 
FORCE_INLINE __m128i _mm_and_si128 (__m128i, __m128i)
 
FORCE_INLINE __m128i _mm_castps_si128 (__m128)
 
FORCE_INLINE __m128i _mm_cmpeq_epi32 (__m128i, __m128i)
 
FORCE_INLINE __m128i _mm_cvtps_epi32 (__m128)
 
FORCE_INLINE __m128d _mm_move_sd (__m128d, __m128d)
 
FORCE_INLINE __m128i _mm_or_si128 (__m128i, __m128i)
 
FORCE_INLINE __m128i _mm_set_epi32 (int, int, int, int)
 
FORCE_INLINE __m128i _mm_set_epi64x (int64_t, int64_t)
 
FORCE_INLINE __m128d _mm_set_pd (double, double)
 
FORCE_INLINE __m128i _mm_set1_epi32 (int)
 
FORCE_INLINE __m128i _mm_setzero_si128 ()
 
FORCE_INLINE __m128d _mm_ceil_pd (__m128d)
 
FORCE_INLINE __m128 _mm_ceil_ps (__m128)
 
FORCE_INLINE __m128d _mm_floor_pd (__m128d)
 
FORCE_INLINE __m128 _mm_floor_ps (__m128)
 
FORCE_INLINE __m128d _mm_round_pd (__m128d, int)
 
FORCE_INLINE __m128 _mm_round_ps (__m128, int)
 
FORCE_INLINE uint32_t _mm_crc32_u8 (uint32_t, uint8_t)
 
FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4 (const uint8_t *p)
 
FORCE_INLINE __m128 _mm_shuffle_ps_1032 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_2301 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_0321 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_2103 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_1010 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_1001 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_0101 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_3210 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_0011 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_0022 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_2200 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_3202 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_1133 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_2010 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_2001 (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_shuffle_ps_2032 (__m128 a, __m128 b)
 
FORCE_INLINE void _sse2neon_kadd_f32 (float *sum, float *c, float y)
 
static uint64x2_t _sse2neon_vmull_p64 (uint64x1_t _a, uint64x1_t _b)
 
FORCE_INLINE __m128i _mm_shuffle_epi_1032 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_2301 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_0321 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_2103 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_1010 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_1001 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_0101 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_2211 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_0122 (__m128i a)
 
FORCE_INLINE __m128i _mm_shuffle_epi_3332 (__m128i a)
 
FORCE_INLINE void _mm_empty (void)
 
FORCE_INLINE __m128 _mm_add_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_add_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_and_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_andnot_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m64 _mm_avg_pu16 (__m64 a, __m64 b)
 
FORCE_INLINE __m64 _mm_avg_pu8 (__m64 a, __m64 b)
 
FORCE_INLINE __m128 _mm_cmpeq_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpeq_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpge_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpge_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpgt_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpgt_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmple_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmple_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmplt_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmplt_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpneq_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpneq_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpnge_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpnge_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpngt_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpngt_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpnle_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpnle_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpnlt_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpnlt_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpord_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpord_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpunord_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cmpunord_ss (__m128 a, __m128 b)
 
FORCE_INLINE int _mm_comieq_ss (__m128 a, __m128 b)
 
FORCE_INLINE int _mm_comige_ss (__m128 a, __m128 b)
 
FORCE_INLINE int _mm_comigt_ss (__m128 a, __m128 b)
 
FORCE_INLINE int _mm_comile_ss (__m128 a, __m128 b)
 
FORCE_INLINE int _mm_comilt_ss (__m128 a, __m128 b)
 
FORCE_INLINE int _mm_comineq_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_cvt_pi2ps (__m128 a, __m64 b)
 
FORCE_INLINE __m64 _mm_cvt_ps2pi (__m128 a)
 
FORCE_INLINE __m128 _mm_cvt_si2ss (__m128 a, int b)
 
FORCE_INLINE int _mm_cvt_ss2si (__m128 a)
 
FORCE_INLINE __m128 _mm_cvtpi16_ps (__m64 a)
 
FORCE_INLINE __m128 _mm_cvtpi32_ps (__m128 a, __m64 b)
 
FORCE_INLINE __m128 _mm_cvtpi32x2_ps (__m64 a, __m64 b)
 
FORCE_INLINE __m128 _mm_cvtpi8_ps (__m64 a)
 
FORCE_INLINE __m64 _mm_cvtps_pi16 (__m128 a)
 
FORCE_INLINE __m64 _mm_cvtps_pi8 (__m128 a)
 
FORCE_INLINE __m128 _mm_cvtpu16_ps (__m64 a)
 
FORCE_INLINE __m128 _mm_cvtpu8_ps (__m64 a)
 
FORCE_INLINE __m128 _mm_cvtsi64_ss (__m128 a, int64_t b)
 
FORCE_INLINE float _mm_cvtss_f32 (__m128 a)
 
FORCE_INLINE int64_t _mm_cvtss_si64 (__m128 a)
 
FORCE_INLINE __m64 _mm_cvtt_ps2pi (__m128 a)
 
FORCE_INLINE int _mm_cvtt_ss2si (__m128 a)
 
FORCE_INLINE int64_t _mm_cvttss_si64 (__m128 a)
 
FORCE_INLINE __m128 _mm_div_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_div_ss (__m128 a, __m128 b)
 
FORCE_INLINE void _mm_free (void *addr)
 
FORCE_INLINE unsigned int _sse2neon_mm_get_flush_zero_mode ()
 
FORCE_INLINE __m128 _mm_load_ps (const float *p)
 
FORCE_INLINE __m128 _mm_load_ss (const float *p)
 
FORCE_INLINE __m128 _mm_load1_ps (const float *p)
 
FORCE_INLINE __m128 _mm_loadh_pi (__m128 a, __m64 const *p)
 
FORCE_INLINE __m128 _mm_loadl_pi (__m128 a, __m64 const *p)
 
FORCE_INLINE __m128 _mm_loadr_ps (const float *p)
 
FORCE_INLINE __m128 _mm_loadu_ps (const float *p)
 
FORCE_INLINE __m128i _mm_loadu_si16 (const void *p)
 
FORCE_INLINE __m128i _mm_loadu_si64 (const void *p)
 
FORCE_INLINE void *_mm_malloc (size_t size, size_t align)
 
FORCE_INLINE void _mm_maskmove_si64 (__m64 a, __m64 mask, char *mem_addr)
 
FORCE_INLINE __m64 _mm_max_pi16 (__m64 a, __m64 b)
 
FORCE_INLINE __m128 _mm_max_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m64 _mm_max_pu8 (__m64 a, __m64 b)
 
FORCE_INLINE __m128 _mm_max_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m64 _mm_min_pi16 (__m64 a, __m64 b)
 
FORCE_INLINE __m128 _mm_min_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m64 _mm_min_pu8 (__m64 a, __m64 b)
 
FORCE_INLINE __m128 _mm_min_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_movehl_ps (__m128 __A, __m128 __B)
 
FORCE_INLINE __m128 _mm_movelh_ps (__m128 __A, __m128 __B)
 
FORCE_INLINE int _mm_movemask_pi8 (__m64 a)
 
FORCE_INLINE int _mm_movemask_ps (__m128 a)
 
FORCE_INLINE __m128 _mm_mul_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_mul_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m64 _mm_mulhi_pu16 (__m64 a, __m64 b)
 
FORCE_INLINE void _mm_prefetch (const void *p, int i)
 
FORCE_INLINE __m128 _mm_rcp_ps (__m128 in)
 
FORCE_INLINE __m128 _mm_rcp_ss (__m128 a)
 
FORCE_INLINE __m128 _mm_rsqrt_ps (__m128 in)
 
FORCE_INLINE __m128 _mm_rsqrt_ss (__m128 in)
 
FORCE_INLINE __m64 _mm_sad_pu8 (__m64 a, __m64 b)
 
FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode (unsigned int flag)
 
FORCE_INLINE __m128 _mm_set_ps (float w, float z, float y, float x)
 
FORCE_INLINE void _MM_SET_ROUNDING_MODE (int rounding)
 
FORCE_INLINE __m128 _mm_set_ss (float a)
 
FORCE_INLINE __m128 _mm_set1_ps (float _w)
 
FORCE_INLINE void _mm_setcsr (unsigned int a)
 
FORCE_INLINE unsigned int _mm_getcsr ()
 
FORCE_INLINE __m128 _mm_setr_ps (float w, float z, float y, float x)
 
FORCE_INLINE void _mm_sfence (void)
 
FORCE_INLINE __m128 _mm_sqrt_ps (__m128 in)
 
FORCE_INLINE __m128 _mm_sqrt_ss (__m128 in)
 
FORCE_INLINE void _mm_store_ps (float *p, __m128 a)
 
FORCE_INLINE void _mm_store_ps1 (float *p, __m128 a)
 
FORCE_INLINE void _mm_store_ss (float *p, __m128 a)
 
FORCE_INLINE void _mm_storeh_pi (__m64 *p, __m128 a)
 
FORCE_INLINE void _mm_storel_pi (__m64 *p, __m128 a)
 
FORCE_INLINE void _mm_storer_ps (float *p, __m128 a)
 
FORCE_INLINE void _mm_storeu_ps (float *p, __m128 a)
 
FORCE_INLINE void _mm_storeu_si16 (void *p, __m128i a)
 
FORCE_INLINE void _mm_storeu_si64 (void *p, __m128i a)
 
FORCE_INLINE void _mm_stream_pi (__m64 *p, __m64 a)
 
FORCE_INLINE void _mm_stream_ps (float *p, __m128 a)
 
FORCE_INLINE __m128 _mm_sub_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_sub_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128i _mm_undefined_si128 (void)
 
FORCE_INLINE __m128 _mm_undefined_ps (void)
 
FORCE_INLINE __m128 _mm_unpackhi_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_unpacklo_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128 _mm_xor_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128i _mm_add_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_add_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_add_epi64 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_add_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_add_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_add_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m64 _mm_add_si64 (__m64 a, __m64 b)
 
FORCE_INLINE __m128i _mm_adds_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_adds_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_adds_epu16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_adds_epu8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_and_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_andnot_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_andnot_si128 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_avg_epu16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_avg_epu8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128 _mm_castpd_ps (__m128d a)
 
FORCE_INLINE __m128i _mm_castpd_si128 (__m128d a)
 
FORCE_INLINE __m128d _mm_castps_pd (__m128 a)
 
FORCE_INLINE __m128d _mm_castsi128_pd (__m128i a)
 
FORCE_INLINE __m128 _mm_castsi128_ps (__m128i a)
 
FORCE_INLINE void _mm_clflush (void const *p)
 
FORCE_INLINE __m128i _mm_cmpeq_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_cmpeq_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_cmpeq_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpeq_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpge_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpge_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_cmpgt_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_cmpgt_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_cmpgt_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_cmpgt_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpgt_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmple_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmple_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_cmplt_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_cmplt_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_cmplt_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_cmplt_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmplt_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpneq_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpneq_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpnge_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpnge_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpngt_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpngt_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpnle_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpnle_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpnlt_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpnlt_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpord_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpord_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpunord_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cmpunord_sd (__m128d a, __m128d b)
 
FORCE_INLINE int _mm_comige_sd (__m128d a, __m128d b)
 
FORCE_INLINE int _mm_comigt_sd (__m128d a, __m128d b)
 
FORCE_INLINE int _mm_comile_sd (__m128d a, __m128d b)
 
FORCE_INLINE int _mm_comilt_sd (__m128d a, __m128d b)
 
FORCE_INLINE int _mm_comieq_sd (__m128d a, __m128d b)
 
FORCE_INLINE int _mm_comineq_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_cvtepi32_pd (__m128i a)
 
FORCE_INLINE __m128 _mm_cvtepi32_ps (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtpd_epi32 (__m128d a)
 
FORCE_INLINE __m64 _mm_cvtpd_pi32 (__m128d a)
 
FORCE_INLINE __m128 _mm_cvtpd_ps (__m128d a)
 
FORCE_INLINE __m128d _mm_cvtpi32_pd (__m64 a)
 
FORCE_INLINE __m128d _mm_cvtps_pd (__m128 a)
 
FORCE_INLINE double _mm_cvtsd_f64 (__m128d a)
 
FORCE_INLINE int32_t _mm_cvtsd_si32 (__m128d a)
 
FORCE_INLINE int64_t _mm_cvtsd_si64 (__m128d a)
 
FORCE_INLINE __m128 _mm_cvtsd_ss (__m128 a, __m128d b)
 
FORCE_INLINE int _mm_cvtsi128_si32 (__m128i a)
 
FORCE_INLINE int64_t _mm_cvtsi128_si64 (__m128i a)
 
FORCE_INLINE __m128d _mm_cvtsi32_sd (__m128d a, int32_t b)
 
FORCE_INLINE __m128i _mm_cvtsi32_si128 (int a)
 
FORCE_INLINE __m128d _mm_cvtsi64_sd (__m128d a, int64_t b)
 
FORCE_INLINE __m128i _mm_cvtsi64_si128 (int64_t a)
 
FORCE_INLINE __m128d _mm_cvtss_sd (__m128d a, __m128 b)
 
FORCE_INLINE __m128i _mm_cvttpd_epi32 (__m128d a)
 
FORCE_INLINE __m64 _mm_cvttpd_pi32 (__m128d a)
 
FORCE_INLINE __m128i _mm_cvttps_epi32 (__m128 a)
 
FORCE_INLINE int32_t _mm_cvttsd_si32 (__m128d a)
 
FORCE_INLINE int64_t _mm_cvttsd_si64 (__m128d a)
 
FORCE_INLINE __m128d _mm_div_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_div_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_load_pd (const double *p)
 
FORCE_INLINE __m128d _mm_load_sd (const double *p)
 
FORCE_INLINE __m128i _mm_load_si128 (const __m128i *p)
 
FORCE_INLINE __m128d _mm_load1_pd (const double *p)
 
FORCE_INLINE __m128d _mm_loadh_pd (__m128d a, const double *p)
 
FORCE_INLINE __m128i _mm_loadl_epi64 (__m128i const *p)
 
FORCE_INLINE __m128d _mm_loadl_pd (__m128d a, const double *p)
 
FORCE_INLINE __m128d _mm_loadr_pd (const double *p)
 
FORCE_INLINE __m128d _mm_loadu_pd (const double *p)
 
FORCE_INLINE __m128i _mm_loadu_si128 (const __m128i *p)
 
FORCE_INLINE __m128i _mm_loadu_si32 (const void *p)
 
FORCE_INLINE __m128i _mm_madd_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE void _mm_maskmoveu_si128 (__m128i a, __m128i mask, char *mem_addr)
 
FORCE_INLINE __m128i _mm_max_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_max_epu8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_max_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_max_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_min_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_min_epu8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_min_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_min_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_move_epi64 (__m128i a)
 
FORCE_INLINE int _mm_movemask_epi8 (__m128i a)
 
FORCE_INLINE int _mm_movemask_pd (__m128d a)
 
FORCE_INLINE __m64 _mm_movepi64_pi64 (__m128i a)
 
FORCE_INLINE __m128i _mm_movpi64_epi64 (__m64 a)
 
FORCE_INLINE __m128i _mm_mul_epu32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_mul_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_mul_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m64 _mm_mul_su32 (__m64 a, __m64 b)
 
FORCE_INLINE __m128i _mm_mulhi_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_mulhi_epu16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_mullo_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_or_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_packs_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_packs_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_packus_epi16 (const __m128i a, const __m128i b)
 
FORCE_INLINE void _mm_pause ()
 
FORCE_INLINE __m128i _mm_sad_epu8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_set_epi16 (short i7, short i6, short i5, short i4, short i3, short i2, short i1, short i0)
 
FORCE_INLINE __m128i _mm_set_epi64 (__m64 i1, __m64 i2)
 
FORCE_INLINE __m128i _mm_set_epi8 (signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
 
FORCE_INLINE __m128d _mm_set_sd (double a)
 
FORCE_INLINE __m128i _mm_set1_epi16 (short w)
 
FORCE_INLINE __m128i _mm_set1_epi64 (__m64 _i)
 
FORCE_INLINE __m128i _mm_set1_epi64x (int64_t _i)
 
FORCE_INLINE __m128i _mm_set1_epi8 (signed char w)
 
FORCE_INLINE __m128d _mm_set1_pd (double d)
 
FORCE_INLINE __m128i _mm_setr_epi16 (short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
 
FORCE_INLINE __m128i _mm_setr_epi32 (int i3, int i2, int i1, int i0)
 
FORCE_INLINE __m128i _mm_setr_epi64 (__m64 e1, __m64 e0)
 
FORCE_INLINE __m128i _mm_setr_epi8 (signed char b0, signed char b1, signed char b2, signed char b3, signed char b4, signed char b5, signed char b6, signed char b7, signed char b8, signed char b9, signed char b10, signed char b11, signed char b12, signed char b13, signed char b14, signed char b15)
 
FORCE_INLINE __m128d _mm_setr_pd (double e1, double e0)
 
FORCE_INLINE __m128d _mm_setzero_pd (void)
 
FORCE_INLINE __m128i _mm_sll_epi16 (__m128i a, __m128i count)
 
FORCE_INLINE __m128i _mm_sll_epi32 (__m128i a, __m128i count)
 
FORCE_INLINE __m128i _mm_sll_epi64 (__m128i a, __m128i count)
 
FORCE_INLINE __m128i _mm_slli_epi16 (__m128i a, int imm)
 
FORCE_INLINE __m128i _mm_slli_epi32 (__m128i a, int imm)
 
FORCE_INLINE __m128i _mm_slli_epi64 (__m128i a, int imm)
 
FORCE_INLINE __m128i _mm_slli_si128 (__m128i a, int imm)
 
FORCE_INLINE __m128d _mm_sqrt_pd (__m128d a)
 
FORCE_INLINE __m128d _mm_sqrt_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_sra_epi16 (__m128i a, __m128i count)
 
FORCE_INLINE __m128i _mm_sra_epi32 (__m128i a, __m128i count)
 
FORCE_INLINE __m128i _mm_srai_epi16 (__m128i a, int imm)
 
FORCE_INLINE __m128i _mm_srl_epi16 (__m128i a, __m128i count)
 
FORCE_INLINE __m128i _mm_srl_epi32 (__m128i a, __m128i count)
 
FORCE_INLINE __m128i _mm_srl_epi64 (__m128i a, __m128i count)
 
FORCE_INLINE __m128i _mm_srli_si128 (__m128i a, int imm)
 
FORCE_INLINE void _mm_store_pd (double *mem_addr, __m128d a)
 
FORCE_INLINE void _mm_store_pd1 (double *mem_addr, __m128d a)
 
FORCE_INLINE void _mm_store_sd (double *mem_addr, __m128d a)
 
FORCE_INLINE void _mm_store_si128 (__m128i *p, __m128i a)
 
FORCE_INLINE void _mm_storeh_pd (double *mem_addr, __m128d a)
 
FORCE_INLINE void _mm_storel_epi64 (__m128i *a, __m128i b)
 
FORCE_INLINE void _mm_storel_pd (double *mem_addr, __m128d a)
 
FORCE_INLINE void _mm_storer_pd (double *mem_addr, __m128d a)
 
FORCE_INLINE void _mm_storeu_pd (double *mem_addr, __m128d a)
 
FORCE_INLINE void _mm_storeu_si128 (__m128i *p, __m128i a)
 
FORCE_INLINE void _mm_storeu_si32 (void *p, __m128i a)
 
FORCE_INLINE void _mm_stream_pd (double *p, __m128d a)
 
FORCE_INLINE void _mm_stream_si128 (__m128i *p, __m128i a)
 
FORCE_INLINE void _mm_stream_si32 (int *p, int a)
 
FORCE_INLINE void _mm_stream_si64 (__int64 *p, __int64 a)
 
FORCE_INLINE __m128i _mm_sub_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_sub_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_sub_epi64 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_sub_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_sub_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_sub_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m64 _mm_sub_si64 (__m64 a, __m64 b)
 
FORCE_INLINE __m128i _mm_subs_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_subs_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_subs_epu16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_subs_epu8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_undefined_pd (void)
 
FORCE_INLINE __m128i _mm_unpackhi_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_unpackhi_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_unpackhi_epi64 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_unpackhi_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_unpackhi_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_unpacklo_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_unpacklo_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_unpacklo_epi64 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_unpacklo_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_unpacklo_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128d _mm_xor_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128i _mm_xor_si128 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_addsub_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128 _mm_addsub_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128d _mm_hadd_pd (__m128d a, __m128d b)
 
FORCE_INLINE __m128 _mm_hadd_ps (__m128 a, __m128 b)
 
FORCE_INLINE __m128d _mm_hsub_pd (__m128d _a, __m128d _b)
 
FORCE_INLINE __m128 _mm_hsub_ps (__m128 _a, __m128 _b)
 
FORCE_INLINE __m128d _mm_movedup_pd (__m128d a)
 
FORCE_INLINE __m128 _mm_movehdup_ps (__m128 a)
 
FORCE_INLINE __m128 _mm_moveldup_ps (__m128 a)
 
FORCE_INLINE __m128i _mm_abs_epi16 (__m128i a)
 
FORCE_INLINE __m128i _mm_abs_epi32 (__m128i a)
 
FORCE_INLINE __m128i _mm_abs_epi8 (__m128i a)
 
FORCE_INLINE __m64 _mm_abs_pi16 (__m64 a)
 
FORCE_INLINE __m64 _mm_abs_pi32 (__m64 a)
 
FORCE_INLINE __m64 _mm_abs_pi8 (__m64 a)
 
FORCE_INLINE __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int imm)
 
FORCE_INLINE __m128i _mm_hadd_epi16 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m128i _mm_hadd_epi32 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m64 _mm_hadd_pi16 (__m64 a, __m64 b)
 
FORCE_INLINE __m64 _mm_hadd_pi32 (__m64 a, __m64 b)
 
FORCE_INLINE __m128i _mm_hadds_epi16 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m64 _mm_hadds_pi16 (__m64 _a, __m64 _b)
 
FORCE_INLINE __m128i _mm_hsub_epi16 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m128i _mm_hsub_epi32 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m64 _mm_hsub_pi16 (__m64 _a, __m64 _b)
 
FORCE_INLINE __m64 _mm_hsub_pi32 (__m64 _a, __m64 _b)
 
FORCE_INLINE __m128i _mm_hsubs_epi16 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m64 _mm_hsubs_pi16 (__m64 _a, __m64 _b)
 
FORCE_INLINE __m128i _mm_maddubs_epi16 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m64 _mm_maddubs_pi16 (__m64 _a, __m64 _b)
 
FORCE_INLINE __m128i _mm_mulhrs_epi16 (__m128i a, __m128i b)
 
FORCE_INLINE __m64 _mm_mulhrs_pi16 (__m64 a, __m64 b)
 
FORCE_INLINE __m128i _mm_shuffle_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m64 _mm_shuffle_pi8 (__m64 a, __m64 b)
 
FORCE_INLINE __m128i _mm_sign_epi16 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m128i _mm_sign_epi32 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m128i _mm_sign_epi8 (__m128i _a, __m128i _b)
 
FORCE_INLINE __m64 _mm_sign_pi16 (__m64 _a, __m64 _b)
 
FORCE_INLINE __m64 _mm_sign_pi32 (__m64 _a, __m64 _b)
 
FORCE_INLINE __m64 _mm_sign_pi8 (__m64 _a, __m64 _b)
 
FORCE_INLINE __m128 _mm_blend_ps (__m128 _a, __m128 _b, const char imm8)
 
FORCE_INLINE __m128i _mm_blendv_epi8 (__m128i _a, __m128i _b, __m128i _mask)
 
FORCE_INLINE __m128d _mm_blendv_pd (__m128d _a, __m128d _b, __m128d _mask)
 
FORCE_INLINE __m128 _mm_blendv_ps (__m128 _a, __m128 _b, __m128 _mask)
 
FORCE_INLINE __m128d _mm_ceil_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128 _mm_ceil_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_cvtepi16_epi32 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepi16_epi64 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepi32_epi64 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepi8_epi16 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepi8_epi32 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepi8_epi64 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepu16_epi32 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepu16_epi64 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepu32_epi64 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepu8_epi16 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepu8_epi32 (__m128i a)
 
FORCE_INLINE __m128i _mm_cvtepu8_epi64 (__m128i a)
 
FORCE_INLINE __m128d _mm_dp_pd (__m128d a, __m128d b, const int imm)
 
FORCE_INLINE __m128 _mm_dp_ps (__m128 a, __m128 b, const int imm)
 
FORCE_INLINE __m128d _mm_floor_sd (__m128d a, __m128d b)
 
FORCE_INLINE __m128 _mm_floor_ss (__m128 a, __m128 b)
 
FORCE_INLINE __m128i _mm_max_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_max_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_max_epu16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_max_epu32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_min_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_min_epi8 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_min_epu16 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_min_epu32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_minpos_epu16 (__m128i a)
 
FORCE_INLINE __m128i _mm_mpsadbw_epu8 (__m128i a, __m128i b, const int imm)
 
FORCE_INLINE __m128i _mm_mul_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_mullo_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_packus_epi32 (__m128i a, __m128i b)
 
FORCE_INLINE __m128d _mm_round_sd (__m128d a, __m128d b, int rounding)
 
FORCE_INLINE __m128 _mm_round_ss (__m128 a, __m128 b, int rounding)
 
FORCE_INLINE __m128i _mm_stream_load_si128 (__m128i *p)
 
FORCE_INLINE int _mm_test_all_ones (__m128i a)
 
FORCE_INLINE int _mm_test_all_zeros (__m128i a, __m128i mask)
 
FORCE_INLINE int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
 
FORCE_INLINE int _mm_testc_si128 (__m128i a, __m128i b)
 
FORCE_INLINE int _mm_testz_si128 (__m128i a, __m128i b)
 
FORCE_INLINE __m128i _mm_cmpgt_epi64 (__m128i a, __m128i b)
 
FORCE_INLINE uint32_t _mm_crc32_u16 (uint32_t crc, uint16_t v)
 
FORCE_INLINE uint32_t _mm_crc32_u32 (uint32_t crc, uint32_t v)
 
FORCE_INLINE uint64_t _mm_crc32_u64 (uint64_t crc, uint64_t v)
 
FORCE_INLINE __m128i _mm_aesenc_si128 (__m128i EncBlock, __m128i RoundKey)
 
FORCE_INLINE __m128i _mm_aesenclast_si128 (__m128i a, __m128i RoundKey)
 
FORCE_INLINE __m128i _mm_aeskeygenassist_si128 (__m128i key, const int rcon)
 
FORCE_INLINE __m128i _mm_clmulepi64_si128 (__m128i _a, __m128i _b, const int imm)
 
FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode ()
 
FORCE_INLINE int _mm_popcnt_u32 (unsigned int a)
 
FORCE_INLINE int64_t _mm_popcnt_u64 (uint64_t a)
 
FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode (unsigned int flag)
 
FORCE_INLINE uint64_t _rdtsc (void)
 

Variables

 SIMDVec
 
static const uint8_t SSE2NEON_sbox [256] = SSE2NEON_AES_DATA(SSE2NEON_AES_H0)
 

Macro Definition Documentation

◆ __constrange

#define __constrange (   a,
  b 
)    const

◆ __has_builtin

#define __has_builtin (   x)    0

◆ __int64

#define __int64   int64_t

◆ _m_maskmovq

#define _m_maskmovq (   a,
  mask,
  mem_addr 
)    _mm_maskmove_si64(a, mask, mem_addr)

◆ _m_pavgb

#define _m_pavgb (   a,
  b 
)    _mm_avg_pu8(a, b)

◆ _m_pavgw

#define _m_pavgw (   a,
  b 
)    _mm_avg_pu16(a, b)

◆ _m_pextrw

#define _m_pextrw (   a,
  imm 
)    _mm_extract_pi16(a, imm)

◆ _m_pinsrw

#define _m_pinsrw (   a,
  i,
  imm 
)    _mm_insert_pi16(a, i, imm)

◆ _m_pmaxsw

#define _m_pmaxsw (   a,
  b 
)    _mm_max_pi16(a, b)

◆ _m_pmaxub

#define _m_pmaxub (   a,
  b 
)    _mm_max_pu8(a, b)

◆ _m_pminsw

#define _m_pminsw (   a,
  b 
)    _mm_min_pi16(a, b)

◆ _m_pminub

#define _m_pminub (   a,
  b 
)    _mm_min_pu8(a, b)

◆ _m_pmovmskb

#define _m_pmovmskb (   a)    _mm_movemask_pi8(a)

◆ _m_pmulhuw

#define _m_pmulhuw (   a,
  b 
)    _mm_mulhi_pu16(a, b)

◆ _m_psadbw

#define _m_psadbw (   a,
  b 
)    _mm_sad_pu8(a, b)

◆ _m_pshufw

#define _m_pshufw (   a,
  imm 
)    _mm_shuffle_pi16(a, imm)

◆ _mm_alignr_pi8

#define _mm_alignr_pi8 (   a,
  b,
  imm 
)
Value:
__extension__({ \
__m64 ret; \
if (_sse2neon_unlikely((imm) >= 16)) { \
ret = vreinterpret_m64_s8(vdup_n_s8(0)); \
} else { \
uint8x8_t tmp_low, tmp_high; \
if ((imm) >= 8) { \
const int idx = (imm) - 8; \
tmp_low = vreinterpret_u8_m64(a); \
tmp_high = vdup_n_u8(0); \
ret = vreinterpret_m64_u8(vext_u8(tmp_low, tmp_high, idx)); \
} else { \
const int idx = (imm); \
tmp_low = vreinterpret_u8_m64(b); \
tmp_high = vreinterpret_u8_m64(a); \
ret = vreinterpret_m64_u8(vext_u8(tmp_low, tmp_high, idx)); \
} \
} \
ret; \
})
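 
A brief usage sketch of the windowing behaviour (the byte patterns are illustrative): the two 64-bit inputs form one 16-byte value with a in the high half, and imm selects where the 8-byte result window starts.

#include "sse2neon.h"

void alignr_demo(void)
{
    __m64 lo = vreinterpret_m64_u8(vcreate_u8(0x0706050403020100ULL)); /* bytes 0..7  */
    __m64 hi = vreinterpret_m64_u8(vcreate_u8(0x0F0E0D0C0B0A0908ULL)); /* bytes 8..15 */
    __m64 r = _mm_alignr_pi8(hi, lo, 3); /* result: bytes 3..10 */
    (void) r;
}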

◆ _mm_blend_epi16

#define _mm_blend_epi16 (   a,
  b,
  imm 
)
Value:
__extension__({ \
const uint16_t _mask[8] = {((imm) & (1 << 0)) ? (uint16_t) -1 : 0x0, \
((imm) & (1 << 1)) ? (uint16_t) -1 : 0x0, \
((imm) & (1 << 2)) ? (uint16_t) -1 : 0x0, \
((imm) & (1 << 3)) ? (uint16_t) -1 : 0x0, \
((imm) & (1 << 4)) ? (uint16_t) -1 : 0x0, \
((imm) & (1 << 5)) ? (uint16_t) -1 : 0x0, \
((imm) & (1 << 6)) ? (uint16_t) -1 : 0x0, \
((imm) & (1 << 7)) ? (uint16_t) -1 : 0x0}; \
uint16x8_t _mask_vec = vld1q_u16(_mask); \
uint16x8_t _a = vreinterpretq_u16_m128i(a); \
uint16x8_t _b = vreinterpretq_u16_m128i(b); \
vreinterpretq_m128i_u16(vbslq_u16(_mask_vec, _b, _a)); \
})
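 
Bit i of the immediate selects 16-bit lane i from b; a clear bit keeps the lane from a. A brief sketch (blend_demo is illustrative):

#include "sse2neon.h"

void blend_demo(void)
{
    __m128i a = _mm_set1_epi16(1);
    __m128i b = _mm_set1_epi16(2);
    __m128i r = _mm_blend_epi16(a, b, 0xF0); /* lanes: 1,1,1,1,2,2,2,2 */
    (void) r;
}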

◆ _mm_blend_pd

#define _mm_blend_pd (   a,
  b,
  imm 
)
Value:
__extension__({ \
const uint64_t _mask[2] = { \
((imm) & (1 << 0)) ? ~UINT64_C(0) : UINT64_C(0), \
((imm) & (1 << 1)) ? ~UINT64_C(0) : UINT64_C(0)}; \
uint64x2_t _mask_vec = vld1q_u64(_mask); \
uint64x2_t _a = vreinterpretq_u64_m128d(a); \
uint64x2_t _b = vreinterpretq_u64_m128d(b); \
vreinterpretq_m128d_u64(vbslq_u64(_mask_vec, _b, _a)); \
})

◆ _mm_bslli_si128

#define _mm_bslli_si128 (   a,
  imm 
)    _mm_slli_si128(a, imm)

◆ _mm_bsrli_si128

#define _mm_bsrli_si128 (   a,
  imm 
)    _mm_srli_si128(a, imm)

◆ _mm_cvtps_pi32

#define _mm_cvtps_pi32 (   a)    _mm_cvt_ps2pi(a)

◆ _mm_cvtsd_si64x

#define _mm_cvtsd_si64x   _mm_cvtsd_si64

◆ _mm_cvtsi128_si64x

#define _mm_cvtsi128_si64x (   a)    _mm_cvtsi128_si64(a)

◆ _mm_cvtsi32_ss

#define _mm_cvtsi32_ss (   a,
  b 
)    _mm_cvt_si2ss(a, b)

◆ _mm_cvtsi64x_sd

#define _mm_cvtsi64x_sd (   a,
  b 
)    _mm_cvtsi64_sd(a, b)

◆ _mm_cvtsi64x_si128

#define _mm_cvtsi64x_si128 (   a)    _mm_cvtsi64_si128(a)

◆ _mm_cvtss_si32

#define _mm_cvtss_si32 (   a)    _mm_cvt_ss2si(a)

◆ _mm_cvttps_pi32

#define _mm_cvttps_pi32 (   a)    _mm_cvtt_ps2pi(a)

◆ _mm_cvttsd_si64x

#define _mm_cvttsd_si64x (   a)    _mm_cvttsd_si64(a)

◆ _mm_cvttss_si32

#define _mm_cvttss_si32 (   a)    _mm_cvtt_ss2si(a)

◆ _MM_DENORMALS_ZERO_MASK

#define _MM_DENORMALS_ZERO_MASK   0x0040

◆ _MM_DENORMALS_ZERO_OFF

#define _MM_DENORMALS_ZERO_OFF   0x0000

◆ _MM_DENORMALS_ZERO_ON

#define _MM_DENORMALS_ZERO_ON   0x0040

◆ _mm_extract_epi16

#define _mm_extract_epi16 (   a,
  imm 
)     vgetq_lane_u16(vreinterpretq_u16_m128i(a), (imm))

◆ _mm_extract_epi32

#define _mm_extract_epi32 (   a,
  imm 
)     vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm))

◆ _mm_extract_epi64

#define _mm_extract_epi64 (   a,
  imm 
)     vgetq_lane_s64(vreinterpretq_s64_m128i(a), (imm))

◆ _mm_extract_epi8

#define _mm_extract_epi8 (   a,
  imm 
)    vgetq_lane_u8(vreinterpretq_u8_m128i(a), (imm))

◆ _mm_extract_pi16

#define _mm_extract_pi16 (   a,
  imm 
)     (int32_t) vget_lane_u16(vreinterpret_u16_m64(a), (imm))

◆ _mm_extract_ps

#define _mm_extract_ps (   a,
  imm 
)    vgetq_lane_s32(vreinterpretq_s32_m128(a), (imm))

◆ _MM_FLUSH_ZERO_MASK

#define _MM_FLUSH_ZERO_MASK   0x8000

◆ _MM_FLUSH_ZERO_OFF

#define _MM_FLUSH_ZERO_OFF   0x0000

◆ _MM_FLUSH_ZERO_ON

#define _MM_FLUSH_ZERO_ON   0x8000

◆ _MM_FROUND_CEIL

#define _MM_FROUND_CEIL   (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)

◆ _MM_FROUND_CUR_DIRECTION

#define _MM_FROUND_CUR_DIRECTION   0x04

◆ _MM_FROUND_FLOOR

#define _MM_FROUND_FLOOR   (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)

◆ _MM_FROUND_NEARBYINT

#define _MM_FROUND_NEARBYINT   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)

◆ _MM_FROUND_NINT

#define _MM_FROUND_NINT   (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)

◆ _MM_FROUND_NO_EXC

#define _MM_FROUND_NO_EXC   0x08

◆ _MM_FROUND_RAISE_EXC

#define _MM_FROUND_RAISE_EXC   0x00

◆ _MM_FROUND_RINT

#define _MM_FROUND_RINT   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)

◆ _MM_FROUND_TO_NEAREST_INT

#define _MM_FROUND_TO_NEAREST_INT   0x00

◆ _MM_FROUND_TO_NEG_INF

#define _MM_FROUND_TO_NEG_INF   0x01

◆ _MM_FROUND_TO_POS_INF

#define _MM_FROUND_TO_POS_INF   0x02

◆ _MM_FROUND_TO_ZERO

#define _MM_FROUND_TO_ZERO   0x03

◆ _MM_FROUND_TRUNC

#define _MM_FROUND_TRUNC   (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)

◆ _MM_GET_DENORMALS_ZERO_MODE

#define _MM_GET_DENORMALS_ZERO_MODE   _sse2neon_mm_get_denormals_zero_mode

◆ _MM_GET_FLUSH_ZERO_MODE

#define _MM_GET_FLUSH_ZERO_MODE   _sse2neon_mm_get_flush_zero_mode

◆ _mm_insert_epi16

#define _mm_insert_epi16 (   a,
  b,
  imm 
)
Value:
__extension__({ \
vreinterpretq_m128i_s16( \
vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \
})

◆ _mm_insert_epi32

#define _mm_insert_epi32 (   a,
  b,
  imm 
)
Value:
__extension__({ \
vreinterpretq_m128i_s32( \
vsetq_lane_s32((b), vreinterpretq_s32_m128i(a), (imm))); \
})

◆ _mm_insert_epi64

#define _mm_insert_epi64 (   a,
  b,
  imm 
)
Value:
__extension__({ \
vreinterpretq_m128i_s64( \
vsetq_lane_s64((b), vreinterpretq_s64_m128i(a), (imm))); \
})

◆ _mm_insert_epi8

#define _mm_insert_epi8 (   a,
  b,
  imm 
)
Value:
__extension__({ \
vreinterpretq_m128i_s8( \
vsetq_lane_s8((b), vreinterpretq_s8_m128i(a), (imm))); \
})

◆ _mm_insert_pi16

#define _mm_insert_pi16 (   a,
  b,
  imm 
)
Value:
__extension__({ \
vreinterpret_m64_s16( \
vset_lane_s16((b), vreinterpret_s16_m64(a), (imm))); \
})

◆ _mm_insert_ps

#define _mm_insert_ps (   a,
  b,
  imm8 
)
Value:
__extension__({ \
float32x4_t tmp1 = \
vsetq_lane_f32(vgetq_lane_f32(b, (imm8 >> 6) & 0x3), \
vreinterpretq_f32_m128(a), 0); \
float32x4_t tmp2 = \
vsetq_lane_f32(vgetq_lane_f32(tmp1, 0), vreinterpretq_f32_m128(a), \
((imm8 >> 4) & 0x3)); \
const uint32_t data[4] = {((imm8) & (1 << 0)) ? UINT32_MAX : 0, \
((imm8) & (1 << 1)) ? UINT32_MAX : 0, \
((imm8) & (1 << 2)) ? UINT32_MAX : 0, \
((imm8) & (1 << 3)) ? UINT32_MAX : 0}; \
uint32x4_t mask = vld1q_u32(data); \
float32x4_t all_zeros = vdupq_n_f32(0); \
vreinterpretq_m128_f32( \
vbslq_f32(mask, all_zeros, vreinterpretq_f32_m128(tmp2))); \
})

◆ _mm_lddqu_si128

#define _mm_lddqu_si128   _mm_loadu_si128

◆ _mm_load_pd1

#define _mm_load_pd1   _mm_load1_pd

◆ _mm_load_ps1

#define _mm_load_ps1   _mm_load1_ps

◆ _mm_loaddup_pd

#define _mm_loaddup_pd   _mm_load1_pd

◆ _MM_ROUND_DOWN

#define _MM_ROUND_DOWN   0x2000

◆ _MM_ROUND_NEAREST

#define _MM_ROUND_NEAREST   0x0000

◆ _MM_ROUND_TOWARD_ZERO

#define _MM_ROUND_TOWARD_ZERO   0x6000

◆ _MM_ROUND_UP

#define _MM_ROUND_UP   0x4000

◆ _MM_SET_DENORMALS_ZERO_MODE

#define _MM_SET_DENORMALS_ZERO_MODE   _sse2neon_mm_set_denormals_zero_mode

◆ _MM_SET_FLUSH_ZERO_MODE

#define _MM_SET_FLUSH_ZERO_MODE   _sse2neon_mm_set_flush_zero_mode

◆ _mm_set_pd1

#define _mm_set_pd1   _mm_set1_pd

◆ _MM_SHUFFLE

#define _MM_SHUFFLE (   fp3,
  fp2,
  fp1,
  fp0 
)     (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))

Macro that builds the 8-bit shuffle-control immediate for _mm_shuffle_ps(). Each argument is a digit in [0, 3] selecting a source lane: fp3 selects the lane of argument "b" that is placed in lane 3 of the result, and fp2 does the same for lane 2; fp1 selects the lane of argument "a" that is placed in lane 1 of the result, and fp0 does the same for lane 0.
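 
For example, reversing the lanes of a vector (reverse_demo is an illustrative sketch):

#include "sse2neon.h"

void reverse_demo(void)
{
    __m128 v = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f); /* lanes: 1, 2, 3, 4 */
    /* Lane 0 of the result takes v[3], lane 1 takes v[2], and so on. */
    __m128 r = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 1, 2, 3)); /* 4, 3, 2, 1 */
    (void) r;
}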

◆ _mm_shuffle_epi32

#define _mm_shuffle_epi32 (   a,
  imm 
)

◆ _mm_shuffle_epi32_default

#define _mm_shuffle_epi32_default (   a,
  imm 
)
Value:
__extension__({ \
int32x4_t ret; \
ret = vmovq_n_s32( \
vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm) & (0x3))); \
ret = vsetq_lane_s32( \
vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 2) & 0x3), \
ret, 1); \
ret = vsetq_lane_s32( \
vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 4) & 0x3), \
ret, 2); \
ret = vsetq_lane_s32( \
vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 6) & 0x3), \
ret, 3); \
vreinterpretq_m128i_s32(ret); \
})

◆ _mm_shuffle_epi32_splat

#define _mm_shuffle_epi32_splat (   a,
  imm 
)
Value:
__extension__({ \
vreinterpretq_m128i_s32( \
vdupq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)))); \
})

◆ _mm_shuffle_pd

#define _mm_shuffle_pd (   a,
  b,
  imm8 
)
Value:
_mm_castsi128_pd(_mm_set_epi64x( \
vgetq_lane_s64(vreinterpretq_s64_m128d(b), (imm8 & 0x2) >> 1), \
vgetq_lane_s64(vreinterpretq_s64_m128d(a), imm8 & 0x1)))

◆ _mm_shuffle_pi16

#define _mm_shuffle_pi16 (   a,
  imm 
)
Value:
__extension__({ \
int16x4_t ret; \
ret = \
vmov_n_s16(vget_lane_s16(vreinterpret_s16_m64(a), (imm) & (0x3))); \
ret = vset_lane_s16( \
vget_lane_s16(vreinterpret_s16_m64(a), ((imm) >> 2) & 0x3), ret, \
1); \
ret = vset_lane_s16( \
vget_lane_s16(vreinterpret_s16_m64(a), ((imm) >> 4) & 0x3), ret, \
2); \
ret = vset_lane_s16( \
vget_lane_s16(vreinterpret_s16_m64(a), ((imm) >> 6) & 0x3), ret, \
3); \
vreinterpret_m64_s16(ret); \
})

◆ _mm_shuffle_ps

#define _mm_shuffle_ps (   a,
  b,
  imm 
)

◆ _mm_shuffle_ps_default

#define _mm_shuffle_ps_default (   a,
  b,
  imm 
)
Value:
__extension__({ \
float32x4_t ret; \
ret = vmovq_n_f32( \
vgetq_lane_f32(vreinterpretq_f32_m128(a), (imm) & (0x3))); \
ret = vsetq_lane_f32( \
vgetq_lane_f32(vreinterpretq_f32_m128(a), ((imm) >> 2) & 0x3), \
ret, 1); \
ret = vsetq_lane_f32( \
vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 4) & 0x3), \
ret, 2); \
ret = vsetq_lane_f32( \
vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 6) & 0x3), \
ret, 3); \
vreinterpretq_m128_f32(ret); \
})

◆ _mm_shufflehi_epi16

#define _mm_shufflehi_epi16 (   a,
  imm 
)    _mm_shufflehi_epi16_function((a), (imm))

◆ _mm_shufflehi_epi16_function

#define _mm_shufflehi_epi16_function (   a,
  imm 
)
Value:
__extension__({ \
int16x8_t ret = vreinterpretq_s16_m128i(a); \
int16x4_t highBits = vget_high_s16(ret); \
ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm) & (0x3)), ret, 4); \
ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 2) & 0x3), ret, \
5); \
ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 4) & 0x3), ret, \
6); \
ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 6) & 0x3), ret, \
7); \
vreinterpretq_m128i_s16(ret); \
})

◆ _mm_shufflelo_epi16

#define _mm_shufflelo_epi16 (   a,
  imm 
)    _mm_shufflelo_epi16_function((a), (imm))

◆ _mm_shufflelo_epi16_function

#define _mm_shufflelo_epi16_function (   a,
  imm 
)
Value:
__extension__({ \
int16x8_t ret = vreinterpretq_s16_m128i(a); \
int16x4_t lowBits = vget_low_s16(ret); \
ret = vsetq_lane_s16(vget_lane_s16(lowBits, (imm) & (0x3)), ret, 0); \
ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 2) & 0x3), ret, \
1); \
ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 4) & 0x3), ret, \
2); \
ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 6) & 0x3), ret, \
3); \
vreinterpretq_m128i_s16(ret); \
})

◆ _mm_srai_epi32

#define _mm_srai_epi32 (   a,
  imm 
)
Value:
__extension__({ \
__m128i ret; \
if (_sse2neon_unlikely((imm) == 0)) { \
ret = a; \
} else if (_sse2neon_likely(0 < (imm) && (imm) < 32)) { \
ret = vreinterpretq_m128i_s32( \
vshlq_s32(vreinterpretq_s32_m128i(a), vdupq_n_s32(-(imm)))); \
} else { \
ret = vreinterpretq_m128i_s32( \
vshrq_n_s32(vreinterpretq_s32_m128i(a), 31)); \
} \
ret; \
})
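 
The arithmetic shift replicates the sign bit, whereas the logical _mm_srli_epi32 below shifts in zeros; the two differ only on negative lanes. A brief sketch (shift_demo is illustrative):

#include "sse2neon.h"

void shift_demo(void)
{
    __m128i v = _mm_set1_epi32(-8);
    __m128i sra = _mm_srai_epi32(v, 2); /* each lane: -2         */
    __m128i srl = _mm_srli_epi32(v, 2); /* each lane: 0x3FFFFFFE */
    (void) sra;
    (void) srl;
}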

◆ _mm_srli_epi16

#define _mm_srli_epi16 (   a,
  imm 
)
Value:
__extension__({ \
__m128i ret; \
if (_sse2neon_unlikely((imm) & ~15)) { \
ret = _mm_setzero_si128(); \
} else { \
ret = vreinterpretq_m128i_u16( \
vshlq_u16(vreinterpretq_u16_m128i(a), vdupq_n_s16(-(imm)))); \
} \
ret; \
})

◆ _mm_srli_epi32

#define _mm_srli_epi32 (   a,
  imm 
)
Value:
__extension__({ \
__m128i ret; \
if (_sse2neon_unlikely((imm) & ~31)) { \
ret = _mm_setzero_si128(); \
} else { \
ret = vreinterpretq_m128i_u32( \
vshlq_u32(vreinterpretq_u32_m128i(a), vdupq_n_s32(-(imm)))); \
} \
ret; \
})

◆ _mm_srli_epi64

#define _mm_srli_epi64 (   a,
  imm 
)
Value:
__extension__({ \
__m128i ret; \
if (_sse2neon_unlikely((imm) & ~63)) { \
ret = _mm_setzero_si128(); \
} else { \
ret = vreinterpretq_m128i_u64( \
vshlq_u64(vreinterpretq_u64_m128i(a), vdupq_n_s64(-(imm)))); \
} \
ret; \
})

◆ _mm_store1_pd

#define _mm_store1_pd   _mm_store_pd1

◆ _mm_store1_ps

#define _mm_store1_ps   _mm_store_ps1

◆ _mm_testnzc_si128

#define _mm_testnzc_si128 (   a,
  b 
)    _mm_test_mix_ones_zeros(a, b)

◆ _MM_TRANSPOSE4_PS

#define _MM_TRANSPOSE4_PS (   row0,
  row1,
  row2,
  row3 
)
Value:
do { \
float32x4x2_t ROW01 = vtrnq_f32(row0, row1); \
float32x4x2_t ROW23 = vtrnq_f32(row2, row3); \
row0 = vcombine_f32(vget_low_f32(ROW01.val[0]), \
vget_low_f32(ROW23.val[0])); \
row1 = vcombine_f32(vget_low_f32(ROW01.val[1]), \
vget_low_f32(ROW23.val[1])); \
row2 = vcombine_f32(vget_high_f32(ROW01.val[0]), \
vget_high_f32(ROW23.val[0])); \
row3 = vcombine_f32(vget_high_f32(ROW01.val[1]), \
vget_high_f32(ROW23.val[1])); \
} while (0)
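 
A usage sketch: the macro transposes a 4x4 matrix held as four row vectors, updating the rows in place (transpose_demo is illustrative):

#include "sse2neon.h"

void transpose_demo(void)
{
    __m128 row0 = _mm_set_ps(3.f, 2.f, 1.f, 0.f);     /* 0  1  2  3  */
    __m128 row1 = _mm_set_ps(7.f, 6.f, 5.f, 4.f);     /* 4  5  6  7  */
    __m128 row2 = _mm_set_ps(11.f, 10.f, 9.f, 8.f);   /* 8  9  10 11 */
    __m128 row3 = _mm_set_ps(15.f, 14.f, 13.f, 12.f); /* 12 13 14 15 */
    _MM_TRANSPOSE4_PS(row0, row1, row2, row3); /* row0 is now 0, 4, 8, 12 */
    (void) row0;
    (void) row1;
    (void) row2;
    (void) row3;
}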

◆ _mm_ucomieq_sd

#define _mm_ucomieq_sd   _mm_comieq_sd

◆ _mm_ucomieq_ss

#define _mm_ucomieq_ss   _mm_comieq_ss

◆ _mm_ucomige_sd

#define _mm_ucomige_sd   _mm_comige_sd

◆ _mm_ucomige_ss

#define _mm_ucomige_ss   _mm_comige_ss

◆ _mm_ucomigt_sd

#define _mm_ucomigt_sd   _mm_comigt_sd

◆ _mm_ucomigt_ss

#define _mm_ucomigt_ss   _mm_comigt_ss

◆ _mm_ucomile_sd

#define _mm_ucomile_sd   _mm_comile_sd

◆ _mm_ucomile_ss

#define _mm_ucomile_ss   _mm_comile_ss

◆ _mm_ucomilt_sd

#define _mm_ucomilt_sd   _mm_comilt_sd

◆ _mm_ucomilt_ss

#define _mm_ucomilt_ss   _mm_comilt_ss

◆ _mm_ucomineq_sd

#define _mm_ucomineq_sd   _mm_comineq_sd

◆ _mm_ucomineq_ss

#define _mm_ucomineq_ss   _mm_comineq_ss

◆ _sse2neon_const

#define _sse2neon_const   const

◆ _sse2neon_likely

#define _sse2neon_likely (   x)    (x)

◆ _sse2neon_unlikely

#define _sse2neon_unlikely (   x)    (x)

◆ ALIGN_STRUCT

#define ALIGN_STRUCT (   x)    __declspec(align(x))

◆ FORCE_INLINE

#define FORCE_INLINE   static inline

◆ SSE2NEON_AES_B2W

#define SSE2NEON_AES_B2W (   b0,
  b1,
  b2,
  b3 
)
Value:
(((uint32_t) (b3) << 24) | ((uint32_t) (b2) << 16) | \
((uint32_t) (b1) << 8) | (uint32_t) (b0))

◆ SSE2NEON_AES_DATA

#define SSE2NEON_AES_DATA (   w)

◆ SSE2NEON_AES_F2

#define SSE2NEON_AES_F2 (   x)    ((x << 1) ^ (((x >> 7) & 1) * 0x011b /* WPOLY */))

◆ SSE2NEON_AES_F3

#define SSE2NEON_AES_F3 (   x)    (SSE2NEON_AES_F2(x) ^ x)

◆ SSE2NEON_AES_H0

#define SSE2NEON_AES_H0 (   x)    (x)

◆ SSE2NEON_AES_U0

#define SSE2NEON_AES_U0 (   p)     SSE2NEON_AES_B2W(SSE2NEON_AES_F2(p), p, p, SSE2NEON_AES_F3(p))

◆ SSE2NEON_AES_U1

#define SSE2NEON_AES_U1 (   p)     SSE2NEON_AES_B2W(SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p, p)

◆ SSE2NEON_AES_U2

#define SSE2NEON_AES_U2 (   p)     SSE2NEON_AES_B2W(p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p)

◆ SSE2NEON_AES_U3

#define SSE2NEON_AES_U3 (   p)     SSE2NEON_AES_B2W(p, p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p))

◆ SSE2NEON_PRECISE_DIV

#define SSE2NEON_PRECISE_DIV   (0)

◆ SSE2NEON_PRECISE_DP

#define SSE2NEON_PRECISE_DP   (0)

◆ SSE2NEON_PRECISE_MINMAX

#define SSE2NEON_PRECISE_MINMAX   (0)

◆ SSE2NEON_PRECISE_SQRT

#define SSE2NEON_PRECISE_SQRT   (0)

◆ vreinterpret_f32_m64

#define vreinterpret_f32_m64 (   x)    vreinterpret_f32_s64(x)

◆ vreinterpret_m64_f16

#define vreinterpret_m64_f16 (   x)    vreinterpret_s64_f16(x)

◆ vreinterpret_m64_f32

#define vreinterpret_m64_f32 (   x)    vreinterpret_s64_f32(x)

◆ vreinterpret_m64_f64

#define vreinterpret_m64_f64 (   x)    vreinterpret_s64_f64(x)

◆ vreinterpret_m64_s16

#define vreinterpret_m64_s16 (   x)    vreinterpret_s64_s16(x)

◆ vreinterpret_m64_s32

#define vreinterpret_m64_s32 (   x)    vreinterpret_s64_s32(x)

◆ vreinterpret_m64_s64

#define vreinterpret_m64_s64 (   x)    (x)

◆ vreinterpret_m64_s8

#define vreinterpret_m64_s8 (   x)    vreinterpret_s64_s8(x)

◆ vreinterpret_m64_u16

#define vreinterpret_m64_u16 (   x)    vreinterpret_s64_u16(x)

◆ vreinterpret_m64_u32

#define vreinterpret_m64_u32 (   x)    vreinterpret_s64_u32(x)

◆ vreinterpret_m64_u64

#define vreinterpret_m64_u64 (   x)    vreinterpret_s64_u64(x)

◆ vreinterpret_m64_u8

#define vreinterpret_m64_u8 (   x)    vreinterpret_s64_u8(x)

◆ vreinterpret_s16_m64

#define vreinterpret_s16_m64 (   x)    vreinterpret_s16_s64(x)

◆ vreinterpret_s32_m64

#define vreinterpret_s32_m64 (   x)    vreinterpret_s32_s64(x)

◆ vreinterpret_s64_m64

#define vreinterpret_s64_m64 (   x)    (x)

◆ vreinterpret_s8_m64

#define vreinterpret_s8_m64 (   x)    vreinterpret_s8_s64(x)

◆ vreinterpret_u16_m64

#define vreinterpret_u16_m64 (   x)    vreinterpret_u16_s64(x)

◆ vreinterpret_u32_m64

#define vreinterpret_u32_m64 (   x)    vreinterpret_u32_s64(x)

◆ vreinterpret_u64_m64

#define vreinterpret_u64_m64 (   x)    vreinterpret_u64_s64(x)

◆ vreinterpret_u8_m64

#define vreinterpret_u8_m64 (   x)    vreinterpret_u8_s64(x)

◆ vreinterpretq_f16_m128

#define vreinterpretq_f16_m128 (   x)    vreinterpretq_f16_f32(x)

◆ vreinterpretq_f32_m128

#define vreinterpretq_f32_m128 (   x)    (x)

◆ vreinterpretq_f32_m128d

#define vreinterpretq_f32_m128d (   x)    (x)

◆ vreinterpretq_f32_m128i

#define vreinterpretq_f32_m128i (   x)    vreinterpretq_f32_s64(x)

◆ vreinterpretq_f64_m128

#define vreinterpretq_f64_m128 (   x)    vreinterpretq_f64_f32(x)

◆ vreinterpretq_f64_m128i

#define vreinterpretq_f64_m128i (   x)    vreinterpretq_f64_s64(x)

◆ vreinterpretq_m128_f16

#define vreinterpretq_m128_f16 (   x)    vreinterpretq_f32_f16(x)

◆ vreinterpretq_m128_f32

#define vreinterpretq_m128_f32 (   x)    (x)

◆ vreinterpretq_m128_f64

#define vreinterpretq_m128_f64 (   x)    vreinterpretq_f32_f64(x)

◆ vreinterpretq_m128_s16

#define vreinterpretq_m128_s16 (   x)    vreinterpretq_f32_s16(x)

◆ vreinterpretq_m128_s32

#define vreinterpretq_m128_s32 (   x)    vreinterpretq_f32_s32(x)

◆ vreinterpretq_m128_s64

#define vreinterpretq_m128_s64 (   x)    vreinterpretq_f32_s64(x)

◆ vreinterpretq_m128_s8

#define vreinterpretq_m128_s8 (   x)    vreinterpretq_f32_s8(x)

◆ vreinterpretq_m128_u16

#define vreinterpretq_m128_u16 (   x)    vreinterpretq_f32_u16(x)

◆ vreinterpretq_m128_u32

#define vreinterpretq_m128_u32 (   x)    vreinterpretq_f32_u32(x)

◆ vreinterpretq_m128_u64

#define vreinterpretq_m128_u64 (   x)    vreinterpretq_f32_u64(x)

◆ vreinterpretq_m128_u8

#define vreinterpretq_m128_u8 (   x)    vreinterpretq_f32_u8(x)

◆ vreinterpretq_m128d_f32

#define vreinterpretq_m128d_f32 (   x)    (x)

◆ vreinterpretq_m128d_s32

#define vreinterpretq_m128d_s32 (   x)    vreinterpretq_f32_s32(x)

◆ vreinterpretq_m128d_s64

#define vreinterpretq_m128d_s64 (   x)    vreinterpretq_f32_s64(x)

◆ vreinterpretq_m128d_u32

#define vreinterpretq_m128d_u32 (   x)    vreinterpretq_f32_u32(x)

◆ vreinterpretq_m128d_u64

#define vreinterpretq_m128d_u64 (   x)    vreinterpretq_f32_u64(x)

◆ vreinterpretq_m128i_s16

#define vreinterpretq_m128i_s16 (   x)    vreinterpretq_s64_s16(x)

◆ vreinterpretq_m128i_s32

#define vreinterpretq_m128i_s32 (   x)    vreinterpretq_s64_s32(x)

◆ vreinterpretq_m128i_s64

#define vreinterpretq_m128i_s64 (   x)    (x)

◆ vreinterpretq_m128i_s8

#define vreinterpretq_m128i_s8 (   x)    vreinterpretq_s64_s8(x)

◆ vreinterpretq_m128i_u16

#define vreinterpretq_m128i_u16 (   x)    vreinterpretq_s64_u16(x)

◆ vreinterpretq_m128i_u32

#define vreinterpretq_m128i_u32 (   x)    vreinterpretq_s64_u32(x)

◆ vreinterpretq_m128i_u64

#define vreinterpretq_m128i_u64 (   x)    vreinterpretq_s64_u64(x)

◆ vreinterpretq_m128i_u8

#define vreinterpretq_m128i_u8 (   x)    vreinterpretq_s64_u8(x)

◆ vreinterpretq_nth_u32_m128i

#define vreinterpretq_nth_u32_m128i (   x,
  n 
)    (((SIMDVec *) &x)->m128_u32[n])

◆ vreinterpretq_nth_u64_m128i

#define vreinterpretq_nth_u64_m128i (   x,
  n 
)    (((SIMDVec *) &x)->m128_u64[n])

◆ vreinterpretq_nth_u8_m128i

#define vreinterpretq_nth_u8_m128i (   x,
  n 
)    (((SIMDVec *) &x)->m128_u8[n])

◆ vreinterpretq_s16_m128

#define vreinterpretq_s16_m128 (   x)    vreinterpretq_s16_f32(x)

◆ vreinterpretq_s16_m128i

#define vreinterpretq_s16_m128i (   x)    vreinterpretq_s16_s64(x)

◆ vreinterpretq_s32_m128

#define vreinterpretq_s32_m128 (   x)    vreinterpretq_s32_f32(x)

◆ vreinterpretq_s32_m128i

#define vreinterpretq_s32_m128i (   x)    vreinterpretq_s32_s64(x)

◆ vreinterpretq_s64_m128

#define vreinterpretq_s64_m128 (   x)    vreinterpretq_s64_f32(x)

◆ vreinterpretq_s64_m128d

#define vreinterpretq_s64_m128d (   x)    vreinterpretq_s64_f32(x)

◆ vreinterpretq_s64_m128i

#define vreinterpretq_s64_m128i (   x)    (x)

◆ vreinterpretq_s8_m128

#define vreinterpretq_s8_m128 (   x)    vreinterpretq_s8_f32(x)

◆ vreinterpretq_s8_m128i

#define vreinterpretq_s8_m128i (   x)    vreinterpretq_s8_s64(x)

◆ vreinterpretq_u16_m128

#define vreinterpretq_u16_m128 (   x)    vreinterpretq_u16_f32(x)

◆ vreinterpretq_u16_m128i

#define vreinterpretq_u16_m128i (   x)    vreinterpretq_u16_s64(x)

◆ vreinterpretq_u32_m128

#define vreinterpretq_u32_m128 (   x)    vreinterpretq_u32_f32(x)

◆ vreinterpretq_u32_m128d

#define vreinterpretq_u32_m128d (   x)    vreinterpretq_u32_f32(x)

◆ vreinterpretq_u32_m128i

#define vreinterpretq_u32_m128i (   x)    vreinterpretq_u32_s64(x)

◆ vreinterpretq_u64_m128

#define vreinterpretq_u64_m128 (   x)    vreinterpretq_u64_f32(x)

◆ vreinterpretq_u64_m128d

#define vreinterpretq_u64_m128d (   x)    vreinterpretq_u64_f32(x)

◆ vreinterpretq_u64_m128i

#define vreinterpretq_u64_m128i (   x)    vreinterpretq_u64_s64(x)

◆ vreinterpretq_u8_m128

#define vreinterpretq_u8_m128 (   x)    vreinterpretq_u8_f32(x)

◆ vreinterpretq_u8_m128i

#define vreinterpretq_u8_m128i (   x)    vreinterpretq_u8_s64(x)

Typedef Documentation

◆ __m128

typedef float32x4_t __m128

◆ __m128d

typedef float32x4_t __m128d

◆ __m128i

typedef int64x2_t __m128i

◆ __m64

typedef int64x1_t __m64
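
Because these typedefs alias NEON vector types directly (here __m128 is
float32x4_t), values pass between SSE-style code and native NEON intrinsics
without conversion. Note these are the 32-bit-ARM fallbacks; on AArch64
builds of sse2neon, __m128d is float64x2_t. An illustrative sketch (the
helper name is ours, not part of the header):

    #include "sse2neon.h"

    /* Horizontal max of a __m128 using plain NEON intrinsics; the
       assignment is legal because __m128 is float32x4_t here. */
    static float horizontal_max(__m128 v)
    {
        float32x4_t n = v;
        float32x2_t m = vpmax_f32(vget_low_f32(n), vget_high_f32(n));
        m = vpmax_f32(m, m);
        return vget_lane_f32(m, 0);
    }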

Enumeration Type Documentation

◆ _mm_hint

enum _mm_hint
Enumerator
_MM_HINT_NTA 
_MM_HINT_T0 
_MM_HINT_T1 
_MM_HINT_T2 
_MM_HINT_ENTA 
_MM_HINT_ET0 
_MM_HINT_ET1 
_MM_HINT_ET2 
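
These enumerators mirror the x86 prefetch locality hints (T0 closest to the
core, NTA non-temporal); on ARM they translate to the corresponding
prefetch instructions and are purely performance hints with no effect on
program semantics. An illustrative sketch:

    #include "sse2neon.h"

    /* Scale a buffer, hinting the cache 64 floats ahead.
       Assumes n is a multiple of 4. (Illustrative.) */
    void scale(float *buf, int n, float k)
    {
        for (int i = 0; i < n; i += 4) {
            _mm_prefetch(buf + i + 64, _MM_HINT_T0);
            __m128 v = _mm_loadu_ps(buf + i);
            _mm_storeu_ps(buf + i, _mm_mul_ps(v, _mm_set1_ps(k)));
        }
    }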

Function Documentation

◆ _mm_abs_epi16()

FORCE_INLINE __m128i _mm_abs_epi16 ( __m128i  a)

◆ _mm_abs_epi32()

FORCE_INLINE __m128i _mm_abs_epi32 ( __m128i  a)

◆ _mm_abs_epi8()

FORCE_INLINE __m128i _mm_abs_epi8 ( __m128i  a)

◆ _mm_abs_pi16()

FORCE_INLINE __m64 _mm_abs_pi16 ( __m64  a)

◆ _mm_abs_pi32()

FORCE_INLINE __m64 _mm_abs_pi32 ( __m64  a)

◆ _mm_abs_pi8()

FORCE_INLINE __m64 _mm_abs_pi8 ( __m64  a)

◆ _mm_add_epi16()

FORCE_INLINE __m128i _mm_add_epi16 ( __m128i  a,
__m128i  b 
)
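
Adds corresponding 16-bit lanes with wrap-around (modular) semantics;
contrast with _mm_adds_epi16 below, which saturates. A small illustrative
sketch:

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        __m128i a = _mm_set1_epi16(30000);
        __m128i b = _mm_set1_epi16(10000);
        __m128i wrap = _mm_add_epi16(a, b);  /* 40000 wraps to -25536 */
        __m128i sat  = _mm_adds_epi16(a, b); /* saturates to 32767 */
        short w[8], s[8];
        _mm_storeu_si128((__m128i *) w, wrap);
        _mm_storeu_si128((__m128i *) s, sat);
        printf("%d %d\n", w[0], s[0]); /* -25536 32767 */
        return 0;
    }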

◆ _mm_add_epi32()

FORCE_INLINE __m128i _mm_add_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_add_epi64()

FORCE_INLINE __m128i _mm_add_epi64 ( __m128i  a,
__m128i  b 
)

◆ _mm_add_epi8()

FORCE_INLINE __m128i _mm_add_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_add_pd()

FORCE_INLINE __m128d _mm_add_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_add_ps()

FORCE_INLINE __m128 _mm_add_ps ( __m128  a,
__m128  b 
)

◆ _mm_add_sd()

FORCE_INLINE __m128d _mm_add_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_add_si64()

FORCE_INLINE __m64 _mm_add_si64 ( __m64  a,
__m64  b 
)

◆ _mm_add_ss()

FORCE_INLINE __m128 _mm_add_ss ( __m128  a,
__m128  b 
)

◆ _mm_adds_epi16()

FORCE_INLINE __m128i _mm_adds_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_adds_epi8()

FORCE_INLINE __m128i _mm_adds_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_adds_epu16()

FORCE_INLINE __m128i _mm_adds_epu16 ( __m128i  a,
__m128i  b 
)

◆ _mm_adds_epu8()

FORCE_INLINE __m128i _mm_adds_epu8 ( __m128i  a,
__m128i  b 
)

◆ _mm_addsub_pd()

FORCE_INLINE __m128d _mm_addsub_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_addsub_ps()

FORCE_INLINE __m128 _mm_addsub_ps ( __m128  a,
__m128  b 
)

◆ _mm_aesenc_si128()

FORCE_INLINE __m128i _mm_aesenc_si128 ( __m128i  EncBlock,
__m128i  RoundKey 
)

◆ _mm_aesenclast_si128()

FORCE_INLINE __m128i _mm_aesenclast_si128 ( __m128i  a,
__m128i  RoundKey 
)

◆ _mm_aeskeygenassist_si128()

FORCE_INLINE __m128i _mm_aeskeygenassist_si128 ( __m128i  key,
const int  rcon 
)

◆ _mm_alignr_epi8()

FORCE_INLINE __m128i _mm_alignr_epi8 ( __m128i  a,
__m128i  b,
int  imm 
)

◆ _mm_and_pd()

FORCE_INLINE __m128d _mm_and_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_and_ps()

FORCE_INLINE __m128 _mm_and_ps ( __m128  a,
__m128  b 
)

◆ _mm_and_si128()

FORCE_INLINE __m128i _mm_and_si128 ( __m128i  a,
__m128i  b 
)

◆ _mm_andnot_pd()

FORCE_INLINE __m128d _mm_andnot_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_andnot_ps()

FORCE_INLINE __m128 _mm_andnot_ps ( __m128  a,
__m128  b 
)

◆ _mm_andnot_si128()

FORCE_INLINE __m128i _mm_andnot_si128 ( __m128i  a,
__m128i  b 
)

◆ _mm_avg_epu16()

FORCE_INLINE __m128i _mm_avg_epu16 ( __m128i  a,
__m128i  b 
)

◆ _mm_avg_epu8()

FORCE_INLINE __m128i _mm_avg_epu8 ( __m128i  a,
__m128i  b 
)

◆ _mm_avg_pu16()

FORCE_INLINE __m64 _mm_avg_pu16 ( __m64  a,
__m64  b 
)

◆ _mm_avg_pu8()

FORCE_INLINE __m64 _mm_avg_pu8 ( __m64  a,
__m64  b 
)

◆ _mm_blend_ps()

FORCE_INLINE __m128 _mm_blend_ps ( __m128  _a,
__m128  _b,
const char  imm8 
)

◆ _mm_blendv_epi8()

FORCE_INLINE __m128i _mm_blendv_epi8 ( __m128i  _a,
__m128i  _b,
__m128i  _mask 
)

◆ _mm_blendv_pd()

FORCE_INLINE __m128d _mm_blendv_pd ( __m128d  _a,
__m128d  _b,
__m128d  _mask 
)

◆ _mm_blendv_ps()

FORCE_INLINE __m128 _mm_blendv_ps ( __m128  _a,
__m128  _b,
__m128  _mask 
)

◆ _mm_castpd_ps()

FORCE_INLINE __m128 _mm_castpd_ps ( __m128d  a)

◆ _mm_castpd_si128()

FORCE_INLINE __m128i _mm_castpd_si128 ( __m128d  a)

◆ _mm_castps_pd()

FORCE_INLINE __m128d _mm_castps_pd ( __m128  a)

◆ _mm_castps_si128()

FORCE_INLINE __m128i _mm_castps_si128 ( __m128  a)

◆ _mm_castsi128_pd()

FORCE_INLINE __m128d _mm_castsi128_pd ( __m128i  a)

◆ _mm_castsi128_ps()

FORCE_INLINE __m128 _mm_castsi128_ps ( __m128i  a)

◆ _mm_ceil_pd()

FORCE_INLINE __m128d _mm_ceil_pd ( __m128d  a)

◆ _mm_ceil_ps()

FORCE_INLINE __m128 _mm_ceil_ps ( __m128  a)

◆ _mm_ceil_sd()

FORCE_INLINE __m128d _mm_ceil_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_ceil_ss()

FORCE_INLINE __m128 _mm_ceil_ss ( __m128  a,
__m128  b 
)

◆ _mm_clflush()

FORCE_INLINE void _mm_clflush ( void const *  p)

◆ _mm_clmulepi64_si128()

FORCE_INLINE __m128i _mm_clmulepi64_si128 ( __m128i  _a,
__m128i  _b,
const int  imm 
)

◆ _mm_cmpeq_epi16()

FORCE_INLINE __m128i _mm_cmpeq_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmpeq_epi32()

FORCE_INLINE __m128i _mm_cmpeq_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmpeq_epi64()

FORCE_INLINE __m128i _mm_cmpeq_epi64 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmpeq_epi8()

FORCE_INLINE __m128i _mm_cmpeq_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmpeq_pd()

FORCE_INLINE __m128d _mm_cmpeq_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpeq_ps()

FORCE_INLINE __m128 _mm_cmpeq_ps ( __m128  a,
__m128  b 
)
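
Vector comparisons return a per-lane mask (all ones where the predicate
holds, all zeros elsewhere) rather than a boolean; a common follow-up is
_mm_movemask_ps to compress the result into an integer bitmask.
Illustrative sketch:

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        __m128 a  = _mm_setr_ps(1.f, 2.f, 3.f, 4.f);
        __m128 b  = _mm_setr_ps(1.f, 0.f, 3.f, 0.f);
        __m128 eq = _mm_cmpeq_ps(a, b); /* all-ones lanes where equal */
        printf("mask = 0x%x\n", _mm_movemask_ps(eq)); /* 0x5 */
        return 0;
    }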

◆ _mm_cmpeq_sd()

FORCE_INLINE __m128d _mm_cmpeq_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpeq_ss()

FORCE_INLINE __m128 _mm_cmpeq_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpge_pd()

FORCE_INLINE __m128d _mm_cmpge_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpge_ps()

FORCE_INLINE __m128 _mm_cmpge_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpge_sd()

FORCE_INLINE __m128d _mm_cmpge_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpge_ss()

FORCE_INLINE __m128 _mm_cmpge_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpgt_epi16()

FORCE_INLINE __m128i _mm_cmpgt_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmpgt_epi32()

FORCE_INLINE __m128i _mm_cmpgt_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmpgt_epi64()

FORCE_INLINE __m128i _mm_cmpgt_epi64 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmpgt_epi8()

FORCE_INLINE __m128i _mm_cmpgt_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmpgt_pd()

FORCE_INLINE __m128d _mm_cmpgt_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpgt_ps()

FORCE_INLINE __m128 _mm_cmpgt_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpgt_sd()

FORCE_INLINE __m128d _mm_cmpgt_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpgt_ss()

FORCE_INLINE __m128 _mm_cmpgt_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmple_pd()

FORCE_INLINE __m128d _mm_cmple_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmple_ps()

FORCE_INLINE __m128 _mm_cmple_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmple_sd()

FORCE_INLINE __m128d _mm_cmple_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmple_ss()

FORCE_INLINE __m128 _mm_cmple_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmplt_epi16()

FORCE_INLINE __m128i _mm_cmplt_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmplt_epi32()

FORCE_INLINE __m128i _mm_cmplt_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmplt_epi8()

FORCE_INLINE __m128i _mm_cmplt_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_cmplt_pd()

FORCE_INLINE __m128d _mm_cmplt_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmplt_ps()

FORCE_INLINE __m128 _mm_cmplt_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmplt_sd()

FORCE_INLINE __m128d _mm_cmplt_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmplt_ss()

FORCE_INLINE __m128 _mm_cmplt_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpneq_pd()

FORCE_INLINE __m128d _mm_cmpneq_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpneq_ps()

FORCE_INLINE __m128 _mm_cmpneq_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpneq_sd()

FORCE_INLINE __m128d _mm_cmpneq_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpneq_ss()

FORCE_INLINE __m128 _mm_cmpneq_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpnge_pd()

FORCE_INLINE __m128d _mm_cmpnge_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpnge_ps()

FORCE_INLINE __m128 _mm_cmpnge_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpnge_sd()

FORCE_INLINE __m128d _mm_cmpnge_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpnge_ss()

FORCE_INLINE __m128 _mm_cmpnge_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpngt_pd()

FORCE_INLINE __m128d _mm_cmpngt_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpngt_ps()

FORCE_INLINE __m128 _mm_cmpngt_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpngt_sd()

FORCE_INLINE __m128d _mm_cmpngt_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpngt_ss()

FORCE_INLINE __m128 _mm_cmpngt_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpnle_pd()

FORCE_INLINE __m128d _mm_cmpnle_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpnle_ps()

FORCE_INLINE __m128 _mm_cmpnle_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpnle_sd()

FORCE_INLINE __m128d _mm_cmpnle_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpnle_ss()

FORCE_INLINE __m128 _mm_cmpnle_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpnlt_pd()

FORCE_INLINE __m128d _mm_cmpnlt_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpnlt_ps()

FORCE_INLINE __m128 _mm_cmpnlt_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpnlt_sd()

FORCE_INLINE __m128d _mm_cmpnlt_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpnlt_ss()

FORCE_INLINE __m128 _mm_cmpnlt_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpord_pd()

FORCE_INLINE __m128d _mm_cmpord_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpord_ps()

FORCE_INLINE __m128 _mm_cmpord_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpord_sd()

FORCE_INLINE __m128d _mm_cmpord_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpord_ss()

FORCE_INLINE __m128 _mm_cmpord_ss ( __m128  a,
__m128  b 
)

◆ _mm_cmpunord_pd()

FORCE_INLINE __m128d _mm_cmpunord_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpunord_ps()

FORCE_INLINE __m128 _mm_cmpunord_ps ( __m128  a,
__m128  b 
)

◆ _mm_cmpunord_sd()

FORCE_INLINE __m128d _mm_cmpunord_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_cmpunord_ss()

FORCE_INLINE __m128 _mm_cmpunord_ss ( __m128  a,
__m128  b 
)

◆ _mm_comieq_sd()

FORCE_INLINE int _mm_comieq_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_comieq_ss()

FORCE_INLINE int _mm_comieq_ss ( __m128  a,
__m128  b 
)

◆ _mm_comige_sd()

FORCE_INLINE int _mm_comige_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_comige_ss()

FORCE_INLINE int _mm_comige_ss ( __m128  a,
__m128  b 
)

◆ _mm_comigt_sd()

FORCE_INLINE int _mm_comigt_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_comigt_ss()

FORCE_INLINE int _mm_comigt_ss ( __m128  a,
__m128  b 
)

◆ _mm_comile_sd()

FORCE_INLINE int _mm_comile_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_comile_ss()

FORCE_INLINE int _mm_comile_ss ( __m128  a,
__m128  b 
)

◆ _mm_comilt_sd()

FORCE_INLINE int _mm_comilt_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_comilt_ss()

FORCE_INLINE int _mm_comilt_ss ( __m128  a,
__m128  b 
)

◆ _mm_comineq_sd()

FORCE_INLINE int _mm_comineq_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_comineq_ss()

FORCE_INLINE int _mm_comineq_ss ( __m128  a,
__m128  b 
)

◆ _mm_crc32_u16()

FORCE_INLINE uint32_t _mm_crc32_u16 ( uint32_t  crc,
uint16_t  v 
)

◆ _mm_crc32_u32()

FORCE_INLINE uint32_t _mm_crc32_u32 ( uint32_t  crc,
uint32_t  v 
)

◆ _mm_crc32_u64()

FORCE_INLINE uint64_t _mm_crc32_u64 ( uint64_t  crc,
uint64_t  v 
)

◆ _mm_crc32_u8()

FORCE_INLINE uint32_t _mm_crc32_u8 ( uint32_t  crc,
uint8_t  v 
)

◆ _mm_cvt_pi2ps()

FORCE_INLINE __m128 _mm_cvt_pi2ps ( __m128  a,
__m64  b 
)

◆ _mm_cvt_ps2pi()

FORCE_INLINE __m64 _mm_cvt_ps2pi ( __m128  a)

◆ _mm_cvt_si2ss()

FORCE_INLINE __m128 _mm_cvt_si2ss ( __m128  a,
int  b 
)

◆ _mm_cvt_ss2si()

FORCE_INLINE int _mm_cvt_ss2si ( __m128  a)

◆ _mm_cvtepi16_epi32()

FORCE_INLINE __m128i _mm_cvtepi16_epi32 ( __m128i  a)

◆ _mm_cvtepi16_epi64()

FORCE_INLINE __m128i _mm_cvtepi16_epi64 ( __m128i  a)

◆ _mm_cvtepi32_epi64()

FORCE_INLINE __m128i _mm_cvtepi32_epi64 ( __m128i  a)

◆ _mm_cvtepi32_pd()

FORCE_INLINE __m128d _mm_cvtepi32_pd ( __m128i  a)

◆ _mm_cvtepi32_ps()

FORCE_INLINE __m128 _mm_cvtepi32_ps ( __m128i  a)

◆ _mm_cvtepi8_epi16()

FORCE_INLINE __m128i _mm_cvtepi8_epi16 ( __m128i  a)

◆ _mm_cvtepi8_epi32()

FORCE_INLINE __m128i _mm_cvtepi8_epi32 ( __m128i  a)

◆ _mm_cvtepi8_epi64()

FORCE_INLINE __m128i _mm_cvtepi8_epi64 ( __m128i  a)

◆ _mm_cvtepu16_epi32()

FORCE_INLINE __m128i _mm_cvtepu16_epi32 ( __m128i  a)

◆ _mm_cvtepu16_epi64()

FORCE_INLINE __m128i _mm_cvtepu16_epi64 ( __m128i  a)

◆ _mm_cvtepu32_epi64()

FORCE_INLINE __m128i _mm_cvtepu32_epi64 ( __m128i  a)

◆ _mm_cvtepu8_epi16()

FORCE_INLINE __m128i _mm_cvtepu8_epi16 ( __m128i  a)

◆ _mm_cvtepu8_epi32()

FORCE_INLINE __m128i _mm_cvtepu8_epi32 ( __m128i  a)

◆ _mm_cvtepu8_epi64()

FORCE_INLINE __m128i _mm_cvtepu8_epi64 ( __m128i  a)

◆ _mm_cvtpd_epi32()

FORCE_INLINE __m128i _mm_cvtpd_epi32 ( __m128d  a)

◆ _mm_cvtpd_pi32()

FORCE_INLINE __m64 _mm_cvtpd_pi32 ( __m128d  a)

◆ _mm_cvtpd_ps()

FORCE_INLINE __m128 _mm_cvtpd_ps ( __m128d  a)

◆ _mm_cvtpi16_ps()

FORCE_INLINE __m128 _mm_cvtpi16_ps ( __m64  a)

◆ _mm_cvtpi32_pd()

FORCE_INLINE __m128d _mm_cvtpi32_pd ( __m64  a)

◆ _mm_cvtpi32_ps()

FORCE_INLINE __m128 _mm_cvtpi32_ps ( __m128  a,
__m64  b 
)

◆ _mm_cvtpi32x2_ps()

FORCE_INLINE __m128 _mm_cvtpi32x2_ps ( __m64  a,
__m64  b 
)

◆ _mm_cvtpi8_ps()

FORCE_INLINE __m128 _mm_cvtpi8_ps ( __m64  a)

◆ _mm_cvtps_epi32()

FORCE_INLINE __m128i _mm_cvtps_epi32 ( __m128  a)

◆ _mm_cvtps_pd()

FORCE_INLINE __m128d _mm_cvtps_pd ( __m128  a)

◆ _mm_cvtps_pi16()

FORCE_INLINE __m64 _mm_cvtps_pi16 ( __m128  a)

◆ _mm_cvtps_pi8()

FORCE_INLINE __m64 _mm_cvtps_pi8 ( __m128  a)

◆ _mm_cvtpu16_ps()

FORCE_INLINE __m128 _mm_cvtpu16_ps ( __m64  a)

◆ _mm_cvtpu8_ps()

FORCE_INLINE __m128 _mm_cvtpu8_ps ( __m64  a)

◆ _mm_cvtsd_f64()

FORCE_INLINE double _mm_cvtsd_f64 ( __m128d  a)

◆ _mm_cvtsd_si32()

FORCE_INLINE int32_t _mm_cvtsd_si32 ( __m128d  a)

◆ _mm_cvtsd_si64()

FORCE_INLINE int64_t _mm_cvtsd_si64 ( __m128d  a)

◆ _mm_cvtsd_ss()

FORCE_INLINE __m128 _mm_cvtsd_ss ( __m128  a,
__m128d  b 
)

◆ _mm_cvtsi128_si32()

FORCE_INLINE int _mm_cvtsi128_si32 ( __m128i  a)

◆ _mm_cvtsi128_si64()

FORCE_INLINE int64_t _mm_cvtsi128_si64 ( __m128i  a)

◆ _mm_cvtsi32_sd()

FORCE_INLINE __m128d _mm_cvtsi32_sd ( __m128d  a,
int32_t  b 
)

◆ _mm_cvtsi32_si128()

FORCE_INLINE __m128i _mm_cvtsi32_si128 ( int  a)

◆ _mm_cvtsi64_sd()

FORCE_INLINE __m128d _mm_cvtsi64_sd ( __m128d  a,
int64_t  b 
)

◆ _mm_cvtsi64_si128()

FORCE_INLINE __m128i _mm_cvtsi64_si128 ( int64_t  a)

◆ _mm_cvtsi64_ss()

FORCE_INLINE __m128 _mm_cvtsi64_ss ( __m128  a,
int64_t  b 
)

◆ _mm_cvtss_f32()

FORCE_INLINE float _mm_cvtss_f32 ( __m128  a)

◆ _mm_cvtss_sd()

FORCE_INLINE __m128d _mm_cvtss_sd ( __m128d  a,
__m128  b 
)

◆ _mm_cvtss_si64()

FORCE_INLINE int64_t _mm_cvtss_si64 ( __m128  a)

◆ _mm_cvtt_ps2pi()

FORCE_INLINE __m64 _mm_cvtt_ps2pi ( __m128  a)

◆ _mm_cvtt_ss2si()

FORCE_INLINE int _mm_cvtt_ss2si ( __m128  a)

◆ _mm_cvttpd_epi32()

FORCE_INLINE __m128i _mm_cvttpd_epi32 ( __m128d  a)

◆ _mm_cvttpd_pi32()

FORCE_INLINE __m64 _mm_cvttpd_pi32 ( __m128d  a)

◆ _mm_cvttps_epi32()

FORCE_INLINE __m128i _mm_cvttps_epi32 ( __m128  a)
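
_mm_cvtps_epi32 rounds according to the current rounding mode
(round-to-nearest-even by default), while this "tt" variant truncates
toward zero. A sketch, assuming the default rounding mode:

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        __m128 v = _mm_setr_ps(1.5f, -1.5f, 2.5f, -2.5f);
        int r[4], t[4];
        _mm_storeu_si128((__m128i *) r, _mm_cvtps_epi32(v));  /* 2 -2 2 -2 */
        _mm_storeu_si128((__m128i *) t, _mm_cvttps_epi32(v)); /* 1 -1 2 -2 */
        printf("%d %d %d %d | %d %d %d %d\n",
               r[0], r[1], r[2], r[3], t[0], t[1], t[2], t[3]);
        return 0;
    }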

◆ _mm_cvttsd_si32()

FORCE_INLINE int32_t _mm_cvttsd_si32 ( __m128d  a)

◆ _mm_cvttsd_si64()

FORCE_INLINE int64_t _mm_cvttsd_si64 ( __m128d  a)

◆ _mm_cvttss_si64()

FORCE_INLINE int64_t _mm_cvttss_si64 ( __m128  a)

◆ _mm_div_pd()

FORCE_INLINE __m128d _mm_div_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_div_ps()

FORCE_INLINE __m128 _mm_div_ps ( __m128  a,
__m128  b 
)

◆ _mm_div_sd()

FORCE_INLINE __m128d _mm_div_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_div_ss()

FORCE_INLINE __m128 _mm_div_ss ( __m128  a,
__m128  b 
)

◆ _mm_dp_pd()

FORCE_INLINE __m128d _mm_dp_pd ( __m128d  a,
__m128d  b,
const int  imm 
)

◆ _mm_dp_ps()

FORCE_INLINE __m128 _mm_dp_ps ( __m128  a,
__m128  b,
const int  imm 
)
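
The high nibble of imm selects which lane products enter the sum and the
low nibble selects which result lanes receive it; e.g. 0xF1 multiplies all
four lanes and writes the dot product to lane 0. Illustrative sketch:

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        __m128 a = _mm_setr_ps(1.f, 2.f, 3.f, 4.f);
        __m128 b = _mm_setr_ps(5.f, 6.f, 7.f, 8.f);
        __m128 d = _mm_dp_ps(a, b, 0xF1);
        printf("dot = %g\n", _mm_cvtss_f32(d)); /* 1*5+2*6+3*7+4*8 = 70 */
        return 0;
    }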

◆ _mm_empty()

FORCE_INLINE void _mm_empty ( void  )

◆ _mm_floor_pd()

FORCE_INLINE __m128d _mm_floor_pd ( __m128d  a)

◆ _mm_floor_ps()

FORCE_INLINE __m128 _mm_floor_ps ( __m128  a)

◆ _mm_floor_sd()

FORCE_INLINE __m128d _mm_floor_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_floor_ss()

FORCE_INLINE __m128 _mm_floor_ss ( __m128  a,
__m128  b 
)

◆ _mm_free()

FORCE_INLINE void _mm_free ( void *  addr)

◆ _MM_GET_ROUNDING_MODE()

FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE ( )

◆ _mm_getcsr()

FORCE_INLINE unsigned int _mm_getcsr ( )

◆ _mm_hadd_epi16()

FORCE_INLINE __m128i _mm_hadd_epi16 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_hadd_epi32()

FORCE_INLINE __m128i _mm_hadd_epi32 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_hadd_pd()

FORCE_INLINE __m128d _mm_hadd_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_hadd_pi16()

FORCE_INLINE __m64 _mm_hadd_pi16 ( __m64  a,
__m64  b 
)

◆ _mm_hadd_pi32()

FORCE_INLINE __m64 _mm_hadd_pi32 ( __m64  a,
__m64  b 
)

◆ _mm_hadd_ps()

FORCE_INLINE __m128 _mm_hadd_ps ( __m128  a,
__m128  b 
)

◆ _mm_hadds_epi16()

FORCE_INLINE __m128i _mm_hadds_epi16 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_hadds_pi16()

FORCE_INLINE __m64 _mm_hadds_pi16 ( __m64  _a,
__m64  _b 
)

◆ _mm_hsub_epi16()

FORCE_INLINE __m128i _mm_hsub_epi16 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_hsub_epi32()

FORCE_INLINE __m128i _mm_hsub_epi32 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_hsub_pd()

FORCE_INLINE __m128d _mm_hsub_pd ( __m128d  _a,
__m128d  _b 
)

◆ _mm_hsub_pi16()

FORCE_INLINE __m64 _mm_hsub_pi16 ( __m64  _a,
__m64  _b 
)

◆ _mm_hsub_pi32()

FORCE_INLINE __m64 _mm_hsub_pi32 ( __m64  _a,
__m64  _b 
)

◆ _mm_hsub_ps()

FORCE_INLINE __m128 _mm_hsub_ps ( __m128  _a,
__m128  _b 
)

◆ _mm_hsubs_epi16()

FORCE_INLINE __m128i _mm_hsubs_epi16 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_hsubs_pi16()

FORCE_INLINE __m64 _mm_hsubs_pi16 ( __m64  _a,
__m64  _b 
)

◆ _mm_load1_pd()

FORCE_INLINE __m128d _mm_load1_pd ( const double *  p)

◆ _mm_load1_ps()

FORCE_INLINE __m128 _mm_load1_ps ( const float *  p)

◆ _mm_load_pd()

FORCE_INLINE __m128d _mm_load_pd ( const double *  p)

◆ _mm_load_ps()

FORCE_INLINE __m128 _mm_load_ps ( const float *  p)

◆ _mm_load_sd()

FORCE_INLINE __m128d _mm_load_sd ( const double *  p)

◆ _mm_load_si128()

FORCE_INLINE __m128i _mm_load_si128 ( const __m128i *  p)

◆ _mm_load_ss()

FORCE_INLINE __m128 _mm_load_ss ( const float *  p)

◆ _mm_loadh_pd()

FORCE_INLINE __m128d _mm_loadh_pd ( __m128d  a,
const double *  p 
)

◆ _mm_loadh_pi()

FORCE_INLINE __m128 _mm_loadh_pi ( __m128  a,
__m64 const *  p 
)

◆ _mm_loadl_epi64()

FORCE_INLINE __m128i _mm_loadl_epi64 ( __m128i const *  p)

◆ _mm_loadl_pd()

FORCE_INLINE __m128d _mm_loadl_pd ( __m128d  a,
const double *  p 
)

◆ _mm_loadl_pi()

FORCE_INLINE __m128 _mm_loadl_pi ( __m128  a,
__m64 const *  p 
)

◆ _mm_loadr_pd()

FORCE_INLINE __m128d _mm_loadr_pd ( const double *  p)

◆ _mm_loadr_ps()

FORCE_INLINE __m128 _mm_loadr_ps ( const float *  p)

◆ _mm_loadu_pd()

FORCE_INLINE __m128d _mm_loadu_pd ( const double *  p)

◆ _mm_loadu_ps()

FORCE_INLINE __m128 _mm_loadu_ps ( const float *  p)

◆ _mm_loadu_si128()

FORCE_INLINE __m128i _mm_loadu_si128 ( const __m128i *  p)

◆ _mm_loadu_si16()

FORCE_INLINE __m128i _mm_loadu_si16 ( const void *  p)

◆ _mm_loadu_si32()

FORCE_INLINE __m128i _mm_loadu_si32 ( const void *  p)

◆ _mm_loadu_si64()

FORCE_INLINE __m128i _mm_loadu_si64 ( const void *  p)

◆ _mm_madd_epi16()

FORCE_INLINE __m128i _mm_madd_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_maddubs_epi16()

FORCE_INLINE __m128i _mm_maddubs_epi16 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_maddubs_pi16()

FORCE_INLINE __m64 _mm_maddubs_pi16 ( __m64  _a,
__m64  _b 
)

◆ _mm_malloc()

FORCE_INLINE void* _mm_malloc ( size_t  size,
size_t  align 
)
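
Returns a block whose address is a multiple of align; blocks obtained this
way must be released with _mm_free, not free(). Illustrative sketch:

    #include <stdio.h>
    #include <stdint.h>
    #include "sse2neon.h"

    int main(void)
    {
        float *buf = (float *) _mm_malloc(1024 * sizeof(float), 64);
        if (!buf)
            return 1;
        printf("64-byte aligned: %d\n", (int) ((uintptr_t) buf % 64 == 0));
        _mm_free(buf);
        return 0;
    }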

◆ _mm_maskmove_si64()

FORCE_INLINE void _mm_maskmove_si64 ( __m64  a,
__m64  mask,
char *  mem_addr 
)

◆ _mm_maskmoveu_si128()

FORCE_INLINE void _mm_maskmoveu_si128 ( __m128i  a,
__m128i  mask,
char *  mem_addr 
)

◆ _mm_max_epi16()

FORCE_INLINE __m128i _mm_max_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_max_epi32()

FORCE_INLINE __m128i _mm_max_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_max_epi8()

FORCE_INLINE __m128i _mm_max_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_max_epu16()

FORCE_INLINE __m128i _mm_max_epu16 ( __m128i  a,
__m128i  b 
)

◆ _mm_max_epu32()

FORCE_INLINE __m128i _mm_max_epu32 ( __m128i  a,
__m128i  b 
)

◆ _mm_max_epu8()

FORCE_INLINE __m128i _mm_max_epu8 ( __m128i  a,
__m128i  b 
)

◆ _mm_max_pd()

FORCE_INLINE __m128d _mm_max_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_max_pi16()

FORCE_INLINE __m64 _mm_max_pi16 ( __m64  a,
__m64  b 
)

◆ _mm_max_ps()

FORCE_INLINE __m128 _mm_max_ps ( __m128  a,
__m128  b 
)

◆ _mm_max_pu8()

FORCE_INLINE __m64 _mm_max_pu8 ( __m64  a,
__m64  b 
)

◆ _mm_max_sd()

FORCE_INLINE __m128d _mm_max_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_max_ss()

FORCE_INLINE __m128 _mm_max_ss ( __m128  a,
__m128  b 
)

◆ _mm_min_epi16()

FORCE_INLINE __m128i _mm_min_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_min_epi32()

FORCE_INLINE __m128i _mm_min_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_min_epi8()

FORCE_INLINE __m128i _mm_min_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_min_epu16()

FORCE_INLINE __m128i _mm_min_epu16 ( __m128i  a,
__m128i  b 
)

◆ _mm_min_epu32()

FORCE_INLINE __m128i _mm_min_epu32 ( __m128i  a,
__m128i  b 
)

◆ _mm_min_epu8()

FORCE_INLINE __m128i _mm_min_epu8 ( __m128i  a,
__m128i  b 
)

◆ _mm_min_pd()

FORCE_INLINE __m128d _mm_min_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_min_pi16()

FORCE_INLINE __m64 _mm_min_pi16 ( __m64  a,
__m64  b 
)

◆ _mm_min_ps()

FORCE_INLINE __m128 _mm_min_ps ( __m128  a,
__m128  b 
)

◆ _mm_min_pu8()

FORCE_INLINE __m64 _mm_min_pu8 ( __m64  a,
__m64  b 
)

◆ _mm_min_sd()

FORCE_INLINE __m128d _mm_min_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_min_ss()

FORCE_INLINE __m128 _mm_min_ss ( __m128  a,
__m128  b 
)

◆ _mm_minpos_epu16()

FORCE_INLINE __m128i _mm_minpos_epu16 ( __m128i  a)

◆ _mm_move_epi64()

FORCE_INLINE __m128i _mm_move_epi64 ( __m128i  a)

◆ _mm_move_sd()

FORCE_INLINE __m128d _mm_move_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_move_ss()

FORCE_INLINE __m128 _mm_move_ss ( __m128  a,
__m128  b 
)

◆ _mm_movedup_pd()

FORCE_INLINE __m128d _mm_movedup_pd ( __m128d  a)

◆ _mm_movehdup_ps()

FORCE_INLINE __m128 _mm_movehdup_ps ( __m128  a)

◆ _mm_movehl_ps()

FORCE_INLINE __m128 _mm_movehl_ps ( __m128  __A,
__m128  __B 
)

◆ _mm_moveldup_ps()

FORCE_INLINE __m128 _mm_moveldup_ps ( __m128  a)

◆ _mm_movelh_ps()

FORCE_INLINE __m128 _mm_movelh_ps ( __m128  __A,
__m128  __B 
)

◆ _mm_movemask_epi8()

FORCE_INLINE int _mm_movemask_epi8 ( __m128i  a)
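
Gathers the top bit of each byte into a 16-bit integer, which enables fast
byte scans. A sketch of the classic first-match pattern (the helper name
and the GCC/Clang __builtin_ctz are ours, not part of the header):

    #include "sse2neon.h"

    /* Index of the first byte equal to c in a 16-byte block, or 16. */
    static int first_match(const unsigned char *p, unsigned char c)
    {
        __m128i blk = _mm_loadu_si128((const __m128i *) p);
        __m128i eq  = _mm_cmpeq_epi8(blk, _mm_set1_epi8((signed char) c));
        int mask = _mm_movemask_epi8(eq); /* bit i set if byte i matched */
        return mask ? __builtin_ctz(mask) : 16;
    }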

◆ _mm_movemask_pd()

FORCE_INLINE int _mm_movemask_pd ( __m128d  a)

◆ _mm_movemask_pi8()

FORCE_INLINE int _mm_movemask_pi8 ( __m64  a)

◆ _mm_movemask_ps()

FORCE_INLINE int _mm_movemask_ps ( __m128  a)

◆ _mm_movepi64_pi64()

FORCE_INLINE __m64 _mm_movepi64_pi64 ( __m128i  a)

◆ _mm_movpi64_epi64()

FORCE_INLINE __m128i _mm_movpi64_epi64 ( __m64  a)

◆ _mm_mpsadbw_epu8()

FORCE_INLINE __m128i _mm_mpsadbw_epu8 ( __m128i  a,
__m128i  b,
const int  imm 
)

◆ _mm_mul_epi32()

FORCE_INLINE __m128i _mm_mul_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_mul_epu32()

FORCE_INLINE __m128i _mm_mul_epu32 ( __m128i  a,
__m128i  b 
)

◆ _mm_mul_pd()

FORCE_INLINE __m128d _mm_mul_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_mul_ps()

FORCE_INLINE __m128 _mm_mul_ps ( __m128  a,
__m128  b 
)

◆ _mm_mul_sd()

FORCE_INLINE __m128d _mm_mul_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_mul_ss()

FORCE_INLINE __m128 _mm_mul_ss ( __m128  a,
__m128  b 
)

◆ _mm_mul_su32()

FORCE_INLINE __m64 _mm_mul_su32 ( __m64  a,
__m64  b 
)

◆ _mm_mulhi_epi16()

FORCE_INLINE __m128i _mm_mulhi_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_mulhi_epu16()

FORCE_INLINE __m128i _mm_mulhi_epu16 ( __m128i  a,
__m128i  b 
)

◆ _mm_mulhi_pu16()

FORCE_INLINE __m64 _mm_mulhi_pu16 ( __m64  a,
__m64  b 
)

◆ _mm_mulhrs_epi16()

FORCE_INLINE __m128i _mm_mulhrs_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_mulhrs_pi16()

FORCE_INLINE __m64 _mm_mulhrs_pi16 ( __m64  a,
__m64  b 
)

◆ _mm_mullo_epi16()

FORCE_INLINE __m128i _mm_mullo_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_mullo_epi32()

FORCE_INLINE __m128i _mm_mullo_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_or_pd()

FORCE_INLINE __m128d _mm_or_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_or_ps()

FORCE_INLINE __m128 _mm_or_ps ( __m128  a,
__m128  b 
)

◆ _mm_or_si128()

FORCE_INLINE __m128i _mm_or_si128 ( __m128i  a,
__m128i  b 
)

◆ _mm_packs_epi16()

FORCE_INLINE __m128i _mm_packs_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_packs_epi32()

FORCE_INLINE __m128i _mm_packs_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_packus_epi16()

FORCE_INLINE __m128i _mm_packus_epi16 ( const __m128i  a,
const __m128i  b 
)

◆ _mm_packus_epi32()

FORCE_INLINE __m128i _mm_packus_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_pause()

FORCE_INLINE void _mm_pause ( )

◆ _mm_popcnt_u32()

FORCE_INLINE int _mm_popcnt_u32 ( unsigned int  a)

◆ _mm_popcnt_u64()

FORCE_INLINE int64_t _mm_popcnt_u64 ( uint64_t  a)

◆ _mm_prefetch()

FORCE_INLINE void _mm_prefetch ( const void *  p,
int  i 
)

◆ _mm_rcp_ps()

FORCE_INLINE __m128 _mm_rcp_ps ( __m128  in)

◆ _mm_rcp_ss()

FORCE_INLINE __m128 _mm_rcp_ss ( __m128  a)

◆ _mm_round_pd()

FORCE_INLINE __m128d _mm_round_pd ( __m128d  a,
int  rounding 
)

◆ _mm_round_ps()

FORCE_INLINE __m128 _mm_round_ps ( __m128  a,
int  rounding 
)

◆ _mm_round_sd()

FORCE_INLINE __m128d _mm_round_sd ( __m128d  a,
__m128d  b,
int  rounding 
)

◆ _mm_round_ss()

FORCE_INLINE __m128 _mm_round_ss ( __m128  a,
__m128  b,
int  rounding 
)

◆ _mm_rsqrt_ps()

FORCE_INLINE __m128 _mm_rsqrt_ps ( __m128  in)

◆ _mm_rsqrt_ss()

FORCE_INLINE __m128 _mm_rsqrt_ss ( __m128  in)

◆ _mm_sad_epu8()

FORCE_INLINE __m128i _mm_sad_epu8 ( __m128i  a,
__m128i  b 
)
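
Computes |a_i - b_i| over the sixteen unsigned bytes and sums each half
into a 64-bit lane (bytes 0-7 into the low lane, 8-15 into the high lane).
Illustrative sketch:

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        __m128i a = _mm_set1_epi8(10);
        __m128i b = _mm_set1_epi8(3);
        __m128i s = _mm_sad_epu8(a, b); /* each lane: 8 * |10-3| = 56 */
        printf("%lld %lld\n",
               (long long) _mm_cvtsi128_si64(s),
               (long long) _mm_cvtsi128_si64(_mm_unpackhi_epi64(s, s)));
        return 0;
    }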

◆ _mm_sad_pu8()

FORCE_INLINE __m64 _mm_sad_pu8 ( __m64  a,
__m64  b 
)

◆ _mm_set1_epi16()

FORCE_INLINE __m128i _mm_set1_epi16 ( short  w)

◆ _mm_set1_epi32()

FORCE_INLINE __m128i _mm_set1_epi32 ( int  _i)

◆ _mm_set1_epi64()

FORCE_INLINE __m128i _mm_set1_epi64 ( __m64  _i)

◆ _mm_set1_epi64x()

FORCE_INLINE __m128i _mm_set1_epi64x ( int64_t  _i)

◆ _mm_set1_epi8()

FORCE_INLINE __m128i _mm_set1_epi8 ( signed char  w)

◆ _mm_set1_pd()

FORCE_INLINE __m128d _mm_set1_pd ( double  d)

◆ _mm_set1_ps()

FORCE_INLINE __m128 _mm_set1_ps ( float  _w)

◆ _mm_set_epi16()

FORCE_INLINE __m128i _mm_set_epi16 ( short  i7,
short  i6,
short  i5,
short  i4,
short  i3,
short  i2,
short  i1,
short  i0 
)

◆ _mm_set_epi32()

FORCE_INLINE __m128i _mm_set_epi32 ( int  i3,
int  i2,
int  i1,
int  i0 
)

◆ _mm_set_epi64()

FORCE_INLINE __m128i _mm_set_epi64 ( __m64  i1,
__m64  i2 
)

◆ _mm_set_epi64x()

FORCE_INLINE __m128i _mm_set_epi64x ( int64_t  i1,
int64_t  i2 
)

◆ _mm_set_epi8()

FORCE_INLINE __m128i _mm_set_epi8 ( signed char  b15,
signed char  b14,
signed char  b13,
signed char  b12,
signed char  b11,
signed char  b10,
signed char  b9,
signed char  b8,
signed char  b7,
signed char  b6,
signed char  b5,
signed char  b4,
signed char  b3,
signed char  b2,
signed char  b1,
signed char  b0 
)

◆ _mm_set_pd()

FORCE_INLINE __m128d _mm_set_pd ( double  e1,
double  e0 
)

◆ _mm_set_ps()

FORCE_INLINE __m128 _mm_set_ps ( float  w,
float  z,
float  y,
float  x 
)

◆ _mm_set_ps1()

FORCE_INLINE __m128 _mm_set_ps1 ( float  _w)

◆ _MM_SET_ROUNDING_MODE()

FORCE_INLINE void _MM_SET_ROUNDING_MODE ( int  rounding)
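
Emulates the MXCSR rounding-control field by programming the floating-point
control register's rounding bits. A sketch, assuming the conversion honours
the selected mode on the target:

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        __m128 v = _mm_set1_ps(-2.5f);
        _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN);
        int down = _mm_cvt_ss2si(v);              /* -3 */
        _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); /* restore default */
        int near = _mm_cvt_ss2si(v);              /* -2 (ties to even) */
        printf("%d %d\n", down, near);
        return 0;
    }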

◆ _mm_set_sd()

FORCE_INLINE __m128d _mm_set_sd ( double  a)

◆ _mm_set_ss()

FORCE_INLINE __m128 _mm_set_ss ( float  a)

◆ _mm_setcsr()

FORCE_INLINE void _mm_setcsr ( unsigned int  a)

◆ _mm_setr_epi16()

FORCE_INLINE __m128i _mm_setr_epi16 ( short  w0,
short  w1,
short  w2,
short  w3,
short  w4,
short  w5,
short  w6,
short  w7 
)

◆ _mm_setr_epi32()

FORCE_INLINE __m128i _mm_setr_epi32 ( int  i3,
int  i2,
int  i1,
int  i0 
)

◆ _mm_setr_epi64()

FORCE_INLINE __m128i _mm_setr_epi64 ( __m64  e1,
__m64  e0 
)

◆ _mm_setr_epi8()

FORCE_INLINE __m128i _mm_setr_epi8 ( signed char  b0,
signed char  b1,
signed char  b2,
signed char  b3,
signed char  b4,
signed char  b5,
signed char  b6,
signed char  b7,
signed char  b8,
signed char  b9,
signed char  b10,
signed char  b11,
signed char  b12,
signed char  b13,
signed char  b14,
signed char  b15 
)

◆ _mm_setr_pd()

FORCE_INLINE __m128d _mm_setr_pd ( double  e1,
double  e0 
)

◆ _mm_setr_ps()

FORCE_INLINE __m128 _mm_setr_ps ( float  w,
float  z,
float  y,
float  x 
)

◆ _mm_setzero_pd()

FORCE_INLINE __m128d _mm_setzero_pd ( void  )

◆ _mm_setzero_ps()

FORCE_INLINE __m128 _mm_setzero_ps ( void  )

◆ _mm_setzero_si128()

FORCE_INLINE __m128i _mm_setzero_si128 ( void  )

◆ _mm_sfence()

FORCE_INLINE void _mm_sfence ( void  )

◆ _mm_shuffle_epi8()

FORCE_INLINE __m128i _mm_shuffle_epi8 ( __m128i  a,
__m128i  b 
)
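
Permutes the bytes of a using the low four bits of each byte of b as lane
indices; a byte of b with its top bit set zeroes the corresponding output
byte. Illustrative byte-reverse sketch:

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        __m128i v   = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                    8, 9, 10, 11, 12, 13, 14, 15);
        __m128i idx = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                                    7, 6, 5, 4, 3, 2, 1, 0);
        unsigned char out[16];
        _mm_storeu_si128((__m128i *) out, _mm_shuffle_epi8(v, idx));
        printf("%d ... %d\n", out[0], out[15]); /* 15 ... 0 */
        return 0;
    }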

◆ _mm_shuffle_epi_0101()

FORCE_INLINE __m128i _mm_shuffle_epi_0101 ( __m128i  a)

◆ _mm_shuffle_epi_0122()

FORCE_INLINE __m128i _mm_shuffle_epi_0122 ( __m128i  a)

◆ _mm_shuffle_epi_0321()

FORCE_INLINE __m128i _mm_shuffle_epi_0321 ( __m128i  a)

◆ _mm_shuffle_epi_1001()

FORCE_INLINE __m128i _mm_shuffle_epi_1001 ( __m128i  a)

◆ _mm_shuffle_epi_1010()

FORCE_INLINE __m128i _mm_shuffle_epi_1010 ( __m128i  a)

◆ _mm_shuffle_epi_1032()

FORCE_INLINE __m128i _mm_shuffle_epi_1032 ( __m128i  a)

◆ _mm_shuffle_epi_2103()

FORCE_INLINE __m128i _mm_shuffle_epi_2103 ( __m128i  a)

◆ _mm_shuffle_epi_2211()

FORCE_INLINE __m128i _mm_shuffle_epi_2211 ( __m128i  a)

◆ _mm_shuffle_epi_2301()

FORCE_INLINE __m128i _mm_shuffle_epi_2301 ( __m128i  a)

◆ _mm_shuffle_epi_3332()

FORCE_INLINE __m128i _mm_shuffle_epi_3332 ( __m128i  a)

◆ _mm_shuffle_pi8()

FORCE_INLINE __m64 _mm_shuffle_pi8 ( __m64  a,
__m64  b 
)

◆ _mm_shuffle_ps_0011()

FORCE_INLINE __m128 _mm_shuffle_ps_0011 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_0022()

FORCE_INLINE __m128 _mm_shuffle_ps_0022 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_0101()

FORCE_INLINE __m128 _mm_shuffle_ps_0101 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_0321()

FORCE_INLINE __m128 _mm_shuffle_ps_0321 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_1001()

FORCE_INLINE __m128 _mm_shuffle_ps_1001 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_1010()

FORCE_INLINE __m128 _mm_shuffle_ps_1010 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_1032()

FORCE_INLINE __m128 _mm_shuffle_ps_1032 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_1133()

FORCE_INLINE __m128 _mm_shuffle_ps_1133 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_2001()

FORCE_INLINE __m128 _mm_shuffle_ps_2001 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_2010()

FORCE_INLINE __m128 _mm_shuffle_ps_2010 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_2032()

FORCE_INLINE __m128 _mm_shuffle_ps_2032 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_2103()

FORCE_INLINE __m128 _mm_shuffle_ps_2103 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_2200()

FORCE_INLINE __m128 _mm_shuffle_ps_2200 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_2301()

FORCE_INLINE __m128 _mm_shuffle_ps_2301 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_3202()

FORCE_INLINE __m128 _mm_shuffle_ps_3202 ( __m128  a,
__m128  b 
)

◆ _mm_shuffle_ps_3210()

FORCE_INLINE __m128 _mm_shuffle_ps_3210 ( __m128  a,
__m128  b 
)

◆ _mm_sign_epi16()

FORCE_INLINE __m128i _mm_sign_epi16 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_sign_epi32()

FORCE_INLINE __m128i _mm_sign_epi32 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_sign_epi8()

FORCE_INLINE __m128i _mm_sign_epi8 ( __m128i  _a,
__m128i  _b 
)

◆ _mm_sign_pi16()

FORCE_INLINE __m64 _mm_sign_pi16 ( __m64  _a,
__m64  _b 
)

◆ _mm_sign_pi32()

FORCE_INLINE __m64 _mm_sign_pi32 ( __m64  _a,
__m64  _b 
)

◆ _mm_sign_pi8()

FORCE_INLINE __m64 _mm_sign_pi8 ( __m64  _a,
__m64  _b 
)

◆ _mm_sll_epi16()

FORCE_INLINE __m128i _mm_sll_epi16 ( __m128i  a,
__m128i  count 
)

◆ _mm_sll_epi32()

FORCE_INLINE __m128i _mm_sll_epi32 ( __m128i  a,
__m128i  count 
)

◆ _mm_sll_epi64()

FORCE_INLINE __m128i _mm_sll_epi64 ( __m128i  a,
__m128i  count 
)

◆ _mm_slli_epi16()

FORCE_INLINE __m128i _mm_slli_epi16 ( __m128i  a,
int  imm 
)

◆ _mm_slli_epi32()

FORCE_INLINE __m128i _mm_slli_epi32 ( __m128i  a,
int  imm 
)

◆ _mm_slli_epi64()

FORCE_INLINE __m128i _mm_slli_epi64 ( __m128i  a,
int  imm 
)

◆ _mm_slli_si128()

FORCE_INLINE __m128i _mm_slli_si128 ( __m128i  a,
int  imm 
)

◆ _mm_sqrt_pd()

FORCE_INLINE __m128d _mm_sqrt_pd ( __m128d  a)

◆ _mm_sqrt_ps()

FORCE_INLINE __m128 _mm_sqrt_ps ( __m128  in)

◆ _mm_sqrt_sd()

FORCE_INLINE __m128d _mm_sqrt_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_sqrt_ss()

FORCE_INLINE __m128 _mm_sqrt_ss ( __m128  in)

◆ _mm_sra_epi16()

FORCE_INLINE __m128i _mm_sra_epi16 ( __m128i  a,
__m128i  count 
)

◆ _mm_sra_epi32()

FORCE_INLINE __m128i _mm_sra_epi32 ( __m128i  a,
__m128i  count 
)

◆ _mm_srai_epi16()

FORCE_INLINE __m128i _mm_srai_epi16 ( __m128i  a,
int  imm 
)

◆ _mm_srl_epi16()

FORCE_INLINE __m128i _mm_srl_epi16 ( __m128i  a,
__m128i  count 
)

◆ _mm_srl_epi32()

FORCE_INLINE __m128i _mm_srl_epi32 ( __m128i  a,
__m128i  count 
)

◆ _mm_srl_epi64()

FORCE_INLINE __m128i _mm_srl_epi64 ( __m128i  a,
__m128i  count 
)

◆ _mm_srli_si128()

FORCE_INLINE __m128i _mm_srli_si128 ( __m128i  a,
int  imm 
)

◆ _mm_store_pd()

FORCE_INLINE void _mm_store_pd ( double *  mem_addr,
__m128d  a 
)

◆ _mm_store_pd1()

FORCE_INLINE void _mm_store_pd1 ( double *  mem_addr,
__m128d  a 
)

◆ _mm_store_ps()

FORCE_INLINE void _mm_store_ps ( float *  p,
__m128  a 
)

◆ _mm_store_ps1()

FORCE_INLINE void _mm_store_ps1 ( float *  p,
__m128  a 
)

◆ _mm_store_sd()

FORCE_INLINE void _mm_store_sd ( double *  mem_addr,
__m128d  a 
)

◆ _mm_store_si128()

FORCE_INLINE void _mm_store_si128 ( __m128i *  p,
__m128i  a 
)

◆ _mm_store_ss()

FORCE_INLINE void _mm_store_ss ( float *  p,
__m128  a 
)

◆ _mm_storeh_pd()

FORCE_INLINE void _mm_storeh_pd ( double *  mem_addr,
__m128d  a 
)

◆ _mm_storeh_pi()

FORCE_INLINE void _mm_storeh_pi ( __m64 *  p,
__m128  a 
)

◆ _mm_storel_epi64()

FORCE_INLINE void _mm_storel_epi64 ( __m128i *  a,
__m128i  b 
)

◆ _mm_storel_pd()

FORCE_INLINE void _mm_storel_pd ( double *  mem_addr,
__m128d  a 
)

◆ _mm_storel_pi()

FORCE_INLINE void _mm_storel_pi ( __m64 *  p,
__m128  a 
)

◆ _mm_storer_pd()

FORCE_INLINE void _mm_storer_pd ( double *  mem_addr,
__m128d  a 
)

◆ _mm_storer_ps()

FORCE_INLINE void _mm_storer_ps ( float *  p,
__m128  a 
)

◆ _mm_storeu_pd()

FORCE_INLINE void _mm_storeu_pd ( double *  mem_addr,
__m128d  a 
)

◆ _mm_storeu_ps()

FORCE_INLINE void _mm_storeu_ps ( float *  p,
__m128  a 
)

◆ _mm_storeu_si128()

FORCE_INLINE void _mm_storeu_si128 ( __m128i *  p,
__m128i  a 
)

◆ _mm_storeu_si16()

FORCE_INLINE void _mm_storeu_si16 ( void *  p,
__m128i  a 
)

◆ _mm_storeu_si32()

FORCE_INLINE void _mm_storeu_si32 ( void *  p,
__m128i  a 
)

◆ _mm_storeu_si64()

FORCE_INLINE void _mm_storeu_si64 ( void *  p,
__m128i  a 
)

◆ _mm_stream_load_si128()

FORCE_INLINE __m128i _mm_stream_load_si128 ( __m128i *  p)

◆ _mm_stream_pd()

FORCE_INLINE void _mm_stream_pd ( double *  p,
__m128d  a 
)

◆ _mm_stream_pi()

FORCE_INLINE void _mm_stream_pi ( __m64 *  p,
__m64  a 
)

◆ _mm_stream_ps()

FORCE_INLINE void _mm_stream_ps ( float *  p,
__m128  a 
)

◆ _mm_stream_si128()

FORCE_INLINE void _mm_stream_si128 ( __m128i *  p,
__m128i  a 
)

◆ _mm_stream_si32()

FORCE_INLINE void _mm_stream_si32 ( int *  p,
int  a 
)

◆ _mm_stream_si64()

FORCE_INLINE void _mm_stream_si64 ( __int64 *  p,
__int64  a 
)

◆ _mm_sub_epi16()

FORCE_INLINE __m128i _mm_sub_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_sub_epi32()

FORCE_INLINE __m128i _mm_sub_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_sub_epi64()

FORCE_INLINE __m128i _mm_sub_epi64 ( __m128i  a,
__m128i  b 
)

◆ _mm_sub_epi8()

FORCE_INLINE __m128i _mm_sub_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_sub_pd()

FORCE_INLINE __m128d _mm_sub_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_sub_ps()

FORCE_INLINE __m128 _mm_sub_ps ( __m128  a,
__m128  b 
)

◆ _mm_sub_sd()

FORCE_INLINE __m128d _mm_sub_sd ( __m128d  a,
__m128d  b 
)

◆ _mm_sub_si64()

FORCE_INLINE __m64 _mm_sub_si64 ( __m64  a,
__m64  b 
)

◆ _mm_sub_ss()

FORCE_INLINE __m128 _mm_sub_ss ( __m128  a,
__m128  b 
)

◆ _mm_subs_epi16()

FORCE_INLINE __m128i _mm_subs_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_subs_epi8()

FORCE_INLINE __m128i _mm_subs_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_subs_epu16()

FORCE_INLINE __m128i _mm_subs_epu16 ( __m128i  a,
__m128i  b 
)

◆ _mm_subs_epu8()

FORCE_INLINE __m128i _mm_subs_epu8 ( __m128i  a,
__m128i  b 
)

◆ _mm_test_all_ones()

FORCE_INLINE int _mm_test_all_ones ( __m128i  a)

◆ _mm_test_all_zeros()

FORCE_INLINE int _mm_test_all_zeros ( __m128i  a,
__m128i  mask 
)

◆ _mm_test_mix_ones_zeros()

FORCE_INLINE int _mm_test_mix_ones_zeros ( __m128i  a,
__m128i  mask 
)

◆ _mm_testc_si128()

FORCE_INLINE int _mm_testc_si128 ( __m128i  a,
__m128i  b 
)

◆ _mm_testz_si128()

FORCE_INLINE int _mm_testz_si128 ( __m128i  a,
__m128i  b 
)
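
_mm_testz_si128 returns 1 when a AND b is all zeros (the ZF analogue of
PTEST); _mm_testc_si128 returns 1 when NOT-a AND b is all zeros (the CF
analogue). Illustrative sketch:

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        __m128i a    = _mm_setr_epi32(0xF0, 0, 0, 0);
        __m128i mask = _mm_setr_epi32(0x0F, 0, 0, 0);
        printf("testz = %d\n", _mm_testz_si128(a, mask)); /* 1: a & mask == 0 */
        printf("testc = %d\n", _mm_testc_si128(a, mask)); /* 0: ~a & mask != 0 */
        return 0;
    }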

◆ _mm_undefined_pd()

FORCE_INLINE __m128d _mm_undefined_pd ( void  )

◆ _mm_undefined_ps()

FORCE_INLINE __m128 _mm_undefined_ps ( void  )

◆ _mm_undefined_si128()

FORCE_INLINE __m128i _mm_undefined_si128 ( void  )

◆ _mm_unpackhi_epi16()

FORCE_INLINE __m128i _mm_unpackhi_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_unpackhi_epi32()

FORCE_INLINE __m128i _mm_unpackhi_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_unpackhi_epi64()

FORCE_INLINE __m128i _mm_unpackhi_epi64 ( __m128i  a,
__m128i  b 
)

◆ _mm_unpackhi_epi8()

FORCE_INLINE __m128i _mm_unpackhi_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_unpackhi_pd()

FORCE_INLINE __m128d _mm_unpackhi_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_unpackhi_ps()

FORCE_INLINE __m128 _mm_unpackhi_ps ( __m128  a,
__m128  b 
)

◆ _mm_unpacklo_epi16()

FORCE_INLINE __m128i _mm_unpacklo_epi16 ( __m128i  a,
__m128i  b 
)

◆ _mm_unpacklo_epi32()

FORCE_INLINE __m128i _mm_unpacklo_epi32 ( __m128i  a,
__m128i  b 
)

◆ _mm_unpacklo_epi64()

FORCE_INLINE __m128i _mm_unpacklo_epi64 ( __m128i  a,
__m128i  b 
)

◆ _mm_unpacklo_epi8()

FORCE_INLINE __m128i _mm_unpacklo_epi8 ( __m128i  a,
__m128i  b 
)

◆ _mm_unpacklo_pd()

FORCE_INLINE __m128d _mm_unpacklo_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_unpacklo_ps()

FORCE_INLINE __m128 _mm_unpacklo_ps ( __m128  a,
__m128  b 
)

◆ _mm_xor_pd()

FORCE_INLINE __m128d _mm_xor_pd ( __m128d  a,
__m128d  b 
)

◆ _mm_xor_ps()

FORCE_INLINE __m128 _mm_xor_ps ( __m128  a,
__m128  b 
)

◆ _mm_xor_si128()

FORCE_INLINE __m128i _mm_xor_si128 ( __m128i  a,
__m128i  b 
)

◆ _rdtsc()

FORCE_INLINE uint64_t _rdtsc ( void  )

◆ _sse2neon_kadd_f32()

FORCE_INLINE void _sse2neon_kadd_f32 ( float *  sum,
float *  c,
float  y 
)
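
An internal helper implementing one step of Kahan compensated summation:
c accumulates the rounding error of sum += y so that small terms are not
lost against a large running total. A sketch of the idea (this helper is
internal to the header, so treat its interface as unstable):

    #include <stdio.h>
    #include "sse2neon.h"

    int main(void)
    {
        float sum = 0.f, c = 0.f;
        for (int i = 0; i < 1000000; i++)
            _sse2neon_kadd_f32(&sum, &c, 1e-4f);
        printf("%f\n", sum); /* close to 100.0; naive summation drifts */
        return 0;
    }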

◆ _sse2neon_mm_get_denormals_zero_mode()

FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode ( )

◆ _sse2neon_mm_get_flush_zero_mode()

FORCE_INLINE unsigned int _sse2neon_mm_get_flush_zero_mode ( )

◆ _sse2neon_mm_set_denormals_zero_mode()

FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode ( unsigned int  flag)

◆ _sse2neon_mm_set_flush_zero_mode()

FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode ( unsigned int  flag)

◆ _sse2neon_vld1q_u8_x4()

FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4 ( const uint8_t *  p)

◆ _sse2neon_vmull_p64()

static uint64x2_t _sse2neon_vmull_p64 ( uint64x1_t  _a,
uint64x1_t  _b 
)

◆ ALIGN_STRUCT()

union ALIGN_STRUCT ( 16  )

Variable Documentation

◆ SIMDVec

SIMDVec

◆ SSE2NEON_sbox

static const uint8_t SSE2NEON_sbox[256] = SSE2NEON_AES_DATA(SSE2NEON_AES_H0)