BitMagic-C++
|
Processor specific optimizations for AVX2 instructions (internals) More...
Functions | |
bm::id_t | bm::avx2_bit_count (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end) |
AVX2 Harley-Seal popcount. The algorithm is based on the paper "Faster Population Counts
using AVX2 Instructions" by Daniel Lemire, Nathan Kurz and Wojciech Mula (23 Nov 2016). More... | |
bm::id_t | bm::avx2_bit_block_count (const bm::word_t *const block, bm::id64_t digest) |
Calculate population count based on digest. More... | |
bm::id_t | bm::avx2_bit_count_and (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
AND bit count for two aligned bit-blocks. More... | |
bm::id_t | bm::avx2_bit_count_xor (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
XOR bit count for two aligned bit-blocks. More... | |
bm::id_t | bm::avx2_bit_count_sub (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
AND NOT bit count for two aligned bit-blocks. More... | |
void | bm::avx2_xor_arr_2_mask (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end, bm::word_t mask) |
XOR array elements with the specified mask: dst = *src ^ mask. More... | |
void | bm::avx2_andnot_arr_2_mask (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end, bm::word_t mask) |
Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask. More... | |
unsigned | bm::avx2_and_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
AND array elements against another array dst &= *src. More... | |
bool | bm::avx2_and_digest (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
AND block digest stride dst &= *src. More... | |
bool | bm::avx2_and_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
AND block digest stride 2 way dst = *src1 & *src2. More... | |
bool | bm::avx2_and_or_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
AND-OR block digest stride 2 way dst |= *src1 & *src2. More... | |
bool | bm::avx2_and_digest_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4) |
AND block digest stride. More... | |
unsigned | bm::avx2_and_arr_unal (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end) |
AND array elements against another array (unaligned) dst &= *src. More... | |
bool | bm::avx2_or_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
OR array elements against another array dst |= *src. More... | |
bool | bm::avx2_or_arr_unal (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end) |
OR array elements against another unaligned array dst |= *src. More... | |
bool | bm::avx2_or_block_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
OR 2 arrays and copy to the destination: dst = *src1 | *src2. More... | |
bool | bm::avx2_or_block_3way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
OR array elements against another 2 arrays: dst |= *src1 | *src2. More... | |
bool | bm::avx2_or_block_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4) |
OR array elements against another 4 arrays: dst |= *src1 | *src2 | *src3 | *src4. More... | |
unsigned | bm::avx2_xor_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
XOR block against another dst ^= *src. More... | |
unsigned | bm::avx2_xor_block_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
3-operand XOR: dst = *src1 ^ *src2. More... | |
unsigned | bm::avx2_sub_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
AND-NOT (SUB) array elements against another array dst &= ~*src. More... | |
bool | bm::avx2_sub_digest (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
SUB (AND NOT) block digest stride dst &= ~*src. More... | |
bool | bm::avx2_sub_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2 More... | |
BMFORCEINLINE void | bm::avx2_set_block (__m256i *BMRESTRICT dst, bm::word_t value) |
AVX2 block memset dst = value. More... | |
void | bm::avx2_copy_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
AVX2 block copy dst = *src. More... | |
void | bm::avx2_copy_block_unalign (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
AVX2 block copy (unaligned SRC) dst = *src. More... | |
void | bm::avx2_stream_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
AVX2 block copy dst = *src. More... | |
void | bm::avx2_stream_block_unalign (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
AVX2 block copy (unaligned SRC) dst = *src. More... | |
void | bm::avx2_invert_block (__m256i *BMRESTRICT dst) |
Invert bit-block: dst = ~*dst (equivalent to XOR-ing dst with an all-ones mask). More... | |
bool | bm::avx2_is_all_zero (const __m256i *BMRESTRICT block) |
check if block is all zero bits More... | |
bool | bm::avx2_is_digest_zero (const __m256i *BMRESTRICT block) |
check if digest stride is all zero bits More... | |
void | bm::avx2_block_set_digest (__m256i *dst, unsigned value) |
set digest stride to 0xFF.. or 0x0 value More... | |
bool | bm::avx2_is_all_one (const __m256i *BMRESTRICT block) |
check if block is all one bits More... | |
BMFORCEINLINE bool | bm::avx2_test_all_one_wave (const void *ptr) |
check if wave of pointers is all 0xFFF More... | |
BMFORCEINLINE bool | bm::avx2_test_all_zero_wave (const void *ptr) |
check if wave of pointers is all NULL More... | |
BMFORCEINLINE bool | bm::avx2_test_all_zero_wave2 (const void *ptr0, const void *ptr1) |
check if 2 waves of pointers are all NULL More... | |
BMFORCEINLINE bool | bm::avx2_test_all_eq_wave2 (const void *ptr0, const void *ptr1) |
check if 2 waves of pointers are all the same (NULL or FULL) More... | |
bool | bm::avx2_shift_l1 (__m256i *block, bm::word_t *empty_acc, unsigned co1) |
block shift left by 1 More... | |
bool | bm::avx2_shift_r1 (__m256i *block, bm::word_t *empty_acc, unsigned co1) |
block shift right by 1 More... | |
bool | bm::avx2_shift_r1_and (__m256i *BMRESTRICT block, bm::word_t co1, const __m256i *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest) |
fused block shift right by 1 plus AND More... | |
unsigned | bm::avx2_bit_block_calc_change (const __m256i *BMRESTRICT block, unsigned size) |
More... | |
void | bm::avx2_bit_block_calc_xor_change (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT xor_block, unsigned size, unsigned *BMRESTRICT gcount, unsigned *BMRESTRICT bcount) |
More... | |
void | bm::avx2_bit_block_calc_change_bc (const __m256i *BMRESTRICT block, unsigned *gcount, unsigned *bcount) |
More... | |
bool | bm::avx2_bit_find_first_diff (const __m256i *BMRESTRICT block1, const __m256i *BMRESTRICT block2, unsigned *pos) |
Find first bit which is different between two bit-blocks. More... | |
bool | bm::avx2_bit_find_first (const __m256i *BMRESTRICT block, unsigned *pos) |
Find first bit set. More... | |
int | bm::avx2_cmpge_u32 (__m256i vect8, unsigned value) |
Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array. More... | |
int | bm::avx2_cmpge_u16 (__m256i vect16, unsigned short value) |
Experimental (test) function to do SIMD vector search in sorted, growing array. More... | |
unsigned | bm::avx2_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) |
Hybrid binary search, starts as binary, then switches to scan. More... | |
unsigned | bm::avx2_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos) |
Hybrid binary search, starts as binary, then switches to scan. More... | |
unsigned | bm::avx2_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to) |
lower bound (greater or equal) linear scan in a sorted array (ascending order) More... | |
unsigned | bm::avx2_bit_to_gap (gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len) |
Convert bit block to GAP block. More... | |
void | bm::avx2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) |
Build partial XOR product of 2 bit-blocks using digest mask. More... | |
void | bm::avx2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT |
Build partial XOR product of 2 bit-blocks using digest mask. More... | |
Processor specific optimizations for AVX2 instructions (internals)
|
inline |
AND array elements against another array (unaligned) dst &= *src.
Definition at line 729 of file bmavx2.h.
Referenced by bm::decoder::get_32_AND().
|
inline |
AND array elements against another array dst &= *src.
Definition at line 496 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
AVX2 calculate number of bit changes from 0 to 1
Definition at line 1870 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, and BM_AVX2_POPCNT_PROLOG.
|
inline |
AVX2 calculate number of bit changes from 0 to 1 and bitcount
Definition at line 2038 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and bm::set_block_size.
|
inline |
AVX2 calculate number of bit changes from 0 to 1 from a XOR product
Definition at line 1941 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.
|
inline |
Calculate population count based on digest.
Definition at line 232 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), BMRESTRICT, and bm::set_block_digest_wave_size.
|
inline |
Build partial XOR product of 2 bit-blocks using digest mask.
target_block | - target := block ^ xor_block |
block | - arg1 |
xor_block | - arg2 |
digest | - mask for each block wave to XOR (1) or just copy (0) |
Definition at line 3109 of file bmavx2.h.
References bm::block_waves, and bm::set_block_digest_wave_size.
|
inline |
Build partial XOR product of 2 bit-blocks using digest mask.
target_block | - target ^= xor_block |
xor_block | - arg1 |
digest | - mask for each block wave to XOR (1) |
Definition at line 3160 of file bmavx2.h.
References bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), and bm::set_block_digest_wave_size.
|
inline |
AVX2 Harley-Seal popcount. The algorithm is based on the paper "Faster Population Counts using AVX2 Instructions" by Daniel Lemire, Nathan Kurz and Wojciech Mula (23 Nov 2016).
Definition at line 156 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BM_CSA256.
|
inline |
AND bit count for two aligned bit-blocks.
Definition at line 290 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, and BM_AVX2_POPCNT_PROLOG.
|
inline |
AND NOT bit count for two aligned bit-blocks.
Definition at line 413 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, and BM_AVX2_POPCNT_PROLOG.
|
inline |
XOR bit count for two aligned bit-blocks.
Definition at line 368 of file bmavx2.h.
References BM_AVX2_BIT_COUNT, and BM_AVX2_POPCNT_PROLOG.
|
inline |
Find first bit set.
Definition at line 2181 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, and bm::set_block_size.
|
inline |
Find first bit which is different between two bit-blocks.
Definition at line 2123 of file bmavx2.h.
References BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, and bm::set_block_size.
|
inline |
Convert bit block to GAP block.
Definition at line 2995 of file bmavx2.h.
References BM_ASSERT, BMRESTRICT, and bm::set_block_size.
|
inline |
|
inline |
|
inline |
|
inline |
AVX2 block copy dst = *src.
Definition at line 1290 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
AVX2 block copy (unaligned SRC) dst = *src.
Definition at line 1332 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
Hybrid binary search, starts as binary, then switches to scan.
NOTE: AVX code uses _mm256_subs_epu16 - saturated subtraction, which gives 0 (not a negative value) if A < B.
buf | - GAP buffer pointer. |
pos | - index of the element. |
is_set | - output. GAP value (0 or 1). |
Definition at line 2724 of file bmavx2.h.
References BM_ASSERT, and bm::gap_max_bits.
Referenced by bm::avx2_gap_test().
|
inline |
Hybrid binary search, starts as binary, then switches to scan.
Definition at line 2823 of file bmavx2.h.
References bm::avx2_gap_bfind().
Referenced by bm::gap_test_unr().
|
inline |
Invert bit-block: dst = ~*dst (equivalent to XOR-ing dst with an all-ones mask).
Definition at line 1464 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
check if block is all one bits
Definition at line 1554 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
check if block is all zero bits
Definition at line 1495 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
|
inline |
lower bound (greater or equal) linear scan in a sorted array (ascending order)
Definition at line 2836 of file bmavx2.h.
References BMRESTRICT.
|
inline |
OR array elements against another unaligned array dst |= *src.
Definition at line 840 of file bmavx2.h.
Referenced by bm::decoder::get_32_OR().
|
inline |
OR array elements against another array dst |= *src.
Definition at line 787 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
OR 2 arrays and copy to the destination: dst = *src1 | *src2.
Definition at line 893 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
OR array elements against another 2 arrays: dst |= *src1 | *src2.
Definition at line 939 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
OR array elements against another 4 arrays: dst |= *src1 | *src2 | *src3 | *src4.
Definition at line 991 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
BMFORCEINLINE void bm::avx2_set_block | ( | __m256i *BMRESTRICT | dst, |
bm::word_t | value | ||
) |
AVX2 block memset dst = value.
Definition at line 1264 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
|
inline |
|
inline |
fused block shift right by 1 plus AND
Definition at line 1746 of file bmavx2.h.
References BM_ASSERT, and bm::set_block_digest_wave_size.
|
inline |
AVX2 block copy dst = *src.
Definition at line 1376 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
AVX2 block copy (unaligned SRC) dst = *src.
Definition at line 1418 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
AND-NOT (SUB) array elements against another array dst &= ~*src.
Definition at line 1156 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
|
inline |
BMFORCEINLINE bool bm::avx2_test_all_eq_wave2 | ( | const void * | ptr0, |
const void * | ptr1 | ||
) |
check if 2 waves of pointers are all the same (NULL or FULL)
Definition at line 1616 of file bmavx2.h.
Referenced by bm::bvector< Alloc >::combine_operation_or().
BMFORCEINLINE bool bm::avx2_test_all_one_wave | ( | const void * | ptr | ) |
BMFORCEINLINE bool bm::avx2_test_all_zero_wave | ( | const void * | ptr | ) |
check if wave of pointers is all NULL
Definition at line 1592 of file bmavx2.h.
Referenced by bm::bvector< Alloc >::combine_operation_and(), bm::bvector< Alloc >::combine_operation_sub(), bm::find_not_null_ptr(), bm::for_each_bit(), and bm::for_each_nzblock().
BMFORCEINLINE bool bm::avx2_test_all_zero_wave2 | ( | const void * | ptr0, |
const void * | ptr1 | ||
) |
check if 2 waves of pointers are all NULL
Definition at line 1603 of file bmavx2.h.
Referenced by bm::bvector< Alloc >::combine_operation_xor().
|
inline |
|
inline |
XOR block against another dst ^= *src.
Definition at line 1060 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.
|
inline |
3-operand XOR: dst = *src1 ^ *src2.
Definition at line 1106 of file bmavx2.h.
References BMRESTRICT, and bm::set_block_size.