1#ifndef BMUTIL__H__INCLUDED__
2#define BMUTIL__H__INCLUDED__
29#if defined(__arm64__) || defined(__arm__)
32 #if defined(_M_AMD64) || defined(_M_X64)
34 #elif defined(__x86_64__)
35 #include <x86intrin.h>
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Wconversion"
45#pragma warning( push )
46#pragma warning( disable : 4146)
65#if defined(BMAVX512OPT)
71#if defined(BMSSE2OPT) || defined(BMSSE42OPT)
// Expose the bit-block union as a mutable pointer to its array of 512-bit SIMD words
// (returns the address of the first w512 element of member union b_).
81 explicit operator __m512i*() {
return &
b_.w512[0]; }
// Const counterpart: read-only pointer to the union's 512-bit SIMD word array.
82 explicit operator const __m512i*()
const {
return &
b_.w512[0]; }
// Expose the bit-block union as a mutable pointer to its array of 256-bit (AVX2) words
// (address of the first w256 element of member union b_).
85 explicit operator __m256i*() {
return &
b_.w256[0]; }
// Const counterpart: read-only pointer to the union's 256-bit SIMD word array.
86 explicit operator const __m256i*()
const {
return &
b_.w256[0]; }
88#if defined(BMSSE2OPT) || defined(BMSSE42OPT)
// Expose the bit-block union as a mutable pointer to its array of 128-bit (SSE) words
// (address of the first w128 element of member union b_; only compiled under SSE2/SSE4.2).
89 explicit operator __m128i*() {
return &
b_.w128[0]; }
// Const counterpart: read-only pointer to the union's 128-bit SIMD word array.
90 explicit operator const __m128i*()
const {
return &
b_.w128[0]; }
105 return v1 < v2 ? v1 : v2;
// Compile-time branch selector: this specialization always reports true.
// NOTE(review): enclosing template (presumably the 'true' case of an ad-hoc
// conditional-expression struct) is outside this view — confirm against full file.
114 static bool test() {
return true; }
// Compile-time branch selector: this specialization always reports false.
// NOTE(review): enclosing template (presumably the 'false' case of an ad-hoc
// conditional-expression struct) is outside this view — confirm against full file.
118 static bool test() {
return false; }
130 if (x >= 1<<16) { x = (T)(x >> 16); l |= 16; }
131 if (x >= 1<<8) { x = (T)(x >> 8); l |= 8; }
132 if (x >= 1<<4) { x = (T)(x >> 4); l |= 4; }
133 if (x >= 1<<2) { x = (T)(x >> 2); l |= 2; }
134 if (x >= 1<<1) l |=1;
146 if (x >= 1<<1) l |=1;
177 ((x >= (1U << 24)) ? ((x >= (1 << 28)) ? 28u : 24u) : ((x >= (1U << 20)) ? 20u : 16u))
179 ((x >= (1U << 8)) ? ((x >= (1U << 12)) ? 12u : 8u) : ((x >= (1U << 4)) ? 4u : 0u));
231#if defined(BM_x86) && !(defined(__arm__) || defined(__aarch64__))
238 asm volatile(
" bsfl %1, %0":
"=r"(r):
"rm"(v) );
246 asm volatile(
" bsrl %1, %0":
"=r"(r):
"rm"(v) );
254#if defined(_M_AMD64) || defined(_M_X64)
257unsigned int bsr_asm32(
unsigned int value)
BMNOEXCEPT
260 _BitScanReverse(&r, value);
265unsigned int bsf_asm32(
unsigned int value)
BMNOEXCEPT
268 _BitScanForward(&r, value);
275unsigned int bsr_asm32(
unsigned int value)
BMNOEXCEPT
281unsigned int bsf_asm32(
unsigned int value)
BMNOEXCEPT
307#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
308 return (
unsigned) (31 - __builtin_clz(w));
310# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
311 return bm::bsr_asm32(w);
313 return bm::ilog2_LUT<unsigned int>(w);
322#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
323 return (
unsigned) __builtin_ctz(w);
325# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
326 return bm::bsf_asm32(w);
337#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
347#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
359#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
360 return (
unsigned)_lzcnt_u32(w);
362 #if defined(BM_USE_GCC_BUILD) || defined(__GNUG__)
363 return (
unsigned) __builtin_clz(w);
376#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
377 return (
unsigned)_lzcnt_u64(w);
379 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
380 return (
unsigned) __builtin_clzll(w);
383 unsigned w1 = unsigned(w >> 32);
405#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
406 return (
unsigned)_tzcnt_u32(w);
408 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
409 return (
unsigned) __builtin_ctz(w);
423#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
424 return (
unsigned)_tzcnt_u64(w);
426 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
427 return (
unsigned) __builtin_ctzll(w);
430 unsigned w1 = unsigned(w);
434 w1 = unsigned(w >> 32);
459 #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
460 return (
unsigned) (63 - __builtin_clzll(value));
464 unsigned v = (unsigned)v8;
509 {
return v1 & ~~v2; }
516 unsigned m = (~0u << nbit);
525 unsigned m = ~0u >> (31 - nbit);
537 x ^= y; y ^= x; x ^= y;
542#pragma GCC diagnostic pop
545#pragma warning( pop )
559 for (
unsigned i = 0; w && (i < 8); ++i, w >>= 8)
561 if ((
unsigned char) w)
575#if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
578 #if defined(BM_USE_GCC_BUILD)
579 return (
bm::id_t)__builtin_popcount(w);
598#if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
599 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
600 return unsigned(_mm_popcnt_u64(x));
602 return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((
unsigned)x);
605 #if defined(BM_USE_GCC_BUILD) || defined(__arm64__)
606 return (
unsigned)__builtin_popcountll(x);
608 #if (defined(__arm__))
611 x = x - ((x >> 1) & 0x5555555555555555);
612 x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
613 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
627template<
typename T >
630#if defined (BM_ALLOC_ALIGN)
Constants, lookup tables and typedefs.
Mini auto-pointer for internal memory management.
ptr_guard(T *p) BMNOEXCEPT
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
BMFORCEINLINE unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
Portable LZCNT (uses a minimal lookup table)
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
unsigned bit_scan_reverse(T value) BMNOEXCEPT
BMFORCEINLINE unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
Portable TZCNT (uses a 37-entry lookup table)
unsigned count_leading_zeros_u32(unsigned w) BMNOEXCEPT
32-bit bit-scan reverse
BMFORCEINLINE unsigned bit_scan_forward32(unsigned w) BMNOEXCEPT
BMFORCEINLINE T ilog2(T x) BMNOEXCEPT
Fast loop-less function to find LOG2.
BMFORCEINLINE unsigned mask_l_u32(unsigned nbit) BMNOEXCEPT
BMFORCEINLINE T bit_scan_fwd(T v) BMNOEXCEPT
BMFORCEINLINE unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
BMFORCEINLINE T ilog2_LUT(T x) BMNOEXCEPT
Lookup table based integer LOG2.
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
BMFORCEINLINE T min_value(T v1, T v2) BMNOEXCEPT
Get minimum of 2 values.
unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT
64-bit bit-scan reverse
const unsigned set_block_size
unsigned long long int id64_t
unsigned compute_h64_mask(unsigned long long w)
Compute a mask describing which bytes are present (non-zero) in a 64-bit word.
BMFORCEINLINE unsigned mask_r_u32(unsigned nbit) BMNOEXCEPT
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
bool is_aligned(T *p)
Check pointer alignment.
unsigned short gap_word_t
unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT
64-bit bit-scan fwd
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
unsigned count_trailing_zeros_u32(unsigned w) BMNOEXCEPT
32-bit bit-scan fwd
static BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
bit-block array wrapped into union for correct interpretation of 32-bit vs 64-bit access vs SIMD
union bm::bit_block_t::bunion_t b_
const bm::word_t * end() const
const bm::word_t * begin() const
Structure to aid in counting bits: its table holds the bit count for every number in the 0-255 range.
Structure keeps all-left/right ON bits masks.
ad-hoc conditional expressions
Structure keeps index of first right 1 bit for every byte.
Structure for LZCNT constants (4-bit)
static BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
static BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
Structure for TZCNT constants.
static BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
__m128i BM_VECT_ALIGN w128[bm::set_block_size/4] BM_VECT_ALIGN_ATTR
__m256i BM_VECT_ALIGN w256[bm::set_block_size/8] BM_VECT_ALIGN_ATTR
bm::id64_t BM_VECT_ALIGN w64[bm::set_block_size/2] BM_VECT_ALIGN_ATTR
bm::word_t BM_VECT_ALIGN w32[bm::set_block_size] BM_VECT_ALIGN_ATTR