BitMagic-C++
bmutil.h
#ifndef BMUTIL__H__INCLUDED__
#define BMUTIL__H__INCLUDED__
/*
Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

For more information please visit: http://bitmagic.io
*/

/*! \file bmutil.h
    \brief Bit manipulation primitives (internal)
*/


#include "bmdef.h"
#include "bmconst.h"

#if defined(__arm64__) || defined(__arm__)
//#include "sse2neon.h"
#else
    #if defined(_M_AMD64) || defined(_M_X64)
    #include <intrin.h>
    #elif defined(__x86_64__)
    #include <x86intrin.h>
    #endif
#endif

#ifdef __GNUG__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif

#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( disable : 4146)
#endif


namespace bm
{

    /**
        bit-block array wrapped into union for correct interpretation of
        32-bit vs 64-bit access vs SIMD
        @internal
    */
    struct bit_block_t
    {
        union bunion_t
        {
            bm::word_t  BM_VECT_ALIGN w32[bm::set_block_size] BM_VECT_ALIGN_ATTR;
            bm::id64_t  BM_VECT_ALIGN w64[bm::set_block_size/2] BM_VECT_ALIGN_ATTR;

#if defined(BMAVX512OPT)
            __m512i  BM_VECT_ALIGN w512[bm::set_block_size/16] BM_VECT_ALIGN_ATTR;
#endif
#if defined(BMAVX2OPT)
            __m256i  BM_VECT_ALIGN w256[bm::set_block_size/8] BM_VECT_ALIGN_ATTR;
#endif
#if defined(BMSSE2OPT) || defined(BMSSE42OPT)
            __m128i  BM_VECT_ALIGN w128[bm::set_block_size/4] BM_VECT_ALIGN_ATTR;
#endif
        } b_;

        operator bm::word_t*() { return &(b_.w32[0]); }
        operator const bm::word_t*() const { return &(b_.w32[0]); }
        explicit operator bm::id64_t*() { return &b_.w64[0]; }
        explicit operator const bm::id64_t*() const { return &b_.w64[0]; }
#ifdef BMAVX512OPT
        explicit operator __m512i*() { return &b_.w512[0]; }
        explicit operator const __m512i*() const { return &b_.w512[0]; }
#endif
#ifdef BMAVX2OPT
        explicit operator __m256i*() { return &b_.w256[0]; }
        explicit operator const __m256i*() const { return &b_.w256[0]; }
#endif
#if defined(BMSSE2OPT) || defined(BMSSE42OPT)
        explicit operator __m128i*() { return &b_.w128[0]; }
        explicit operator const __m128i*() const { return &b_.w128[0]; }
#endif

        const bm::word_t* begin() const { return (b_.w32 + 0); }
        bm::word_t* begin() { return (b_.w32 + 0); }
        const bm::word_t* end() const { return (b_.w32 + bm::set_block_size); }
        bm::word_t* end() { return (b_.w32 + bm::set_block_size); }
    };

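    // Illustrative usage sketch (not part of the original declarations): the
    // union lets one physical block be read with the widest type the build
    // supports, while begin()/end() always expose it as bm::set_block_size
    // 32-bit words.
    //
    //     bm::bit_block_t blk;
    //     for (bm::word_t* p = blk.begin(); p != blk.end(); ++p)
    //         *p = 0;                                      // 32-bit traversal
    //     bm::id64_t* p64 = static_cast<bm::id64_t*>(blk); // 64-bit view of the same memory
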
/**
    Get minimum of 2 values
*/
template<typename T>
BMFORCEINLINE T min_value(T v1, T v2) BMNOEXCEPT
{
    return v1 < v2 ? v1 : v2;
}

/**
    \brief ad-hoc conditional expressions
    \internal
*/
template <bool b> struct conditional
{
    static bool test() { return true; }
};
template <> struct conditional<false>
{
    static bool test() { return false; }
};


/**
    Fast loop-less function to find LOG2
*/
template<typename T>
BMFORCEINLINE T ilog2(T x) BMNOEXCEPT
{
    unsigned int l = 0;

    if (x >= 1<<16) { x = (T)(x >> 16); l |= 16; }
    if (x >= 1<<8)  { x = (T)(x >> 8);  l |= 8; }
    if (x >= 1<<4)  { x = (T)(x >> 4);  l |= 4; }
    if (x >= 1<<2)  { x = (T)(x >> 2);  l |= 2; }
    if (x >= 1<<1)  l |= 1;
    return (T)l;
}

template<>
BMFORCEINLINE
bm::gap_word_t ilog2(gap_word_t x) BMNOEXCEPT
{
    unsigned int l = 0;
    if (x >= 1<<8)  { x = (bm::gap_word_t)(x >> 8); l |= 8; }
    if (x >= 1<<4)  { x = (bm::gap_word_t)(x >> 4); l |= 4; }
    if (x >= 1<<2)  { x = (bm::gap_word_t)(x >> 2); l |= 2; }
    if (x >= 1<<1)  l |= 1;
    return (bm::gap_word_t)l;
}

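// Example (illustrative): ilog2 returns floor(log2(x)) by successively
// halving the search range, e.g. ilog2(1000u):
//   1000 < 2^16, 1000 >= 2^8  -> x = 3, l = 8
//   3 < 2^4, 3 < 2^2, 3 >= 2  -> l = 9   (2^9 = 512 <= 1000 < 1024)
// Note that both ilog2(0) and ilog2(1) evaluate to 0.
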
/**
    Mini auto-pointer for internal memory management
    @internal
*/
template<class T>
class ptr_guard
{
public:
    ptr_guard(T* p) BMNOEXCEPT : ptr_(p) {}
    ~ptr_guard() { delete ptr_; }
private:
    ptr_guard(const ptr_guard<T>& p);
    ptr_guard& operator=(const ptr_guard<T>& p);
private:
    T* ptr_;
};

/**
    Portable LZCNT (uses a minimal LUT)
    @ingroup bitfunc
    @internal
*/
BMFORCEINLINE
unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
{
    unsigned n =
        (x >= (1U << 16)) ?
        ((x >= (1U << 24)) ? ((x >= (1U << 28)) ? 28u : 24u) : ((x >= (1U << 20)) ? 20u : 16u))
        :
        ((x >= (1U << 8))  ? ((x >= (1U << 12)) ? 12u : 8u) : ((x >= (1U << 4)) ? 4u : 0u));
    return unsigned(bm::lzcnt_table<true>::_lut[x >> n]) - n;
}
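
// Worked example (illustrative): the ternary chain rounds the MSB position
// down to a multiple of 4, so the remaining 4-bit value indexes a 16-entry
// LUT, which the formula assumes holds clz32() of the values 0..15.
//   x = 0x00030000 : x >= 2^16, x < 2^20     -> n = 16
//   _lut[x >> 16] = _lut[3] = 30 (clz32(3))  -> 30 - 16 = 14 leading zeros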

/**
    Portable TZCNT (uses a 37-entry LUT)
    @ingroup bitfunc
    @internal
*/
BMFORCEINLINE
unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
{
    // (v & -v) isolates the last set bit
    return unsigned(bm::tzcnt_table<true>::_lut[(-v & v) % 37]);
}
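
// How the 37-entry LUT works (illustrative): (-v & v) reduces v to a single
// power of two, and the 32 powers of two all have distinct residues mod 37,
// so the table acts as a perfect hash from residue back to bit index.
//   v = 0b101000 : (-v & v) = 8, 8 % 37 = 8, _lut[8] = 3  -> 3 trailing zeros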
194
195/**
196 Lookup table based integer LOG2
197*/
198template<typename T>
200{
201 unsigned l = 0;
202 if (x & 0xffff0000)
203 {
204 l += 16; x >>= 16;
205 }
206
207 if (x & 0xff00)
208 {
209 l += 8; x >>= 8;
210 }
211 return l + T(bm::first_bit_table<true>::_idx[x]);
212}

/**
    Lookup table based short integer LOG2
*/
template<>
BMFORCEINLINE bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x) BMNOEXCEPT
{
    if (x & 0xff00)
    {
        x = bm::gap_word_t(x >> 8u);
        return bm::gap_word_t(bm::first_bit_table<true>::_idx[x] + 8u);
    }
    return bm::gap_word_t(bm::first_bit_table<true>::_idx[x]);
}


// if we are running on an x86 CPU we can use inline ASM

#if defined(BM_x86) && !(defined(__arm__) || defined(__aarch64__))
#ifdef __GNUG__

BMFORCEINLINE
unsigned bsf_asm32(unsigned int v) BMNOEXCEPT
{
    unsigned r;
    asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
    return r;
}

BMFORCEINLINE
unsigned bsr_asm32(unsigned int v) BMNOEXCEPT
{
    unsigned r;
    asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
    return r;
}

#endif  // __GNUG__

#ifdef _MSC_VER

#if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported

BMFORCEINLINE
unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
{
    unsigned long r;
    _BitScanReverse(&r, value);
    return r;
}

BMFORCEINLINE
unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
{
    unsigned long r;
    _BitScanForward(&r, value);
    return r;
}

#else

BMFORCEINLINE
unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
{
    __asm bsr eax, value
}

BMFORCEINLINE
unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
{
    __asm bsf eax, value
}

#endif

#endif // _MSC_VER

#endif // BM_x86



// From:
// http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562
//
template<typename T>
BMFORCEINLINE T bit_scan_fwd(T v) BMNOEXCEPT
{
    return
        DeBruijn_bit_position<true>::_multiply[(((v & -v) * 0x077CB531U)) >> 27];
}
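
// Illustrative note: 0x077CB531 is a 32-bit De Bruijn sequence B(2,5).
// (v & -v) is a power of two, so the multiply is just a left shift; the top
// five bits of the product are therefore distinct for every bit position and
// index the _multiply table (e.g. for v with bit 0 set the product is the
// constant itself, whose top five bits are 0, so _multiply[0] must be 0).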

BMFORCEINLINE
unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
    return (unsigned) (31 - __builtin_clz(w));
#else
# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
    return bm::bsr_asm32(w);
# else
    return bm::ilog2_LUT<unsigned int>(w);
# endif
#endif
}

BMFORCEINLINE
unsigned bit_scan_forward32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
    return (unsigned) __builtin_ctz(w);
#else
# if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
    return bm::bsf_asm32(w);
# else
    return bit_scan_fwd(w);
# endif
#endif
}


BMFORCEINLINE
unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
{
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return _blsr_u64(w);
#else
    return w & (w - 1);
#endif
}

BMFORCEINLINE
unsigned long long bmi_blsi_u64(unsigned long long w)
{
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return _blsi_u64(w);
#else
    return w & (-w);
#endif
}

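// Examples (illustrative) of the portable fallbacks:
//   w = 0b10110 : w & (w - 1) = 0b10100   (BLSR, lowest set bit cleared)
//   w = 0b10110 : w & (-w)    = 0b00010   (BLSI, lowest set bit isolated)
// (-w) on an unsigned value is well defined; MSVC warning C4146 about it is
// suppressed at the top of this header.
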
/// 32-bit bit-scan reverse
inline
unsigned count_leading_zeros_u32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_lzcnt_u32(w);
#else
    #if defined(BM_USE_GCC_BUILD) || defined(__GNUG__)
        return (unsigned) __builtin_clz(w);
    #else
        return bm::count_leading_zeros(w); // portable
    #endif
#endif
}


/// 64-bit bit-scan reverse
inline
unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT
{
    BM_ASSERT(w);
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_lzcnt_u64(w);
#else
    #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
        return (unsigned) __builtin_clzll(w);
    #else
        unsigned z;
        unsigned w1 = unsigned(w >> 32);
        if (!w1)
        {
            z = 32;
            w1 = unsigned(w);
            z += 31 - bm::bit_scan_reverse32(w1);
        }
        else
        {
            z = 31 - bm::bit_scan_reverse32(w1);
        }
        return z;
    #endif
#endif
}
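
// Worked example (illustrative) for the portable 64-bit branch:
//   w = 0x0000000000F00000 : high half is 0, so z starts at 32;
//   bit_scan_reverse32(0x00F00000) = 23, z += 31 - 23 -> z = 40,
//   which matches the expected 40 leading zeros (MSB at bit 23).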

/// 32-bit bit-scan fwd
inline
unsigned count_trailing_zeros_u32(unsigned w) BMNOEXCEPT
{
    BM_ASSERT(w);

#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_tzcnt_u32(w);
#else
    #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
        return (unsigned) __builtin_ctz(w);
    #else
        return bm::bit_scan_forward32(w);
    #endif
#endif
}


/// 64-bit bit-scan fwd
inline
unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT
{
    BM_ASSERT(w);

#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
    return (unsigned)_tzcnt_u64(w);
#else
    #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
        return (unsigned) __builtin_ctzll(w);
    #else
        unsigned z;
        unsigned w1 = unsigned(w);
        if (!w1)
        {
            z = 32;
            w1 = unsigned(w >> 32);
            z += bm::bit_scan_forward32(w1);
        }
        else
        {
            z = bm::bit_scan_forward32(w1);
        }
        return z;
    #endif
#endif
}



/*!
    Returns BSR value
    @ingroup bitfunc
*/
template <class T>
unsigned bit_scan_reverse(T value) BMNOEXCEPT
{
    BM_ASSERT(value);

    if (bm::conditional<sizeof(T)==8>::test())
    {
    #if defined(BM_USE_GCC_BUILD) || (defined(__GNUG__) && (defined(__arm__) || defined(__aarch64__)))
        return (unsigned) (63 - __builtin_clzll(value));
    #else
        bm::id64_t v8 = value;
        v8 >>= 32;
        unsigned v = (unsigned)v8;
        if (v)
        {
            v = bm::bit_scan_reverse32(v);
            return v + 32;
        }
    #endif
    }
    return bm::bit_scan_reverse32((unsigned)value);
}
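
// Usage sketch (illustrative): conditional<> dispatches on the width of T at
// compile time, so both of these calls are valid:
//   bm::bit_scan_reverse(0x80u) == 7
//   bm::bit_scan_reverse((bm::id64_t)1 << 40) == 40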

/*! \brief and functor
    \internal
 */
struct and_func
{
    static
    BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
        { return v1 & v2; }
};
/*! \brief xor functor
    \internal
 */
struct xor_func
{
    static
    BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
        { return v1 ^ v2; }
};
/*! \brief or functor
    \internal
 */
struct or_func
{
    static
    BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
        { return v1 | v2; }
};
/*! \brief sub functor
    \internal
 */
struct sub_func
{
    static
    BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
        { return v1 & ~v2; }
};

BMFORCEINLINE
unsigned mask_r_u32(unsigned nbit) BMNOEXCEPT
{
    BM_ASSERT(nbit < 32);
    unsigned m = (~0u << nbit);
    BM_ASSERT(m == bm::block_set_table<true>::_right[nbit]);
    return m;
}

BMFORCEINLINE
unsigned mask_l_u32(unsigned nbit) BMNOEXCEPT
{
    BM_ASSERT(nbit < 32);
    unsigned m = ~0u >> (31 - nbit);
    BM_ASSERT(m == bm::block_set_table<true>::_left[nbit]);
    return m;
}

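// Examples (illustrative):
//   mask_r_u32(4) == 0xFFFFFFF0   (bits 4..31 set)
//   mask_l_u32(4) == 0x0000001F   (bits 0..4 set)
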
/// XOR swap two variables
///
/// @internal
template<typename W>
BMFORCEINLINE void xor_swap(W& x, W& y) BMNOEXCEPT
{
    BM_ASSERT(&x != &y);
    x ^= y; y ^= x; x ^= y;
}

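// Example (illustrative): xor_swap exchanges values without a temporary; the
// assert guards against aliasing, since xor-swapping a variable with itself
// would zero it.
//   unsigned a = 3, b = 5;
//   bm::xor_swap(a, b);   // a == 5, b == 3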

#ifdef __GNUG__
#pragma GCC diagnostic pop
#endif
#ifdef _MSC_VER
#pragma warning( pop )
#endif

/**
    Compute a mask of byte presence in a 64-bit word

    @param w - [in] input 64-bit word
    @return mask with 8 bits
    @internal
 */
inline
unsigned compute_h64_mask(unsigned long long w)
{
    unsigned h_mask = 0;
    for (unsigned i = 0; w && (i < 8); ++i, w >>= 8)
    {
        if ((unsigned char) w)
            h_mask |= (1u<<i);
    } // for
    return h_mask;
}

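// Worked example (illustrative): bit i of the result is set when byte i of
// the input word is non-zero (byte 0 = least significant).
//   w = 0x00FF000000000100 : bytes 1 and 6 are non-zero
//   compute_h64_mask(w) == 0b01000010 == 0x42
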

/*!
    Returns bit count
    @ingroup bitfunc
*/
BMFORCEINLINE
bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
{
#if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
    return bm::id_t(_mm_popcnt_u32(w));
#else
    #if defined(BM_USE_GCC_BUILD)
        return (bm::id_t)__builtin_popcount(w);
    #else
    return
        bm::bit_count_table<true>::_count[(unsigned char)(w)] +
        bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] +
        bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] +
        bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)];
    #endif
#endif
}


/*!
    Function calculates number of 1 bits in 64-bit word.
    @ingroup bitfunc
*/
BMFORCEINLINE
unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
{
#if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
    #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
        return unsigned(_mm_popcnt_u64(x));
    #else // 32-bit
        return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((unsigned)x);
    #endif
#else
    #if defined(BM_USE_GCC_BUILD) || defined(__arm64__)
        return (unsigned)__builtin_popcountll(x);
    #else
    #if (defined(__arm__)) // 32-bit
        return bm::word_bitcount(x >> 32) + bm::word_bitcount((unsigned)x);
    #else
        x = x - ((x >> 1) & 0x5555555555555555);
        x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
        x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
        x = x + (x >> 8);
        x = x + (x >> 16);
        x = x + (x >> 32);
        return x & 0xFF;
    #endif
    #endif
#endif
}

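// Note on the portable branch (illustrative): this is the classic SWAR
// popcount. The first step leaves 2-bit partial counts in place, the 0x3333
// step sums them into 4-bit counts, the 0x0F0F step into per-byte counts,
// and the final shifts accumulate every byte into the low 8 bits.
//   word_bitcount64(0xF0F0F0F0F0F0F0F0ULL) == 32
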
/**
    Check pointer alignment
    @internal
 */
template< typename T >
bool is_aligned(T* p)
{
#if defined (BM_ALLOC_ALIGN)
    // treat the pointer value as an integer to test the alignment remainder
    return !(reinterpret_cast<unsigned long long>(p) % BM_ALLOC_ALIGN);
#else
    (void)p;
    return true;
#endif
}


} // bm

#endif