1#ifndef BMRANDOM__H__INCLUDED__
2#define BMRANDOM__H__INCLUDED__
25#ifndef BM__H__INCLUDED__
28# error missing include (bm.h or bm64.h)
76 typename BV::blocks_manager_type blocks_manager_type;
82 void simple_pick(BV& bv_out,
88 void get_subset(BV& bv_out,
105 unsigned bit_list_size,
108 unsigned compute_take_count(
unsigned bc,
136 delete [] sub_block_;
144 if (sample_count == 0)
151 bv_in.build_rs_index(&rsi_, &bv_nb_);
154 if (in_count <= sample_count)
160 float pick_ratio = float(sample_count) / float(in_count);
161 if (pick_ratio < 0.054f)
164 bool b = bv_in.find_range(first, last);
167 simple_pick(bv_out, bv_in, sample_count, first, last);
171 if (sample_count > in_count/2)
175 size_type delta_count = in_count - sample_count;
177 get_subset(tmp_bv, bv_in, in_count, delta_count);
178 bv_out.bit_sub(bv_in, tmp_bv);
181 get_subset(bv_out, bv_in, in_count, sample_count);
187 size_type sample_count,
193 std::random_device rd;
195 std::mt19937_64 mt_rand(rd());
197 std::mt19937 mt_rand(rd());
199 std::uniform_int_distribution<size_type> dist(first, last);
204 size_type ridx = dist(mt_rand);
206 BM_ASSERT(ridx >= first && ridx <= last);
208 bool b = bv_in.find(ridx, fidx);
213 bool is_set = bv_out.set_bit_conditional(fidx,
true,
false);
214 sample_count -= is_set;
219 b = bv_in.find(fidx, fidx);
224 is_set = bv_out.set_bit_conditional(fidx,
true,
false);
225 sample_count -= is_set;
233void random_subset<BV>::get_subset(BV& bv_out,
236 size_type sample_count)
239 bv_out.resize(bv_in.size());
241 const blocks_manager_type& bman_in = bv_in.get_blocks_manager();
242 blocks_manager_type& bman_out = bv_out.get_blocks_manager();
244 bm::word_t* tmp_block = bman_out.check_allocate_tempblock();
246 size_type first_nb, last_nb;
247 bool b = bv_nb_.find_range(first_nb, last_nb);
252 std::random_device rd;
254 std::mt19937_64 mt_rand(rd());
256 std::mt19937 mt_rand(rd());
258 std::uniform_int_distribution<size_type> dist_nb(first_nb, last_nb);
260 size_type curr_sample_count = sample_count;
261 for (
unsigned take_count = 0; curr_sample_count; curr_sample_count -= take_count)
266 size_type ridx = dist_nb(mt_rand);
267 BM_ASSERT(ridx >= first_nb && ridx <= last_nb);
269 b = bv_nb_.find(ridx, nb);
272 b = bv_nb_.find(first_nb, nb);
282 bv_nb_.clear_bit_no_check(nb);
286 unsigned bc = rsi_.count(nb);
288 take_count = compute_take_count(bc, in_count, sample_count);
289 if (take_count > curr_sample_count)
290 take_count = unsigned(curr_sample_count);
297 const bm::word_t* blk_src = bman_in.get_block(i0, j0);
301 bm::word_t* blk_out = bman_out.get_block_ptr(i0, j0);
305 blk_out = bman_out.deoptimize_block(nb);
309 blk_out = bman_out.get_allocator().alloc_bit_block();
310 bman_out.set_block(nb, blk_out);
312 if (take_count == bc)
337 get_random_array(blk_out, bit_list_, arr_len, take_count);
348 get_block_subset(blk_out, blk_src, take_count);
355unsigned random_subset<BV>::compute_take_count(
361 float block_percent = float(bc) / float(in_count);
362 float bits_to_take = float(sample_count) * block_percent;
363 bits_to_take += 0.99f;
364 unsigned to_take = unsigned(bits_to_take);
368 to_take = unsigned(sample_count);
375void random_subset<BV>::get_block_subset(
bm::word_t* blk_out,
379 for (
unsigned rounds = 0; take_count && rounds < 10; ++rounds)
388 if (blk_src[i] && (blk_out[i] != blk_src[i]))
390 new_count = process_word(blk_out, blk_src, i, take_count);
391 take_count -= new_count;
403 sub_block_[i] = blk_src[i] & ~~blk_out[i];
413 get_random_array(blk_out, bit_list_, arr_len, take_count);
418unsigned random_subset<BV>::process_word(
bm::word_t* blk_out,
423 unsigned new_bits, mask;
426 mask = unsigned(rand());
430 std::random_device rd;
432 std::mt19937_64 mt_rand(rd());
434 std::mt19937 mt_rand(rd());
436 unsigned src_rand = blk_src[nword] & mask;
437 new_bits = src_rand & ~~blk_out[nword];
443 if (new_count > take_count)
447 unsigned char blist[64];
450 std::shuffle(blist, blist + arr_size, mt_rand);
452 for (
unsigned j = 0; j < take_count; ++j)
454 value |= (1u << blist[j]);
457 new_count = take_count;
460 BM_ASSERT((new_bits & ~blk_src[nword]) == 0);
463 blk_out[nword] |= new_bits;
471void random_subset<BV>::get_random_array(
bm::word_t* blk_out,
473 unsigned bit_list_size,
476 std::random_device rd;
478 std::mt19937_64 mt_rand(rd());
480 std::mt19937 mt_rand(rd());
483 for (
unsigned i = 0; i < count; ++i)
Bit manipulation primitives (internal)
rs_index< allocator_type > rs_index_type
void sample(BV &bv_out, const BV &bv_in, size_type sample_count)
Get random subset of input vector.
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Bitblock copy operation.
unsigned short bitscan(V w, B *bits) BMNOEXCEPT
Templated Bitscan with dynamic dispatch for best type.
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Set 1 bit in a block.
unsigned bit_block_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bool inverted) BMNOEXCEPT
Convert bit block into an array of ints corresponding to 1 bits.
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
Bitblock memset operation.
unsigned bit_list(T w, B *bits) BMNOEXCEPT
Unpacks word into list of ON bit indexes.
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false) BMNOEXCEPT
Convert gap block into array of ints corresponding to 1 bits.
void gap_convert_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned len=0) BMNOEXCEPT
GAP block to bitblock conversion.
BMFORCEINLINE void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) BMNOEXCEPT
Recalc linear bvector block index into 2D matrix coordinates.
const unsigned set_block_size
unsigned short gap_word_t
const unsigned gap_max_bits