1#ifndef BMAGGREGATOR__H__INCLUDED__
2#define BMAGGREGATOR__H__INCLUDED__
26#ifndef BM__H__INCLUDED__
29# error missing include (bm.h or bm64.h)
222 template<
class Opt = bm::agg_run_options<> >
364 { compute_count_ = count_mode; count_ = 0; }
426 template<
class TPipe>
449 template<
typename BII>
531 template<
typename BII>
594 size_t src_sub_size);
659 const size_t* and_idx,
661 const size_t* sub_idx,
663 int* is_result_full);
678 bool init_clear =
true);
720 unsigned i,
unsigned j);
725 unsigned i,
unsigned j);
727 const size_t* src_idx,
729 unsigned i,
unsigned j);
733 unsigned i,
unsigned j,
const arena& ar);
776 unsigned k,
unsigned i,
unsigned j)
BMNOEXCEPT;
785 return new(p)
arena();
831 unsigned top_block_size_ = 0;
832 pipeline_bcache* bcache_ptr_ = 0;
835 bool range_set_ =
false;
864template<
typename Agg,
typename It>
869 int pipeline_size = 0;
870 for (It it = first; it != last; ++it, ++pipeline_size)
882 for (It it = first; it != last; ++it, ++w)
885 auto op_st = agg.get_operation_status();
886 if (op_st != Agg::op_done)
888 op_st = agg.run_step(i, j);
889 pipeline_size -= (op_st == Agg::op_done);
892 if (pipeline_size <= 0)
909 compute_count_(false),
944 ar_->reset_all_blocks();
945 operation_ = top_block_size_ = 0;
946 operation_status_ = op_undefined;
947 count_ = 0; bcache_ptr_ = 0; gap_cache_cnt_ = 0;
965 range_from_ = from; range_to_ = to;
988 return ag_.add(bv, agr_group);
997 combine_or(bv_target, ag_.arg_bv0.data(), ag_.arg_bv0.size());
1002template<
typename BV>
1008 combine_and_sub(bv_target,
1009 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1016template<
typename BV>
1020 return combine_and_sub(bv_target,
1021 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1022 ag_.arg_bv1.data(), ag_.arg_bv1.size(),
1028template<
typename BV>
1032 return combine_and_sub(bv_target,
1033 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1034 ag_.arg_bv1.data(), ag_.arg_bv1.size(),
1040template<
typename BV>
template<
typename BII>
1043 return combine_and_sub(bi,
1044 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1045 ag_.arg_bv1.data(), ag_.arg_bv1.size());
1051template<
typename BV>
1054 return find_first_and_sub(idx,
1055 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1056 ag_.arg_bv1.data(), ag_.arg_bv1.size());
1061template<
typename BV>
1066 ar_->reset_all_blocks();
1067 combine_shift_right_and(bv_target, ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1073template<
typename BV>
1084 ar_->reset_or_blocks();
1085 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
1086 for (
unsigned i = 0; i < top_blocks; ++i)
1088 unsigned set_array_max =
1089 find_effective_sub_block_size(i, bv_src, src_size,
false);
1090 for (
unsigned j = 0; j < set_array_max; ++j)
1092 combine_or(i, j, bv_target, bv_src, src_size);
1099template<
typename BV>
1117 ar_->reset_and_blocks();
1118 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
1119 for (
unsigned i = 0; i < top_blocks; ++i)
1122 unsigned set_array_max =
1123 find_effective_sub_block_size(i, bv_src, src_size,
true);
1124 for (
unsigned j = 0; j < set_array_max; ++j)
1134template<
typename BV>
1141 bool global_found =
false;
1143 if (!bv_src_and || !src_and_size)
1151 unsigned top_blocks = resize_target(bv_target, bv_src_and, src_and_size);
1152 unsigned top_blocks2 = resize_target(bv_target, bv_src_sub, src_sub_size,
false);
1154 if (top_blocks2 > top_blocks)
1155 top_blocks = top_blocks2;
1157 for (
unsigned i = 0; i < top_blocks; ++i)
1159 const unsigned set_array_max =
1160 find_effective_sub_block_size(i, bv_src_and, src_and_size,
1161 bv_src_sub, src_sub_size);
1162 for (
unsigned j = 0; j < set_array_max; ++j)
1166 0, bv_src_and, src_and_size,
1167 0, bv_src_sub, src_sub_size,
1171 bman_target.check_alloc_top_subblock(i);
1174 bman_target.validate_top_full(i);
1180 bool found = digest;
1183 bman_target.opt_copy_bit_block(i, j, tb_ar_->tb1,
1188 global_found |= found;
1192 return global_found;
1197template<
typename BV>
template<
typename BII>
1202 bool global_found =
false;
1204 if (!bv_src_and || !src_and_size)
1207 unsigned top_blocks = 0;
1210 for (
unsigned i = 0; i < src_and_size; ++i)
1214 unsigned arg_top_blocks = bv->get_blocks_manager().top_block_size();
1215 if (arg_top_blocks > top_blocks)
1216 top_blocks = arg_top_blocks;
1218 for (
unsigned i = 0; i < src_sub_size; ++i)
1222 unsigned arg_top_blocks = bv->get_blocks_manager().top_block_size();
1223 if (arg_top_blocks > top_blocks)
1224 top_blocks = arg_top_blocks;
1228 for (
unsigned i = 0; i < top_blocks; ++i)
1230 const unsigned set_array_max =
1231 find_effective_sub_block_size(i, bv_src_and, src_and_size,
1232 bv_src_sub, src_sub_size);
1233 for (
unsigned j = 0; j < set_array_max; ++j)
1237 0, bv_src_and, src_and_size,
1238 0, bv_src_sub, src_sub_size,
1249 bool found = digest;
1250 global_found |= found;
1256 return global_found;
1264template<
typename BV>
template<
class TPipe>
1270 size_t pipe_size = pipe_args.size();
1276 bcache_ptr_ = &pipe.get_bcache();
1278 unsigned top_blocks = pipe.get_top_blocks();
1281 if (pipe.bv_or_target_)
1282 pipe.bv_or_target_->get_blocks_manager().reserve_top_blocks(top_blocks);
1284 unsigned i_from(0), j_from(0), i_to(0), j_to(0);
1295 size_t batch_size = pipe.get_options().batch_size;
1297 batch_size = pipe.compute_run_batch();
1299 for (
size_t batch_from(0), batch_to(0); batch_from < pipe_size;
1300 batch_from = batch_to)
1302 batch_to = batch_from + batch_size;
1303 if (batch_to > pipe_size)
1304 batch_to = pipe_size;
1307 for (
unsigned i = i_from; i < top_blocks; ++i)
1310 if constexpr(TPipe::options_type::is_masks())
1321 for (; j < sub_size; ++j)
1323 size_t p = batch_from;
1324 for (; p < batch_to; ++p)
1327 size_t src_and_size = ag->
arg_bv0.size();
1332 const size_t* bv_src_and_idx = ag->
arg_idx0.data();
1335 const size_t* bv_src_sub_idx = ag->
arg_idx1.data();
1336 size_t src_sub_size = ag->
arg_bv1.size();
1338 if constexpr (TPipe::options_type::is_compute_counts())
1341 if (pipe.count_res_vect_[p] >= pipe.search_count_limit_)
1348 bv_src_and, src_and_size,
1350 bv_src_sub, src_sub_size,
1352 if (digest || is_res_full)
1354 if (pipe.bv_or_target_)
1357 pipe.bv_or_target_->get_blocks_manager();
1361 bman.check_alloc_top_subblock(i);
1363 pipe.bv_or_target_->combine_operation_block_or(
1364 i, j, blk, arg_blk);
1366 if constexpr (!TPipe::options_type::is_make_results())
1368 if constexpr (TPipe::options_type::is_compute_counts())
1373 pipe.count_res_vect_[p]+=
1380 pipe.get_bv_res_vector();
1386 bv_targets_vect[p] = bv_target;
1388 bv_target->get_blocks_manager();
1390 bman.reserve_top_blocks(top_blocks);
1393 bv_target->get_blocks_manager();
1396 bman.check_alloc_top_subblock(i);
1397 bman.set_block_ptr(i, j,
1400 bman.validate_top_full(i);
1401 if constexpr (TPipe::options_type::is_compute_counts())
1406 if constexpr (TPipe::options_type::is_compute_counts())
1407 pipe.count_res_vect_[p] +=
1409 bman.opt_copy_bit_block(i, j, tb_ar_->tb1,
1416 if (pipe.bv_or_target_ && p == pipe_size)
1419 pipe.bv_or_target_->get_blocks_manager();
1420 if (
bm::word_t* blk = bman.get_block_ptr(i, j))
1421 bman.optimize_block(i, j, blk,
1433template<
typename BV>
1438 if (!bv_src_and || !src_and_size)
1441 unsigned top_blocks = max_top_blocks(bv_src_and, src_and_size);
1442 unsigned top_blocks2 = max_top_blocks(bv_src_sub, src_sub_size);
1444 if (top_blocks2 > top_blocks)
1445 top_blocks = top_blocks2;
1456 if (nblock_from == nblock_to)
1459 unsigned i = top_from;
1462 0, bv_src_and, src_and_size,
1463 0, bv_src_sub, src_sub_size,
1468 unsigned block_bit_idx = 0;
1477 if (top_to < top_blocks)
1478 top_blocks = top_to+1;
1485 for (
unsigned i = top_from; i < top_blocks; ++i)
1502 set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size,
true);
1507 unsigned set_array_max2 =
1508 find_effective_sub_block_size(i, bv_src_sub, src_sub_size,
false);
1511 if (set_array_max2 < set_array_max)
1512 set_array_max = set_array_max2;
1515 for (; j < set_array_max; ++j)
1519 0, bv_src_and, src_and_size,
1520 0, bv_src_sub, src_sub_size,
1524 unsigned block_bit_idx = 0;
1538template<
typename BV>
1552 for (
size_t k = 0; k < src_size; ++k)
1557 bv->get_blocks_manager();
1558 const bm::word_t*
const* blk_blk_arg = bman_arg.get_topblock(i);
1561 if (top_null_as_zero)
1587template<
typename BV>
1594 unsigned set_array_max = find_effective_sub_block_size(i, bv_src1, src_size1,
true);
1595 if (set_array_max && src_size2)
1597 unsigned set_array_max2 =
1598 find_effective_sub_block_size(i, bv_src2, src_size2,
false);
1599 if (set_array_max2 > set_array_max)
1600 set_array_max = set_array_max2;
1602 return set_array_max;
1608template<
typename BV>
1615 bv_target.get_blocks_manager();
1617 ar_->reset_or_blocks();
1618 bm::word_t* blk = sort_input_blocks_or(0, bv_src, src_size, i, j);
1624 bman_target.check_alloc_top_subblock(i);
1625 bman_target.set_block_ptr(i, j, blk);
1627 bman_target.validate_top_full(i);
1631 size_t arg_blk_count = ar_->v_arg_or_blk.size();
1632 size_t arg_blk_gap_count = ar_->v_arg_or_blk_gap.size();
1633 if (arg_blk_count || arg_blk_gap_count)
1635 bool all_one = process_bit_blocks_or(bman_target, i, j, *ar_);
1638 if (arg_blk_gap_count)
1639 process_gap_blocks_or(*ar_);
1641 bman_target.opt_copy_bit_block(i, j, tb_ar_->tb1,
1642 opt_mode_, tb_ar_->tb_opt);
1650template<
typename BV>
1654 size_t src_and_size)
1658 bm::word_t* blk = sort_input_blocks_and(0, bv_src, src_and_size, i, j);
1663 size_t arg_blk_and_count = ar_->v_arg_and_blk.size();
1664 size_t arg_blk_and_gap_count = ar_->v_arg_and_blk_gap.size();
1665 if (arg_blk_and_count || arg_blk_and_gap_count)
1667 if (!arg_blk_and_gap_count && (arg_blk_and_count == 1))
1673 bman_target.check_alloc_top_subblock(i);
1674 bman_target.set_block_ptr(i, j, blk);
1676 bman_target.validate_top_full(i);
1683 digest = process_bit_blocks_and(*ar_, digest);
1689 if (arg_blk_and_gap_count)
1691 digest = process_gap_blocks_and(*ar_, digest);
1696 bman_target.opt_copy_bit_block(i, j, tb_ar_->tb1,
1697 opt_mode_, tb_ar_->tb_opt);
1704template<
typename BV>
1707 unsigned i,
unsigned j,
1708 const size_t* and_idx,
1710 const size_t* sub_idx,
1712 int* is_result_full)
1717 *is_result_full = 0;
1718 bm::word_t* blk = sort_input_blocks_and(and_idx, bv_src_and, src_and_size, i, j);
1724 size_t arg_blk_and_count = ar_->v_arg_and_blk.size();
1725 size_t arg_blk_and_gap_count = ar_->v_arg_and_blk_gap.size();
1726 if (!(arg_blk_and_count | arg_blk_and_gap_count))
1729 ar_->reset_or_blocks();
1732 blk = sort_input_blocks_or(sub_idx, bv_src_sub, src_sub_size, i, j);
1739 if (!arg_blk_and_gap_count && (arg_blk_and_count == 1))
1743 *is_result_full = 1;
1754 digest = process_bit_blocks_and(*ar_, digest);
1757 digest = process_bit_blocks_sub(*ar_, digest);
1763 digest = process_gap_blocks_and(*ar_, digest);
1767 digest = process_gap_blocks_sub(*ar_, digest);
1774template<
typename BV>
1779 for (
size_t k = 0; k < arg_blk_gap_count; ++k)
1785template<
typename BV>
1792 bool single_bit_found;
1793 unsigned single_bit_idx;
1794 for (
size_t k = 0; k < arg_blk_gap_count; ++k)
1806 if (single_bit_found)
1808 for (++k; k < arg_blk_gap_count; ++k)
1823template<
typename BV>
1830 bool single_bit_found;
1831 unsigned single_bit_idx;
1832 for (
size_t k = 0; k < arg_blk_gap_count; ++k)
1845 if (single_bit_found)
1847 for (++k; k < arg_blk_gap_count; ++k)
1862template<
typename BV>
1867 for (
size_t i = 0; i < arg_blk_gap_count && b; ++i)
1874template<
typename BV>
1879 for (
size_t i = 0; i < arg_blk_gap_count; ++i)
1891template<
typename BV>
1893 unsigned i,
unsigned j,
1909 size_t unroll_factor, len, len_unr;
1912 len = arg_blk_count - k;
1913 len_unr = len - (len % unroll_factor);
1914 for( ;k < len_unr; k+=unroll_factor)
1929 len = arg_blk_count - k;
1930 len_unr = len - (len % unroll_factor);
1931 for( ;k < len_unr; k+=unroll_factor)
1943 for (; k < arg_blk_count; ++k)
1960template<
typename BV>
1973 if (range_set_ && (nb_from == nb_to))
1983 switch (arg_blk_count)
1998 size_t unroll_factor, len, len_unr;
1999 unsigned single_bit_idx;
2002 len = arg_blk_count - k;
2003 len_unr = len - (len % unroll_factor);
2004 for (; k < len_unr; k += unroll_factor)
2008 args[k], args[k + 1],
2009 args[k + 2], args[k + 3],
2020 for (++k; k < arg_blk_count; ++k)
2022 if (!(mask & args[k][nword]))
2033 for (; k < arg_blk_count; ++k)
2046template<
typename BV>
2053 unsigned single_bit_idx;
2057 for (; k < arg_blk_count; ++k)
2075 for (++k; k < arg_blk_count; ++k)
2077 if (mask & args[k][nword])
2092template<
typename BV>
2100 if (bman_target.is_init())
2101 bman_target.deinit_tree();
2104 unsigned top_blocks = bman_target.top_block_size();
2106 bool need_realloc =
false;
2109 for (
unsigned i = 0; i < src_size; ++i)
2114 bv->get_blocks_manager();
2115 unsigned arg_top_blocks = bman_arg.top_block_size();
2116 if (arg_top_blocks > top_blocks)
2118 need_realloc =
true;
2119 top_blocks = arg_top_blocks;
2122 if (arg_size > size)
2127 bman_target.reserve_top_blocks(top_blocks);
2128 if (!bman_target.is_init())
2129 bman_target.init_tree();
2130 if (size > bv_target.size())
2131 bv_target.resize(size);
2138template<
typename BV>
2143 unsigned top_blocks = 1;
2146 for (
unsigned i = 0; i < src_size; ++i)
2152 unsigned arg_top_blocks = bman_arg.top_block_size();
2153 if (arg_top_blocks > top_blocks)
2154 top_blocks = arg_top_blocks;
2161template<
typename BV>
2163 const size_t* src_idx,
2166 unsigned i,
unsigned j)
2169 ar_->v_arg_or_blk.reset(); ar_->v_arg_or_blk_gap.reset();
2170 for (
size_t k = 0; k < src_size; ++k)
2173 bv_src[k]->get_blocks_manager().get_block_ptr(i, j);
2180 BM_ASSERT(bv == bcache_ptr_->bv_inp_vect_[src_idx[k]]);
2181 bm::word_t* bit_blk = cache_gap_block(arg_blk, src_idx, k, i, j);
2184 ar_->v_arg_or_blk.push_back(bit_blk);
2189 ar_->v_arg_or_blk_gap.push_back(
BMGAP_PTR(arg_blk));
2197 ar_->v_arg_or_blk.push_back(arg_blk);
2205template<
typename BV>
2207 const size_t* src_idx,
2210 unsigned i,
unsigned j)
2212 ar_->v_arg_tmp_blk.resize_no_copy(src_size);
2213 auto blocks_arr = ar_->v_arg_tmp_blk.data();
2214 for (
size_t k = 0; k < src_size; ++k)
2217 bv_src[k]->get_blocks_manager().get_block_ptr(i, j);
2222 bool has_full_blk =
false;
2224 auto& bit_v = ar_->v_arg_and_blk;
2225 auto& gap_v = ar_->v_arg_and_blk_gap;
2226 bit_v.resize_no_copy(src_size);
2227 gap_v.resize_no_copy(src_size);
2228 size_t bc(0), gc(0);
2230 auto bit_arr = bit_v.data();
2231 auto gap_arr = gap_v.data();
2232 for (
size_t k = 0; k < src_size; ++k)
2243 size_t bv_idx = src_idx[k];
2244 auto cnt = bcache_ptr_->cnt_vect_[bv_idx];
2245 if (cnt && len > 1024 && bc < (src_size / 4))
2247 bm::word_t* bit_blk = cache_gap_block(arg_blk, src_idx, k, i, j);
2250 bit_arr[bc++] = bit_blk;
2256 gap_arr[gc++] = gap_blk;
2262 has_full_blk =
true;
2265 bit_arr[bc++] = arg_blk;
2269 bit_v.resize_no_copy(bc);
2270 gap_v.resize_no_copy(gc);
2272 if (has_full_blk && (!bc && !gc))
2280template<
typename BV>
2282 const size_t* src_idx,
2284 unsigned i,
unsigned j)
2289 size_t bv_idx = src_idx[k];
2290 auto cnt = bcache_ptr_->cnt_vect_[bv_idx];
2293 bm::word_t* bit_blk = bcache_ptr_->blk_vect_[bv_idx];
2298 pair_ij.
first = i+1;
2299 bcache_ptr_->blk_vect_[bv_idx] = bit_blk;
2306 bcache_ptr_->blk_ij_vect_[bv_idx] = pair_ij;
2316template<
typename BV>
2330 for (
unsigned i = 1; i < src_size; ++i)
2334 bv_target.bit_or(*bv);
2340template<
typename BV>
2355 for (
unsigned i = 1; i < src_size; ++i)
2359 bv_target.bit_and(*bv);
2365template<
typename BV>
2368 size_t src_and_size,
2370 size_t src_sub_size)
2375 combine_and_horizontal(bv_target, bv_src_and, src_and_size);
2377 for (
unsigned i = 0; i < src_sub_size; ++i)
2387template<
typename BV>
2392 top_block_size_ = resize_target(bv_target, bv_src, src_size);
2395 ar_->carry_overs.resize(src_size);
2396 for (
unsigned i = 0; i < src_size; ++i)
2397 ar_->carry_overs[i] = 0;
2402template<
typename BV>
2413 prepare_shift_right_and(bv_target, bv_src_and, src_and_size);
2417 if (i > top_block_size_)
2419 if (!any_carry_overs(ar_->carry_overs.data(), src_and_size))
2427 combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
2435 return bool(count_);
2437 return bv_target.any();
2442template<
typename BV>
2448 bm::word_t* blk = temp_blk_ ? temp_blk_ : tb_ar_->tb1;
2449 unsigned char* carry_overs = ar_->carry_overs.data();
2453 bool blk_zero =
false;
2458 const bm::word_t* arg_blk = bman_arg.get_block(i, j);
2479 for (
unsigned k = 1; k < src_size; ++k)
2481 unsigned carry_over = carry_overs[k];
2482 if (!digest && !carry_over)
2487 blk_zero = !blk_zero;
2489 const bm::word_t* arg_blk = get_arg_block(bv_src, k, i, j);
2490 carry_overs[k] = (
unsigned char)
2491 process_shift_right_and(blk, arg_blk, digest, carry_over);
2492 BM_ASSERT(carry_overs[k] == 0 || carry_overs[k] == 1);
2512 bman_target.opt_copy_bit_block(i, j, blk, opt_mode_, tb_ar_->tb_opt);
2521template<
typename BV>
2528 BM_ASSERT(carry_over == 1 || carry_over == 0);
2540 blk[0] = carry_over;
2561 blk[0] = carry_over & arg_blk[0];
2582template<
typename BV>
2585 unsigned k,
unsigned i,
unsigned j)
BMNOEXCEPT
2587 return bv_src[k]->get_blocks_manager().get_block(i, j);
2592template<
typename BV>
2597 unsigned acc = carry_overs[0];
2598 for (
size_t i = 1; i < co_size; ++i)
2599 acc |= carry_overs[i];
2605template<
typename BV>
2611 temp_blk_ = temp_block;
2615 case BM_NOT_DEFINED:
2617 case BM_SHIFT_R_AND:
2618 prepare_shift_right_and(*bv_target, ag_.arg_bv0.data(), ag_.arg_bv0.size());
2619 operation_status_ = op_prepared;
2628template<
typename BV>
2632 BM_ASSERT(operation_status_ == op_prepared || operation_status_ == op_in_progress);
2637 case BM_NOT_DEFINED:
2640 case BM_SHIFT_R_AND:
2642 if (i > top_block_size_)
2644 if (!this->any_carry_overs(ar_->carry_overs.data(), ag_.arg_bv0.size()))
2646 operation_status_ = op_done;
2647 return operation_status_;
2651 this->combine_shift_right_and(i, j, *bv_target_,
2652 ag_.arg_bv0.data(), ag_.arg_bv0.size());
2653 operation_status_ = op_in_progress;
2661 return operation_status_;
2669template<
typename BV>
template<
class Opt>
2672 size_t sz = arg_vect_.size();
2674 for (
size_t i = 0; i < sz; ++i)
2675 free_arg_group(arr[i]);
2676 sz = bv_res_vect_.size();
2678 for (
size_t i = 0; i < sz; ++i)
2683 sz = bcache_.blk_vect_.size();
2684 bm::word_t** blk_arr = bcache_.blk_vect_.data();
2685 for (
size_t i = 0; i < sz; ++i)
2691template<
typename BV>
template<
class Opt>
2697 if constexpr (Opt::is_make_results())
2700 size_t sz = arg_vect_.size();
2701 bv_res_vect_.resize(sz);
2703 for (
size_t i = 0; i < sz; ++i)
2706 if constexpr (Opt::is_compute_counts())
2708 size_t sz = arg_vect_.size();
2709 count_res_vect_.resize(sz);
2710 size_type* cnt_arr = count_res_vect_.data();
2711 ::memset(cnt_arr, 0, sz *
sizeof(cnt_arr[0]));
2715 size_t pipe_size = pipe_args.size();
2717 for (
size_t p = 0; p < pipe_size; ++p)
2720 complete_arg_group(ag);
2723 size_t src_and_size = ag->
arg_bv0.size();
2724 unsigned top_blocks1 = max_top_blocks(bv_src_and, src_and_size);
2725 if (top_blocks1 > top_blocks_)
2726 top_blocks_ = top_blocks1;
2729 size_t src_sub_size = ag->
arg_bv1.size();
2730 unsigned top_blocks2 = max_top_blocks(bv_src_sub, src_sub_size);
2731 if (top_blocks2 > top_blocks_)
2732 top_blocks_ = top_blocks2;
2735 is_complete_ =
true;
2737 BM_ASSERT(bcache_.bv_inp_vect_.size() == bcache_.cnt_vect_.size());
2738 BM_ASSERT(bcache_.bv_inp_vect_.size() == bcache_.blk_vect_.size());
2739 BM_ASSERT(bcache_.bv_inp_vect_.size() == bcache_.blk_ij_vect_.size());
2744template<
typename BV>
template<
class Opt>
2758template<
typename BV>
template<
class Opt>
2760 index_vector_type& idx_vect,
2761 const bvector_type_const_ptr* bv_src,
size_t size)
2765 for (
size_t k = 0; k < size; ++k)
2767 bool found(
false);
size_t bv_idx(0);
2771 const bvector_type** bv_arr = bcache_.bv_inp_vect_.data();
2773 bm::find_ptr((
void**)bv_arr, bcache_.bv_inp_vect_.size(),
2777 bcache_.cnt_vect_[bv_idx]++;
2780 bv_idx = bcache_.bv_inp_vect_.size();
2781 bcache_.bv_inp_vect_.push_back(bv);
2782 bcache_.cnt_vect_.push_back(0);
2783 bcache_.blk_vect_.push_back(0);
2787 idx_vect.push_back(bv_idx);
2795template<
typename BV>
template<
class Opt>
2796typename aggregator<BV>::arg_groups*
2801 arg_vect_.push_back(arg);
2807template<
typename BV>
template<
class Opt>
2810 const size_t cache_size = 256 * 1024;
2815 const float bv_struct_overhead = (64 * 8);
2816 const float cached_vect = float(cache_size) / float(block_size + bv_struct_overhead);
2818 size_t bv_count = unique_vectors();
2819 size_t args_total = arg_vect_.size();
2820 if ((bv_count < cached_vect) || (args_total < 2))
2823 size_t avg_vect_per_group = total_vect_ / args_total;
2825 const float reuse_coeff = 0.7f;
2826 float f_batch_size =
2827 (1+reuse_coeff)*(
float(avg_vect_per_group) / float(cached_vect) + 0.99f);
2828 size_t batch_size = size_t(f_batch_size);
2838template<
typename BV>
#define BM_DECLARE_TEMP_BLOCK(x)
#define BM_ASSERT_THROW(x, xerrcode)
#define FULL_BLOCK_FAKE_ADDR
#define FULL_BLOCK_REAL_ADDR
Bit manipulation primitives (internal)
Pipeline vector for running a group of aggregation operations on a family of vectors.
bool is_complete_
ready to run state flag
bool is_complete() const BMNOEXCEPT
return true if pipeline is ready for execution (complete)
size_type size() const BMNOEXCEPT
Return size() of pipeline.
size_t total_vect_
total number of vector mentions in all groups
const arg_vector_type & get_args_vector() const BMNOEXCEPT
Return argument vector used for pipeline execution.
bvect_vector_type & get_bv_res_vector() BMNOEXCEPT
Return results vector used for pipeline execution.
void set_or_target(bvector_type *bv_or) BMNOEXCEPT
Attach OR (aggregator vector).
size_t unique_vectors() const BMNOEXCEPT
Return number of unique vectors in the pipeline (after complete())
pipeline_bcache & get_bcache() BMNOEXCEPT
bvector_type * bv_or_target_
OR target bit-vector ptr.
bv_count_vector_type & get_bv_count_vector() BMNOEXCEPT
Return results vector count used for pipeline execution.
unsigned top_blocks_
top-level structure size, max of all bvectors
bv_count_vector_type count_res_vect_
results (counts)
unsigned get_top_blocks() const BMNOEXCEPT
Return number of top blocks after complete.
arg_groups * add()
Add new arguments group.
size_type search_count_limit_
search limit by count
pipeline(const pipeline &)=delete
pipeline & operator=(const pipeline &)=delete
void complete()
Prepare pipeline for the execution (resize and init internal structures) Once complete,...
pipeline_bcache bcache_
blocks cache structure
arg_vector_type arg_vect_
input arg. groups
run_options options_
execution parameters
run_options & options() BMNOEXCEPT
Set pipeline run options.
const bv_vector_type & get_all_input_vect() const BMNOEXCEPT
void set_search_count_limit(size_type limit) BMNOEXCEPT
Set search limit for results.
const count_vector_type & get_all_input_cnt_vect() const BMNOEXCEPT
const run_options & get_options() const BMNOEXCEPT
Get pipeline run options.
size_t compute_run_batch() const BMNOEXCEPT
Function looks at the pipeline to apply heuristics to suggest an optimal run_batch parameter.
bvect_vector_type bv_res_vect_
results (bit-vector ptrs)
Algorithms for fast aggregation of a group of bit-vectors.
BV::block_idx_type block_idx_type
void combine_or(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
void set_compute_count(bool count_mode) BMNOEXCEPT
bm::word_t * cache_gap_block(const bm::word_t *arg_blk, const size_t *src_idx, size_t k, unsigned i, unsigned j)
digest_type combine_and_sub(unsigned i, unsigned j, const size_t *and_idx, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const size_t *sub_idx, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size, int *is_result_full)
bvector_type * check_create_target()
bool test_gap_blocks_sub(size_t block_count, unsigned bit_idx)
arg_groups * arg_groups_type_ptr
digest_type process_bit_blocks_sub(const arena &ar, digest_type digest)
bm::heap_vector< bm::pair< unsigned, unsigned >, allocator_type, true > block_ij_vector_type
void combine_and(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
void combine_or(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
Aggregate group of vectors using logical OR.
void reset_range_hint() BMNOEXCEPT
Reset range hint to false.
static unsigned process_shift_right_and(bm::word_t *BMRESTRICT blk, const bm::word_t *BMRESTRICT arg_blk, digest_type &BMRESTRICT digest, unsigned carry_over) BMNOEXCEPT
static unsigned find_effective_sub_block_size(unsigned i, const bvector_type_const_ptr *bv_src1, size_t src_size1, const bvector_type_const_ptr *bv_src2, size_t src_size2) BMNOEXCEPT
bm::heap_vector< bvector_type *, allocator_type, true > bvect_vector_type
bm::heap_vector< bvector_type_const_ptr, allocator_type, true > bv_vector_type
void combine_shift_right_and(bvector_type &bv_target)
Aggregate added group of vectors using SHIFT-RIGHT and logical AND Operation does NOT perform an expl...
bm::heap_vector< size_type, allocator_type, true > bv_count_vector_type
BV::allocator_type allocator_type
static void free_arg_group(arg_groups *arg)
operation_status run_step(unsigned i, unsigned j)
Run a step of the current aggregation operation.
void combine_and_sub(TPipe &pipe)
Run AND-SUB: AND (groups1) AND NOT ( OR(group0)) for a pipeline.
void stage(bm::word_t *temp_block)
Prepare operation, create internal resources, analyse dependencies.
bool combine_and_sub(bvector_type &bv_target)
Aggregate added group of vectors using fused logical AND-SUB Operation does NOT perform an explicit r...
bm::heap_vector< size_t, allocator_type, true > index_vector_type
void reset()
Reset aggregate groups, forget all attached vectors.
bvector_type::blocks_manager_type blocks_manager_type
void combine_or(bvector_type &bv_target)
Aggregate added group of vectors using logical OR Operation does NOT perform an explicit reset of arg...
bm::heap_vector< unsigned, allocator_type, true > count_vector_type
bool combine_and_sub(bvector_type &bv_target, bool any)
Aggregate added group of vectors using fused logical AND-SUB Operation does NOT perform an explicit r...
void combine_and(bvector_type &bv_target)
Aggregate added group of vectors using logical AND Operation does NOT perform an explicit reset of ar...
digest_type process_gap_blocks_sub(const arena &ar, digest_type digest)
void set_operation(int op_code) BMNOEXCEPT
Set operation code for the aggregator.
bm::heap_vector< const bm::word_t *, allocator_type, true > block_ptr_vector_type
bool combine_shift_right_and(bvector_type &bv_target, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, bool any)
Fusion aggregate group of vectors using SHIFT right with AND.
bm::word_t * sort_input_blocks_or(const size_t *src_idx, const bvector_type_const_ptr *bv_src, size_t src_size, unsigned i, unsigned j)
const bvector_type * get_target() const BMNOEXCEPT
static arg_groups * construct_arg_group()
const bvector_type * bvector_type_const_ptr
static const bm::word_t * get_arg_block(const bvector_type_const_ptr *bv_src, unsigned k, unsigned i, unsigned j) BMNOEXCEPT
void set_range_hint(size_type from, size_type to) BMNOEXCEPT
Set search hint for the range, where results needs to be searched (experimental for internal use).
void set_optimization(typename bvector_type::optmode opt=bvector_type::opt_compress) BMNOEXCEPT
set on-the-fly bit-block compression By default aggregator does not try to optimize result,...
static unsigned find_effective_sub_block_size(unsigned i, const bvector_type_const_ptr *bv_src, size_t src_size, bool top_null_as_zero) BMNOEXCEPT
bool find_first_and_sub(size_type &idx)
Aggregate added group of vectors using fused logical AND-SUB, find the first match.
size_t add(const bvector_type *bv, unsigned agr_group=0)
Attach source bit-vector to a argument group (0 or 1).
bool combine_shift_right_and(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
operation
Codes for aggregation operations which can be pipelined for efficient execution.
int get_operation() const BMNOEXCEPT
Get current operation code.
bm::heap_vector< unsigned char, allocator_type, true > uchar_vector_type
static void free_arena(arena *ar) BMNOEXCEPT
bool combine_and_sub(BII bi, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size)
void combine_and_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
Horizontal AND aggregation (potentially slower) method.
bm::heap_vector< bm::word_t *, allocator_type, true > blockptr_vector_type
digest_type process_gap_blocks_and(const arena &ar, digest_type digest)
bool test_gap_blocks_and(size_t block_count, unsigned bit_idx)
bool combine_and_sub_bi(BII bi)
Aggregate added group of vectors using fused logical AND-SUB.
bool find_first_and_sub(size_type &idx, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size)
void prepare_shift_right_and(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
void process_gap_blocks_or(const arena &ar)
static unsigned resize_target(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size, bool init_clear=true)
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
static arena * construct_arena()
static unsigned max_top_blocks(const bvector_type_const_ptr *bv_src, size_t src_size) BMNOEXCEPT
bm::id64_t get_cache_gap_hits() const BMNOEXCEPT
bm::heap_vector< arg_groups_type_ptr, allocator_type, true > arg_vector_type
bm::word_t * sort_input_blocks_and(const size_t *src_idx, const bvector_type_const_ptr *bv_src, size_t src_size, unsigned i, unsigned j)
bool combine_and_sub(bvector_type &bv_target, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size, bool any)
Fusion aggregate group of vectors using logical AND MINUS another set.
bm::word_t * get_temp_block() BMNOEXCEPT
static bool any_carry_overs(const unsigned char *carry_overs, size_t co_size) BMNOEXCEPT
bm::heap_vector< const bm::gap_word_t *, allocator_type, true > gap_block_ptr_vector_type
digest_type process_bit_blocks_and(const arena &ar, digest_type digest)
void combine_or_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
Horizontal OR aggregation (potentially slower) method.
void combine_and_sub_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size)
Horizontal AND-SUB aggregation (potentially slower) method.
void combine_and(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
Aggregate group of vectors using logical AND.
operation_status get_operation_status() const
bool process_bit_blocks_or(blocks_manager_type &bman_target, unsigned i, unsigned j, const arena &ar)
Bitvector Bit-vector container with runtime compression of bits.
optmode
Optimization mode Every next level means additional checks (better compression vs time)
@ opt_compress
compress blocks when possible (GAP/prefix sum)
allocator_type::allocator_pool_type allocator_pool_type
blocks_manager< Alloc > blocks_manager_type
blocks_manager_type::block_idx_type block_idx_type
bm::id64_t bit_block_and_5way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src0, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, const bm::word_t *BMRESTRICT src3, bm::id64_t digest) BMNOEXCEPT
digest based bit-block AND 5-way
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Bitblock copy operation.
void block_init_digest0(bm::word_t *const block, bm::id64_t digest) BMNOEXCEPT
Init block with 000111000 pattern based on digest.
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
bm::id_t bit_block_count(const bm::word_t *block) BMNOEXCEPT
Bitcount for bit block.
int for_each_bit_blk(const bm::word_t *block, SIZE_TYPE offset, Func &bit_functor)
for-each visitor, calls a visitor functor for each 1 bit group
bool bit_block_or_5way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, const bm::word_t *BMRESTRICT src3, const bm::word_t *BMRESTRICT src4) BMNOEXCEPT
5-way (target, source1..source4) bitblock OR operation. Function does not analyse availability of sou...
bool bit_block_shift_r1_and_unr(bm::word_t *BMRESTRICT block, bm::word_t co_flag, const bm::word_t *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest) BMNOEXCEPT
Right bit-shift bitblock by 1 bit (reference) + AND.
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
bm::id64_t calc_block_digest0(const bm::word_t *const block) BMNOEXCEPT
Compute digest for 64 non-zero areas.
bm::id64_t bit_block_and_2way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, bm::id64_t digest) BMNOEXCEPT
digest based bit-block AND
bool bit_find_first(const bm::word_t *BMRESTRICT block, unsigned *BMRESTRICT pos) BMNOEXCEPT
BIT block find the first set bit.
bool bit_block_or_3way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
3 way (target | source1 | source2) bitblock OR operation. Function does not analyse availability of s...
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
Bitblock memset operation.
BMFORCEINLINE bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT
Compute digest mask for [from..to] positions.
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
bool bit_is_all_zero(const bm::word_t *BMRESTRICT start) BMNOEXCEPT
Returns "true" if all bits in the block are 0.
bool bit_find_first_if_1(const bm::word_t *BMRESTRICT block, unsigned *BMRESTRICT first, bm::id64_t digest) BMNOEXCEPT
BIT block find the first set bit if only 1 bit is set.
bm::id64_t update_block_digest0(const bm::word_t *const block, bm::id64_t digest) BMNOEXCEPT
Compute digest for 64 non-zero areas based on existing digest (function revalidates zero areas)
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
bool is_bits_one(const bm::wordop_t *start) BMNOEXCEPT
Returns "true" if all bits in the block are 1.
@ READWRITE
mutable (read-write object)
@ BM_GAP
GAP compression is ON.
unsigned gap_test_unr(const T *BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
void gap_and_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr) BMNOEXCEPT
ANDs GAP block to bitblock.
void gap_convert_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned len=0) BMNOEXCEPT
GAP block to bitblock conversion.
void gap_sub_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr) BMNOEXCEPT
SUB (AND NOT) GAP block to bitblock.
void gap_add_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
Adds(OR) GAP block to bitblock.
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *BMRESTRICT buf) BMNOEXCEPT
Returns GAP block length.
bm::agg_run_options< agg_disable_result_bvectors, agg_disable_counts > agg_opt_disable_bvects_and_counts
Pre-defined aggregator options to disable both intermediate results and counts.
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
const bool agg_disable_search_masks
const bool agg_produce_result_bvectors
bm::agg_run_options< agg_disable_result_bvectors, agg_compute_counts > agg_opt_only_counts
Pre-defined aggregator options for counts-only (results dropped) operation.
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
void aggregator_pipeline_execute(It first, It last)
Experimental method to run multiple aggregators in sync.
const bool agg_disable_counts
bm::agg_run_options< agg_produce_result_bvectors, agg_compute_counts > agg_opt_bvect_and_counts
Pre-defined aggregator options for results plus counts operation.
const bool agg_disable_result_bvectors
const bool agg_compute_counts
const unsigned set_array_mask
const unsigned set_block_mask
void aligned_free(void *ptr) BMNOEXCEPT
Aligned free.
const unsigned set_sub_array_size
BMFORCEINLINE void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) BMNOEXCEPT
Recalc linear bvector block index into 2D matrix coordinates.
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception)
const unsigned set_word_shift
const unsigned set_block_size
unsigned long long int id64_t
bool find_ptr(const void *const *p_arr, size_t arr_size, const void *ptr, size_t *idx) BMNOEXCEPT
Scan search for pointer value in unordered array.
const unsigned set_array_shift
bm::id_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx) BMNOEXCEPT
calculate bvector<> global bit-index from block-local coords
unsigned short gap_word_t
const unsigned gap_max_bits
const unsigned set_top_array_size
const unsigned set_block_shift
const unsigned set_word_mask
const unsigned bits_in_block
Aggregation options to control execution. Default settings are to support only result bit-vector filte...
static constexpr bool is_make_results() BMNOEXCEPT
make result(target) vectors (aggregation search results) (Default: true) when false is used - means w...
static constexpr bool is_masks() BMNOEXCEPT
Support for masking operations (Default: false)
static constexpr bool is_compute_counts() BMNOEXCEPT
Compute counts for the target vectors, when set to true, population count is computed for each result...
Temporary operations vectors.
gap_block_ptr_vector_type v_arg_and_blk_gap
source GAP blocks list (AND)
block_ptr_vector_type v_arg_and_blk
source blocks list (AND)
block_ptr_vector_type v_arg_or_blk
source blocks list (OR)
gap_block_ptr_vector_type v_arg_or_blk_gap
source GAP blocks list (OR)
block_ptr_vector_type v_arg_tmp_blk
source blocks list
uchar_vector_type carry_overs
shift carry over flags
size_t add(const bvector_type *bv, unsigned agr_group)
Add bit-vector pointer to its aggregation group.
bv_vector_type arg_bv0
arg group 0
bv_vector_type arg_bv1
arg group 1
void reset()
Reset argument groups to zero.
index_vector_type arg_idx0
indexes of vectors for arg group 0
index_vector_type arg_idx1
Block cache for pipeline execution.
blockptr_vector_type blk_vect_
cached block ptrs for bv_inp_vect_
count_vector_type cnt_vect_
usage count for bv_inp (all groups)
bv_vector_type bv_inp_vect_
all input vectors from all groups
block_ij_vector_type blk_ij_vect_
current block coords
Aggregation options for runtime control.
size_t batch_size
Batch size sets the number of argument groups processed at a time. Default: 0 means this parameter will be...
functor-adaptor for back-inserter