1#ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2#define BMSPARSEVEC_ALGO__H__INCLUDED__
24#ifndef BM__H__INCLUDED__
27# error missing include (bm.h or bm64.h)
40# pragma warning( disable: 4146 )
68 unsigned sv_planes = svect.slices();
70 BM_ASSERT(sv_planes > high_bit && high_bit > 0);
76 for (i = high_bit+1; i < sv_planes; ++i)
86 for (i = high_bit;
true; --i)
108 if (low_bit == 0)
return;
111 unsigned sv_planes = svect.slices();
116 for (i = low_bit+1; i < sv_planes; ++i)
126 for (i = low_bit-1;
true; --i)
143 bv_acc1.bit_xor(bv_acc2);
144 bv_low_plane->bit_or(bv_acc1);
171 typename SV::size_type& midx,
179 unsigned planes1 = (unsigned)sv1.get_bmatrix().rows();
182 typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
183 typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
195 if (bv_null1 && bv_null2)
197 bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
198 if (f && (midx < mismatch))
200 found = f; mismatch = midx;
208 bv_tmp.
resize(sv2.size());
212 bool f = bv_null1->find_first_mismatch(bv_tmp, midx, mismatch);
213 if (f && (midx < mismatch))
215 found = f; mismatch = midx;
221 bv_tmp.
resize(sv1.size());
224 bool f = bv_null2->find_first_mismatch(bv_tmp, midx, mismatch);
225 if (f && (midx < mismatch))
227 found = f; mismatch = midx;
234 for (
unsigned i = 0; mismatch && (i < planes1); ++i)
236 typename SV::bvector_type_const_ptr bv1 = sv1.get_slice(i);
239 typename SV::bvector_type_const_ptr bv2 = sv2.get_slice(i);
247 bool f = bv2->find(midx);
248 if (f && (midx < mismatch))
250 found = f; sv_idx = 2; mismatch = midx;
257 bool f = bv1->find(midx);
258 if (f && (midx < mismatch))
260 found = f; sv_idx = 1; mismatch = midx;
266 bool f = bv1->find_first_mismatch(*bv2, midx, mismatch);
267 if (f && (midx < mismatch))
269 found = f; mismatch = midx;
272 sv_idx = (bv1->test(mismatch)) ? 1 : 2;
289 found = sv1.find_rank(midx + 1, mismatch);
292 found = sv2.find_rank(midx + 1, mismatch);
303 bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
304 if (f && (midx < mismatch))
306 found = f; mismatch = midx;
346template<
typename SV1,
typename SV2>
354 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
356 allocator_pool_type pool;
358 mp_guard_bv.assign_if_not_set(pool, bv);
372 unsigned planes = sv1.effective_slices();;
373 if (planes < sv2.slices())
374 planes = sv2.slices();
376 for (
unsigned i = 0; i < planes; ++i)
378 typename SV1::bvector_type_const_ptr bv1 = sv1.get_slice(i);
379 typename SV2::bvector_type_const_ptr bv2 = sv2.get_slice(i);
399 mp_guard.assign_if_not_set(pool, bv_xor);
401 bv_xor.
bit_xor(*bv1, *bv2, SV1::bvector_type::opt_none);
408 typename SV1::size_type sz1 = sv1.
size();
409 typename SV2::size_type sz2 = sv2.size();
419 bv.set_range(sz1, sz2-1);
425 typename SV1::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
426 typename SV2::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
433 if (bv_null1 && bv_null2)
437 mp_guard.assign_if_not_set(pool, bv_or);
451 if (bv_null1 && bv_null2)
455 mp_guard.assign_if_not_set(pool, bv_xor);
464 mp_guard.assign_if_not_set(pool, bv_null);
468 bv_null.
resize(sv1.size());
473 bv_null.
resize(sv2.size());
513 bm::heap_vector<value_type, typename bvector_type::allocator_type, true>
536 template<
class Opt = bm::agg_run_options<> >
571 {
agg_pipe_.set_search_count_limit(limit); }
588 void add(
const typename SV::value_type* str);
604 {
return agg_pipe_.get_bv_res_vector(); }
608 {
return agg_pipe_.get_bv_count_vector(); }
640 void bind(
const SV& sv,
bool sorted);
672 template<typename BII>
811 template<class TPipe>
887 template<typename It>
894 mp_guard.assign_if_not_set(pool_, bv_out);
898 bool any_zero =
false;
899 for (; start < end; ++start)
902 any_zero |= (v == 0);
928 bool null_correct =
true);
985 unsigned octet_start,
999 unsigned from,
unsigned total_planes);
1006 unsigned from,
unsigned total_planes);
1012 if constexpr (std::is_signed<value_type>::value)
1034 bm::heap_vector<bm::id64_t, typename bvector_type::allocator_type, true>
1040 template<
typename AGG>
1047 unsigned char bits[64];
1048 unsigned short bit_count_v =
bm::bitscan(value, bits);
1049 for (
unsigned i = 0; i < bit_count_v; ++i)
1051 unsigned bit_idx = bits[i];
1053 unsigned plane_idx = (unsigned(octet_idx) * 8) + bit_idx;
1058 unsigned vinv = unsigned(value);
1067 vinv &= unsigned(planes_mask);
1068 for (
unsigned octet_plane = (
unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1072 unsigned plane_idx = octet_plane + bit_idx;
1090 const SV* bound_sv_;
1113template<
typename SV>
1176 remap(bv_in, sv_brel, bv_out);
1189 void attach_sv(
const SV* sv_brel,
bool compute_stats =
false);
1198 template<
unsigned BSIZE>
1233template<
typename SV>
1235: sv_ptr_(0), gb_(0), have_stats_(false)
1238 static_assert(std::is_unsigned<value_type>::value,
1239 "BM: unsigned sparse vector is required for transform");
1243 SV::throw_bad_alloc();
1249template<
typename SV>
1259template<
typename SV>
1265 have_stats_ =
false;
1269 if (sv_brel->empty() || !compute_stats)
1271 const bvector_type* bv_non_null = sv_brel->get_null_bvector();
1277 mp_g_z.assign_if_not_set(pool_, bv_zero_);
1287template<
typename SV>
1292 if (sv_brel.empty())
1295 const bvector_type* bv_non_null = sv_brel.get_null_bvector();
1298 if (!bv_non_null->test(id_from))
1301 size_type idx = sv_brel.translate_address(id_from);
1302 id_to = sv_brel.get(idx);
1308template<
typename SV>
1314 mp_g_out.assign_if_not_set(pool_, bv_out);
1315 mp_g_p.assign_if_not_set(pool_, bv_product_);
1316 mp_g_z.assign_if_not_set(pool_, bv_zero_);
1318 attach_sv(&sv_brel);
1320 remap(bv_in, bv_out);
1325template<
typename SV>
1333 if (sv_ptr_ == 0 || sv_ptr_->empty())
1339 mp_g_out.assign_if_not_set(pool_, bv_out);
1340 mp_g_p.assign_if_not_set(pool_, bv_product_);
1341 mp_g_z.assign_if_not_set(pool_, bv_zero_);
1346 const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
1350 bv_product_ = bv_in;
1351 bv_product_.bit_and(*bv_non_null);
1352 enum_bv = &bv_product_;
1363 bv_product_ = bv_in;
1365 bv_product_.bit_sub(bv_zero_);
1371 bv_out.set_bit_no_check(0);
1373 enum_bv = &bv_product_;
1380 buf_cnt = nb_old = 0;
1383 for (; en.
valid(); ++en)
1385 typename SV::size_type idx = *en;
1386 idx = sv_ptr_->translate_address(idx);
1393 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1394 bv_out.set(&gb_->buffer_[0], buf_cnt,
BM_SORTED);
1398 gb_->gather_idx_[buf_cnt++] = idx;
1402 gb_->gather_idx_[buf_cnt++] = idx;
1405 if (buf_cnt == sv_g_size)
1407 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1414 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1422template<
typename SV>
1427 if (sv_brel.empty())
1432 typename SV::const_iterator it = sv_brel.begin();
1433 for (; it.valid(); ++it)
1435 typename SV::value_type t_id = *it;
1437 if (bv_in.test(idx))
1439 bv_out.set_bit_no_check(t_id);
1449template<
typename SV>
1456 effective_str_max_ = 0;
1461template<
typename SV>
1467 if constexpr (SV::is_str())
1469 effective_str_max_ = sv.effective_vector_max();
1476 block0_elements_cache_.resize(total_nb, effective_str_max_+1);
1477 block0_elements_cache_.set_zero();
1479 block3_elements_cache_.resize(total_nb * 3, effective_str_max_+1);
1480 block3_elements_cache_.set_zero();
1486 value_type* s0 = block0_elements_cache_.row(nb);
1487 sv.get(i, s0,
size_type(block0_elements_cache_.cols()));
1491 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1493 sv.get(idx, s1,
size_type(block3_elements_cache_.cols()));
1499 vector_plane_masks_.resize(effective_str_max_);
1500 for (
unsigned i = 0; i < effective_str_max_; ++i)
1502 vector_plane_masks_[i] = sv.get_slice_mask(i);
1509template<
typename SV>
1513 effective_str_max_ = 0;
1518template<
typename SV>
1528 find_nonzero(sv, bv_out);
1529 if constexpr (SV::is_compressed())
1532 bv_out.set_range(sv.effective_size(),
bm::id_max - 1,
false);
1533 decompress(sv, bv_out);
1540 correct_nulls(sv, bv_out);
1545template<
typename SV>
1554 auto old_sz = bv_out.size();
1555 bv_out.resize(sv.size());
1557 bv_out.resize(old_sz);
1558 correct_nulls(sv, bv_out);
1563template<
typename SV>
1569 bv_out.bit_and(*bv_null);
1574template<
typename SV>
1585 find_zero(sv, bv_out);
1586 return bv_out.any();
1590 bool found = prepare_and_sub_aggregator(sv, value);
1597 bool any = (search_limit == 1);
1598 found = agg_.combine_and_sub(bv_out, any);
1605template<
typename SV>
1618 bool found = prepare_and_sub_aggregator(sv, value);
1621 found = agg_.find_first_and_sub(idx);
1629template<
typename SV>
1644 unsigned common_prefix_len = 0;
1647 agg_.set_range_hint(mask_from_, mask_to_);
1648 common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
1653 str = remap_value_vect_.data();
1657 if (sv.is_remap() && (str != remap_value_vect_.data()))
1659 bool r = sv.remap_tosv(
1660 remap_value_vect_.data(), sv.effective_max_str(), str);
1663 str = remap_value_vect_.data();
1667 bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len,
true);
1671 found = agg_.find_first_and_sub(idx);
1678template<
typename SV>
1681 unsigned octet_start,
1685 for (; str[len] != 0; ++len)
1690 for (
int octet_idx = len-1; octet_idx >= 0; --octet_idx)
1692 if (
unsigned(octet_idx) < octet_start)
1695 unsigned value = unsigned(str[octet_idx]) & 0xFF;
1699 if (&sv == bound_sv_)
1700 planes_mask = vector_plane_masks_[unsigned(octet_idx)];
1702 planes_mask = sv.get_slice_mask(
unsigned(octet_idx));
1704 if ((value & planes_mask) != value)
1707 add_agg_char(agg_, sv, octet_idx, planes_mask, value);
1714 unsigned plane_idx = unsigned(len * 8);
1715 typename SV::size_type planes = sv.get_bmatrix().rows_not_null();
1716 for (; plane_idx < planes; ++plane_idx)
1727template<
typename SV>
1731 using unsigned_value_type =
typename SV::unsigned_value_type;
1735 unsigned char bits[
sizeof(value) * 8];
1736 unsigned_value_type uv = sv.s2u(value);
1738 unsigned short bit_count_v =
bm::bitscan(uv, bits);
1740 const unsigned_value_type mask1 = 1;
1745 for (
unsigned i = bit_count_v; i > 0; --i)
1747 unsigned bit_idx = bits[i-1];
1754 unsigned sv_planes = sv.effective_slices();
1755 for (
unsigned i = 0; i < sv_planes; ++i)
1757 unsigned_value_type mask = mask1 << i;
1767template<
typename SV>
1779 find_zero(sv, bv_out);
1783 unsigned char bits[
sizeof(value) * 8];
1784 typename SV::unsigned_value_type uv = sv.s2u(value);
1785 unsigned short bit_count_v =
bm::bitscan(uv, bits);
1789 if (
const bvector_type* bv_plane = sv.get_slice(bits[--bit_count_v]))
1796 for (
unsigned i = 0; i < bit_count_v; ++i)
1800 bv_out &= *bv_plane;
1809 unsigned sv_planes = sv.effective_slices();
1810 for (
unsigned i = 0; (i < sv_planes) && uv; ++i)
1812 if (
const bvector_type* bv = sv.get_slice(i); bv && !(uv & (1u << i)))
1819template<
typename SV>
1825 find_gt_horizontal(sv, val, bv_out,
true );
1830template<
typename SV>
1835 if constexpr (std::is_signed<value_type>::value)
1837 if (val == std::numeric_limits<int>::min())
1840 find_eq(sv, val, bv_min);
1841 find_gt_horizontal(sv, val, bv_out,
true );
1842 bv_out.merge(bv_min);
1847 find_gt_horizontal(sv, val, bv_out,
true );
1855 find_gt_horizontal(sv, val, bv_out,
true );
1860 bv_out.set_range(0, sv.size()-1);
1861 correct_nulls(sv, bv_out);
1868template<
typename SV>
1873 find_ge(sv, val, bv_out);
1879template<
typename SV>
1884 find_gt(sv, val, bv_out);
1890template<
typename SV>
1898 find_ge(sv, from, bv_out);
1901 find_gt(sv, to, bv_gt);
1902 bv_out.bit_sub(bv_gt);
1908template<
typename SV>
1914 unsigned char blist[64];
1915 unsigned bit_count_v;
1926 find_zero(sv, bv_zero);
1927 auto sz = sv.size();
1928 bv_out.set_range(0, sz-1);
1929 bv_out.bit_sub(bv_zero);
1931 if constexpr (std::is_signed<value_type>::value)
1934 bv_out.bit_sub(*bv_sign);
1937 correct_nulls(sv, bv_out);
1944 if constexpr (std::is_signed<value_type>::value)
1946 find_positive(sv, gtz_bv);
1949 bv_out.swap(gtz_bv);
1951 correct_nulls(sv, bv_out);
1956 find_eq(sv, -1, bv_out);
1957 bv_out.bit_or(gtz_bv);
1959 correct_nulls(sv, bv_out);
1963 auto uvalue = SV::s2u(value +
bool(value < 0));
1978 unsigned total_planes = sv.effective_slices();
1979 const bvector_type* bv_sign = sv.get_slice(0); (void)bv_sign;
1982 if constexpr (std::is_signed<value_type>::value)
1988 bv_out.swap(gtz_bv);
1990 correct_nulls(sv, bv_out);
1993 aggregate_AND_OR_slices(top_or_bv, *bv_sign, sv, blist[bit_count_v-1]+1, total_planes);
1997 aggregate_OR_slices(top_or_bv, sv, blist[bit_count_v-1]+1, total_planes);
2002 if (total_planes <
unsigned(blist[bit_count_v-1]+1))
2004 aggregate_OR_slices(top_or_bv, sv, blist[bit_count_v-1]+1, total_planes);
2008 bv_out.merge(top_or_bv);
2016 for (
int i =
int(bit_count_v)-1; i >= 0; --i)
2018 int slice_idx = blist[i];
2019 if (slice_idx == gt_slice_limit())
2021 const bvector_type* bv_base_plane = sv.get_slice(
unsigned(slice_idx));
2027 if constexpr (std::is_signed<value_type>::value)
2030 and_eq_bv.bit_and(*bv_base_plane, *bv_sign);
2032 and_eq_bv.bit_and(*bv_base_plane, gtz_bv);
2035 and_eq_bv = *bv_base_plane;
2038 and_eq_bv.bit_and(*bv_base_plane);
2040 int next_slice_idx = 0;
2043 next_slice_idx = blist[i-1];
2044 if (slice_idx-1 == next_slice_idx)
2051 for (
int j = slice_idx-1; j >= next_slice_idx; --j)
2053 if constexpr (std::is_signed<value_type>::value)
2056 if (
const bvector_type* bv_sub_plane = sv.get_slice(
unsigned(j)))
2057 bv_out.bit_or_and(and_eq_bv, *bv_sub_plane);
2061 if constexpr (std::is_signed<value_type>::value)
2066 top_or_bv.set_range(0, sv.size()-1);
2067 top_or_bv.bit_sub(bv_out);
2068 bv_out.swap(top_or_bv);
2072 gtz_bv.resize(sv.size());
2074 bv_out.bit_sub(gtz_bv);
2078 decompress(sv, bv_out);
2082 if constexpr (!std::is_signed<value_type>::value)
2087 correct_nulls(sv, bv_out);
2094template<
typename SV>
2098 unsigned from,
unsigned total_planes)
2100 for (
unsigned i = from; i < total_planes; ++i)
2104 BM_ASSERT(bv_slice != sv.get_null_bvector());
2108 agg_.combine_or(bv_target);
2113template<
typename SV>
2117 unsigned from,
unsigned total_planes)
2119 for (
unsigned i = from; i < total_planes; ++i)
2123 BM_ASSERT(bv_slice != sv.get_null_bvector());
2124 bv_target.bit_or_and(bv_mask, *bv_slice);
2131template<
typename SV>
2133 const typename SV::value_type* str,
2136 return find_eq_str_impl(sv, str, bv_out,
false);
2142template<
typename SV>
2144 typename SV::size_type& pos)
2147 return this->find_eq_str(*bound_sv_, str, pos);
2152template<
typename SV>
2154 const typename SV::value_type* str,
2155 typename SV::size_type& pos)
2162 bool remaped =
false;
2163 if constexpr (SV::is_remap_support::value)
2165 if (sv.is_remap() && str != remap_value_vect_.data())
2167 auto str_len = sv.effective_vector_max();
2168 remap_value_vect_.resize(str_len);
2169 remaped = sv.remap_tosv(
2170 remap_value_vect_.data(), str_len, str);
2173 str = remap_value_vect_.data();
2178 found = find_first_eq(sv, str, found_pos, remaped);
2182 if constexpr (SV::is_rsc_support::value)
2184 if constexpr (sv.is_compressed())
2185 found = sv.find_rank(found_pos + 1, pos);
2193 find_zero(sv, bv_zero);
2194 found = bv_zero.find(pos);
2201template<
typename SV>
2206 return find_eq_str(*bound_sv_, str, bv_out);
2211template<
typename SV>
2213 const typename SV::value_type* str,
2216 return find_eq_str_impl(sv, str, bv_out,
true);
2221template<
typename SV>
2224 const typename SV::value_type* str,
2227 auto str_len = sv.effective_vector_max();
2228 remap_vect_target.resize(str_len);
2229 bool remaped = sv.remap_tosv(remap_vect_target.data(), str_len, str);
2235template<
typename SV>
2237 const typename SV::value_type* str,
2245 if constexpr (SV::is_rsc_support::value)
2253 if constexpr (SV::is_remap_support::value)
2255 if (sv.is_remap() && (str != remap_value_vect_.data()))
2257 bool remaped = remap_tosv(remap_value_vect_, str, sv);
2260 str = remap_value_vect_.data();
2266 const unsigned common_prefix_len = 0;
2267 found = prepare_and_sub_aggregator(sv, str, common_prefix_len,
2271 found = agg_.combine_and_sub(bv_out);
2277 find_zero(sv, bv_out);
2278 found = bv_out.any();
2286template<
typename SV>
template<
class TPipe>
2289 if (pipe.bv_and_mask_)
2292 bool any = pipe.bv_and_mask_->find_range(first, last);
2295 agg_.set_range_hint(first, last);
2297 agg_.combine_and_sub(pipe.agg_pipe_);
2298 agg_.reset_range_hint();
2303template<
typename SV>
2306 const typename SV::value_type* str,
2307 typename SV::size_type& pos)
2315 bool remaped =
false;
2317 if constexpr (SV::is_remap_support::value)
2319 if (sv.is_remap() && (str != remap_value_vect_.data()))
2321 auto str_len = sv.effective_vector_max();
2322 remap_value_vect_.resize(str_len);
2323 remaped = sv.remap_tosv(remap_value_vect_.data(), str_len, str);
2329 reset_search_range();
2333 size_type l, r, dist;
2334 l = 0; r = sv.size()-1;
2335 size_type found_pos;
2341 if (dist < min_distance_cutoff)
2354 int cmp = this->compare_str(sv, mid, str);
2372 int cmp = this->compare_str(sv, mid, str);
2377 cmp = this->compare_str(sv, mid, str);
2382 cmp = this->compare_str(sv, mid, str);
2400 set_search_range(l, r);
2404 typename SV::size_type mid = dist/2+l;
2416 int cmp = this->compare_str(sv, mid, str);
2421 set_search_range(l, mid);
2431 found = find_first_eq(sv, str, found_pos, remaped);
2435 if constexpr (SV::is_compressed())
2436 found = sv.find_rank(found_pos + 1, pos);
2438 reset_search_range();
2444 find_zero(sv, bv_zero);
2445 found = bv_zero.find(pos);
2452template<
typename SV>
2454 typename SV::size_type& pos)
2457 return bfind_eq_str(*bound_sv_, str, pos);
2462template<
typename SV>
2464 const typename SV::value_type val,
2465 typename SV::size_type& pos)
2477 cmp = this->compare(sv, l, val);
2488 cmp = this->compare(sv, r, val);
2496 cmp = this->compare(sv, r, val);
2510 if (dist < linear_cutoff1)
2514 cmp = this->compare(sv, l, val);
2531 cmp = this->compare(sv, mid, val);
2538 cmp = this->compare(sv, i, val);
2552 if (dist < linear_cutoff2)
2554 typename SV::const_iterator it(&sv, l);
2555 for (; it.valid(); ++it, ++l)
2557 typename SV::value_type sv_value = it.value();
2558 if (sv_value == val)
2582template<
typename SV>
2585 const typename SV::value_type* str,
2586 typename SV::size_type& pos)
2599 cmp = this->compare_str(sv, l, str);
2610 cmp = this->compare_str(sv, r, str);
2618 cmp = this->compare_str(sv, r, str);
2632 if (dist < linear_cutoff1)
2636 cmp = this->compare_str(sv, l, str);
2652 cmp = this->compare_str(sv, mid, str);
2659 cmp = this->compare_str(sv, i, str);
2673 if (dist < linear_cutoff2)
2675 if (!hmatr_.is_init())
2677 unsigned max_str = (unsigned)sv.effective_max_str();
2678 hmatr_.resize(linear_cutoff2, max_str+1,
false);
2681 dist = sv.decode(hmatr_, l, dist+1);
2682 for (
unsigned i = 0; i < dist; ++i, ++l)
2684 const typename SV::value_type* hm_str = hmatr_.row(i);
2685 cmp = ::strcmp(hm_str, str);
2697 cmp = this->compare_str(sv, l, str);
2716template<
typename SV>
2721 if (bound_sv_ == &sv)
2727 value_type* s0 = block0_elements_cache_.row(nb);
2730 sv.get(idx, s0,
size_type(block0_elements_cache_.cols()));
2733 for (
unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
2735 char octet = str[i];
char value = s0[i];
2736 res = (value > octet) - (value < octet);
2747 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
2749 for (
unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
2751 char octet = str[i];
char value = s1[i];
2752 res = (value > octet) - (value < octet);
2760 return sv.compare(idx, str);
2765template<
typename SV>
2771 return sv.compare(idx, val);
2776template<
typename SV>
2778 typename SV::value_type value,
2788 find_zero(sv, bv_out);
2792 find_eq_with_nulls(sv, value, bv_out, 0);
2794 decompress(sv, bv_out);
2795 correct_nulls(sv, bv_out);
2800template<
typename SV>
template<
typename BII>
2803 static_assert(!SV::is_compressed(),
"BM:find_eq on RSC vector not implemented");
2811 find_zero(sv, bv_out);
2812 typename SV::bvector_type::enumerator en = bv_out.get_enumerator(0);
2813 for (; en.valid(); ++en)
2822 bool found = prepare_and_sub_aggregator(sv, value);
2826 found = agg_.combine_and_sub_bi(bi);
2833template<
typename SV>
2835 typename SV::value_type value,
2836 typename SV::size_type& pos)
2841 find_eq(sv, value, bv_zero);
2842 bool found = bv_zero.find(pos);
2847 bool found = find_first_eq(sv, value, found_pos);
2853 if constexpr (SV::is_compressed())
2854 found = sv.find_rank(found_pos + 1, pos);
2862template<
typename SV>
2867 auto sz = sv.effective_slices();
2868 for (
unsigned i = 0; i < sz; ++i)
2869 agg_.add(sv.get_slice(i));
2870 agg_.combine_or(bv_out);
2876template<
typename SV>
2881 bv_out.set_range(0, sv.size()-1);
2882 if constexpr (std::is_signed<value_type>::value)
2885 bv_out.bit_sub(*bv_sign);
2891template<
typename SV>
2895 if constexpr (SV::is_compressed())
2897 const bvector_type* bv_non_null = sv.get_null_bvector();
2901 rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
2902 bv_out.swap(bv_tmp_);
2906 (void)sv; (void)bv_out;
2912template<
typename SV>
2923template<
typename SV>
2935template<
typename SV>
template<
class Opt>
2939 static_assert(Opt::is_masks(),
2940 "BM: Search masking needs to be enabled in template parameter options before function call. see bm::agg_run_options<> ");
2941 bv_and_mask_ = bv_mask;
2946template<
typename SV>
template<
class Opt>
2952 typename aggregator_type::arg_groups* arg = agg_pipe_.add();
2955 if constexpr (SV::is_remap_support::value)
2957 if (sv_.is_remap() && (str != remap_value_vect_.data()))
2959 bool remaped = remap_tosv(remap_value_vect_, str, sv_);
2962 str = remap_value_vect_.data();
2966 for (; str[len] != 0; ++len)
2970 if constexpr(Opt::is_masks())
2971 arg->add(bv_and_mask_, 0);
2972 arg->add(sv_.get_null_bvector(), 0);
2976 for (
int octet_idx = len-1; octet_idx >= 0; --octet_idx)
2981 unsigned value = unsigned(str[octet_idx]) & 0xFF;
2986 if (&sv == bound_sv_)
2987 planes_mask = vector_plane_masks_[unsigned(octet_idx)];
2990 planes_mask = sv_.get_slice_mask(
unsigned(octet_idx));
2992 if ((value & planes_mask) != value)
2999 *arg, sv_, octet_idx, planes_mask, value);
3007 unsigned plane_idx = unsigned(len * 8);
3010 for (; plane_idx < planes; ++plane_idx)
Algorithms for fast aggregation of N bvectors.
Algorithms for bvector<> (main include)
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
bm::heap_vector< size_type, allocator_type, true > bv_count_vector_type
Constant iterator designed to enumerate "ON" bits.
bool valid() const BMNOEXCEPT
Checks if iterator is still valid.
Bitvector Bit-vector container with runtime compression of bits.
bm::bvector< Alloc > & bit_or(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand OR : this := bv1 OR bv2
@ opt_none
no optimization
bvector< Alloc > & invert()
Invert/NEG all bits It should be noted, invert is affected by size() if size is set - it only inverts...
allocator_type::allocator_pool_type allocator_pool_type
size_type size() const BMNOEXCEPT
Returns bvector's capacity (number of bits it can store)
void resize(size_type new_size)
Change size of the bvector.
bm::bvector< Alloc > & bit_xor(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand XOR : this := bv1 XOR bv2
Algorithms for rank compression of bit-vector.
Pipeline to run multiple searches against a particular SV faster using cache optimizations.
const run_options & get_options() const BMNOEXCEPT
Get pipeline run options.
bv_count_vector_type & get_bv_count_vector() BMNOEXCEPT
Return results vector count used for pipeline execution.
remap_vector_type remap_value_vect_
remap buffer
void complete()
Prepare pipeline for the execution (resize and init internal structures) Once complete,...
const SV & sv_
target sparse vector ref
bool is_complete() const BMNOEXCEPT
aggregator_pipeline_type agg_pipe_
traget aggregator pipeline
bvect_vector_type & get_bv_res_vector() BMNOEXCEPT
Return results vector used for pipeline execution.
void set_or_target(bvector_type *bv_or) BMNOEXCEPT
Attach OR (aggregator vector).
aggregator_pipeline_type & get_aggregator_pipe() BMNOEXCEPT
get aggregator pipeline (access to compute internals)
void set_search_mask(const bvector_type *bv_mask) BMNOEXCEPT
Set pipeline mask bit-vector to restrict the search space.
void add(const typename SV::value_type *str)
Add new search string.
aggregator_type::template pipeline< Opt > aggregator_pipeline_type
pipeline & operator=(const pipeline &)=delete
pipeline(const pipeline &)=delete
size_t eff_slices_
number of effectice NOT NULL value slices
size_type size() const BMNOEXCEPT
run_options & options() BMNOEXCEPT
Set pipeline run options.
const bvector_type * bv_and_mask_
void set_search_count_limit(size_type limit) BMNOEXCEPT
Set search limit for results.
algorithms for sparse_vector scan/search
bm::aggregator< bvector_type > aggregator_type
SV::bvector_type bvector_type
bool find_eq_str_impl(const SV &sv, const value_type *str, bvector_type &bv_out, bool prefix_sub)
find EQ str / prefix impl
SV::value_type value_type
void correct_nulls(const SV &sv, bvector_type &bv_out)
Exclude possible NULL values from the result vector.
bvector_type::allocator_type allocator_type
bool prepare_and_sub_aggregator(const SV &sv, value_type value)
Prepare aggregator for AND-SUB (EQ) search.
bool find_eq_with_nulls(const SV &sv, value_type value, bvector_type &bv_out, size_type search_limit=0)
find value (may include NULL indexes)
aggregator_type::bv_count_vector_type bv_count_vector_type
static bool remap_tosv(remap_vector_type &remap_vect_target, const value_type *str, const SV &sv)
Remap input value into SV char encodings.
bm::dynamic_heap_matrix< value_type, allocator_type > matrix_search_buf_type
bool find_first_eq(const SV &sv, value_type value, size_type &idx)
find first value (may include NULL indexes)
void find_eq_with_nulls_horizontal(const SV &sv, value_type value, bvector_type &bv_out)
For testing purposes only.
void find_zero(const SV &sv, bvector_type &bv_out, bool null_correct=true)
find all sparse vector elements EQ to 0
bvector_type * bvector_type_ptr
bool bfind_eq_str(const SV &sv, const value_type *str, size_type &pos)
binary find first sparse vector element (string) Sparse vector must be sorted.
bm::heap_vector< value_type, typename bvector_type::allocator_type, true > remap_vector_type
void invert(const SV &sv, bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
sparse_vector_scanner(const sparse_vector_scanner &)=delete
void find_gt_horizontal(const SV &sv, value_type value, bvector_type &bv_out, bool null_correct=true)
For testing purposes only.
int compare(const SV &sv, size_type idx, const value_type val) BMNOEXCEPT
compare sv[idx] with input value
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type
void reset_binding() BMNOEXCEPT
reset sparse vector binding
void find_eq(const SV &sv, value_type value, bvector_type &bv_out)
find all sparse vector elements EQ to search value
void decompress(const SV &sv, bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
void operator=(const sparse_vector_scanner &)=delete
void find_nonzero(const SV &sv, bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all planes.
static constexpr int gt_slice_limit() noexcept
Return the slice limit for signed/unsigned vector value types.
void find_lt(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element less (<) than value
bm::heap_vector< bvector_type *, allocator_type, true > bvect_vector_type
allocator_type::allocator_pool_type allocator_pool_type
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
static void add_agg_char(AGG &agg, const SV &sv, int octet_idx, bm::id64_t planes_mask, unsigned value)
Addd char to aggregator (AND-SUB)
bool bfind(const SV &sv, const value_type val, size_type &pos)
binary search for position in the sorted sparse vector
void reset_search_range()
reset (disable) search range
void find_positive(const SV &sv, bvector_type &bv_out)
Find positive (greter than zero elements) Output vector is computed as a logical OR (join) of all pla...
bool find_eq_str_prefix(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements with a given prefix (string)
bool lower_bound_str(const SV &sv, const value_type *str, size_type &pos)
lower bound search for an array position
int compare_str(const SV &sv, size_type idx, const value_type *str)
compare sv[idx] with input str
const bvector_type * bvector_type_const_ptr
void set_search_range(size_type from, size_type to)
set search boundaries (hint for the aggregator)
void find_ge(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element greater or equal (>=) than value
void find_le(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element less or equal (<=) than value
void find_gt(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element greater (>) than value
void find_range(const SV &sv, value_type from, value_type to, bvector_type &bv_out)
find all elements sparse vector element in closed range [left..right] interval
void aggregate_OR_slices(bvector_type &bv_target, const SV &sv, unsigned from, unsigned total_planes)
bool find_eq_str(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements (string)
bm::heap_vector< bm::id64_t, typename bvector_type::allocator_type, true > mask_vector_type
aggregator_type::run_options run_options
Scanner options to control execution.
allocator_pool_type & get_bvector_alloc_pool() BMNOEXCEPT
Return allocator pool for blocks (Can be used to improve performance of repeated searches with the sa...
static void aggregate_AND_OR_slices(bvector_type &bv_target, const bvector_type &bv_mask, const SV &sv, unsigned from, unsigned total_planes)
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
unsigned short bitscan(V w, B *bits) BMNOEXCEPT
Templated Bitscan with dynamic dispatch for best type.
bm::alloc_pool_guard< allocator_pool_type, bvector< Alloc > > mem_pool_guard
null_support
NULL-able value support.
@ BM_SORTED
input set is sorted (ascending order)
@ BM_SORTED_UNIFORM
sorted and in one block (internal!)
@ use_null
support "non-assigned" or "NULL" logic
@ no_null
do not support NULL values
bool sparse_vector_find_first_mismatch(const SV &sv1, const SV &sv2, typename SV::size_type &midx, bm::null_support null_proc=bm::use_null)
Find first mismatch (element which is different) between two sparse vectors (uses linear scan in bit-...
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
void sparse_vector_find_mismatch(typename SV1::bvector_type &bv, const SV1 &sv1, const SV2 &sv2, bm::null_support null_proc)
Find mismatch vector, indicating positions of mismatch between two sparse vectors (uses linear scan i...
const unsigned set_block_mask
const unsigned sub_block3_size
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
unsigned long long int id64_t
const unsigned gap_max_bits
const unsigned set_block_shift
ad-hoc conditional expressions