54 <<
"BitMagic DNA Index Build Sample (c) 2018" << std::endl
55 <<
"-fa file-name -- input FASTA file" << std::endl
56 <<
"-j number -- number of parallel jobs to run" << std::endl
57 <<
"-timing -- collect timings" << std::endl
73 for (
int i = 1; i < argc; ++i)
75 std::string arg = argv[i];
76 if ((arg ==
"-h") || (arg ==
"--help"))
81 if (arg ==
"-fa" || arg ==
"--fa")
89 std::cerr <<
"Error: -fa requires file name" << std::endl;
94 if (arg ==
"-j" || arg ==
"--j")
102 std::cerr <<
"Error: -j requires number of jobs" << std::endl;
108 if (arg ==
"-timing" || arg ==
"--timing" || arg ==
"-t" || arg ==
"--t")
123int load_FASTA(
const std::string& fname, std::vector<char>& seq_vect)
128 std::ifstream fin(fname.c_str(), std::ios::in);
133 for (
unsigned i = 0; std::getline(fin, line); ++i)
139 for (std::string::iterator it = line.begin(); it != line.end(); ++it)
140 seq_vect.push_back(*it);
160 void Build(
const vector<char>& sequence)
168 for (
size_t i = 0; i < sequence.size(); ++i)
170 unsigned pos = unsigned(i);
204 for (
size_t i = 0; i < sequence.size(); ++i)
206 unsigned pos = unsigned(i);
239 const std::vector<char>* src_sequence;
242 : target_idx(idx), src_sequence(&src) {}
244 void operator() (
size_t from,
size_t to)
246 const vector<char>& sequence = *src_sequence;
255 for (
size_t i = from; i < sequence.size() && (i < to); ++i)
257 unsigned pos = unsigned(i);
305 std::vector<std::future<void> > futures;
307 unsigned range = unsigned(sequence.size() / threads);
309 for (
unsigned k = 0; k < sequence.size(); k += range)
311 futures.emplace_back(std::async(std::launch::async,
312 Func(
this, sequence), k, k + range));
316 for (
auto& e : futures)
326 static std::mutex mtx_A;
327 static std::mutex mtx_T;
328 static std::mutex mtx_G;
329 static std::mutex mtx_C;
330 static std::mutex mtx_N;
336 std::lock_guard<std::mutex> guard(mtx_A);
342 std::lock_guard<std::mutex> guard(mtx_C);
348 std::lock_guard<std::mutex> guard(mtx_G);
354 std::lock_guard<std::mutex> guard(mtx_T);
360 std::lock_guard<std::mutex> guard(mtx_N);
376 return m_FPrintBV[
eA];
378 return m_FPrintBV[
eC];
380 return m_FPrintBV[
eG];
382 return m_FPrintBV[
eT];
384 return m_FPrintBV[
eN];
388 throw runtime_error(
"Error. Invalid letter!");
401 std::vector<char> letters {
'A',
'T',
'G',
'C'};
402 for (
char base : letters)
410 throw runtime_error(
string(
"Fingerprint mismatch for:") +
string(1, base));
417int main(
int argc,
char *argv[])
425 std::vector<char> seq_vect;
441 std::cout <<
"FASTA sequence size=" << seq_vect.size() << std::endl;
445 idx1.
Build(seq_vect);
462 std::cout << std::endl <<
"Performance:" << std::endl;
466 catch (std::exception& ex)
468 std::cerr <<
"Error:" << ex.what() << std::endl;
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Timing utilities for benchmarking (internal)
pre-processor un-defines to avoid global space pollution (internal)
Utility for keeping all DNA fingerprint vectors and searching them using various techniques.
void BuildParallel(const vector< char > &sequence, unsigned threads)
Build fingerprint bit-vectors using bulk insert iterator and parallel processing.
void Build(const vector< char > &sequence)
Build fingerprint bit-vectors from the original sequence.
void BuildBulk(const vector< char > &sequence)
Build index using bulk insert iterator.
void MergeVector(char letter, bm::bvector<> &bv)
Thread sync bit-vector merge.
const bm::bvector & GetVector(char letter) const
Return fingerprint bit-vector.
Output iterator designed to set "ON" bits based on an input sequence of integers.
Output iterator designed to set "ON" bits based on an input sequence of integers (bit indices).
Bitvector Bit-vector container with runtime compression of bits.
void merge(bm::bvector< Alloc > &bvect)
Merge/move content from another vector.
insert_iterator inserter()
int compare(const bvector< Alloc > &bvect) const BMNOEXCEPT
Lexicographical comparison with a bitvector.
Utility class to collect performance measurements and statistics.
std::map< std::string, statistics > duration_map_type
test name to duration map
static void print_duration_map(TOut &tout, const duration_map_type &dmap, format fmt=ct_time)
@ BM_SORTED
input set is sorted (ascending order)
int main(int argc, char *argv[])
static int parse_args(int argc, char *argv[])
static void fingerprint_compare(const DNA_FingerprintScanner &idx1, const DNA_FingerprintScanner &idx2)
Check correctness of indexes constructed using different methods.
bm::chrono_taker ::duration_map_type timing_map
static int load_FASTA(const std::string &fname, std::vector< char > &seq_vect)