1 #ifndef MINHASHITERATOR_H 2 #define MINHASHITERATOR_H 16 minHashResult(
const uint64_t h,
const int p) : hash(h), pos(p) {}
32 minHashIterator(
int _k,
int _g, HF _h) : s(NULL), n(0), k(_k), g(_g), hf(_h), v(k-g+1), p(-1), invalid(
true), nh(
false) {
36 minHashIterator(
const char* _s,
int _length,
int _k,
int _g, HF _h,
bool _nh) : k(_k), g(_g), hf(_h), v(k-g+1), p(-1), invalid(
true), nh(_nh) {
38 initString(_s,_length);
41 minHashIterator() : s(NULL), n(0), k(0), g(0), hf(HF(0)), invalid(
true), nh(
false) {}
48 void initString(
const char* _s,
int _length) {
57 if (n < k || k < g) invalid =
true;
64 if (invalid || o.invalid)
return invalid && o.invalid;
65 return s==o.s && n==o.n && g==o.g && k==o.k && nh==o.nh;
68 bool operator!=(
const minHashIterator& o) {
return !this->operator==(o); }
76 if (invalid)
return *
this;
80 if (p >= n-k+1 || s[p+k-1] == 0) {
86 const int shift = nh ? 1 : 0;
94 for (
int j = shift; j < k-g-shift;) {
96 hf.update(s[j],s[j+g]);
98 uint64_t h = hf.hash();
99 int t = ((int)v.size())-1;
101 while (t >= 0 && v[t].hash > h) {
113 if (v[0].pos < p + shift) v.pop_front();
115 hf.update(s[p+k-g-1-shift],s[p+k-1-shift]);
117 uint64_t h = hf.hash();
118 int t = ((int) v.size())-1;
120 while (t >= 0 && v[t].hash > h) {
142 for (; i > 0; i--)
operator++();
150 const int shift = nh ? 1 : 0;
151 const int end = p+k-g-shift;
162 while ((hf_tmp.hash() <= mhr_discard.hash) && (j < end)){
164 hf_tmp.update(s[j],s[j+g]);
168 if ((j == end) && (hf_tmp.hash() <= mhr_discard.hash))
return mhr_discard;
174 hf_tmp.update(s[j],s[j+g]);
179 if ((h <= mhr.hash) && (h > mhr_discard.hash)){
185 mhr.hash = hf_tmp.hash();
191 mhr.hash = hf_tmp.hash();
202 inline uint64_t getHash()
const {
return invalid ? 0 : v[0].hash; }
204 inline int getPosition()
const {
return invalid ? 0 : v[0].pos; }
206 inline int getKmerPosition()
const {
return p; }
213 deque<minHashResult> v;
230 if (o.invalid || invalid)
return o.invalid && invalid;
231 return p_pos == o.p_pos && p_s == o.p_s && pos==o.pos;
236 return !this->operator==(o);
241 if (invalid)
return *
this;
243 if ((p_pos != p->p || p_s != p->s)
244 || (pos>=p->v.size()-1)
245 || (p->v[pos+1].hash != p->v[pos].hash)) {
265 const minHashResult* operator->()
const {
return &(p->v[pos]); }
286 preAllocMinHashIterator() : s(NULL), n(0), k(0), g(0), hf(HF(0)), p(-1), v(0), p_cur_start(0), p_cur_end(0), invalid(
true), nh(
false) {}
289 s(_s), n(_n), k(_k), g(_g), hf(_h), p(-1), p_cur_start(0), p_cur_end(0), invalid(
true), nh(_nh) {
291 if ((s != NULL) && (n >= k) && (k >= g)){
295 v = vector<minHashResult>(n-g+1);
304 if (invalid || o.invalid)
return invalid && o.invalid;
305 return s==o.s && n==o.n && g==o.g && k==o.k && nh==o.nh;
316 if (invalid)
return *
this;
320 if (p >= n-k+1 || s[p+k-1] == 0) {
326 const int shift = nh ? 1 : 0;
335 for (
int j = shift; j < k-g-shift;) {
337 hf.update(s[j], s[j+g]);
339 uint64_t h = hf.hash();
341 while (p_cur_end > p_cur_start && v[p_cur_end-1].hash > h) p_cur_end--;
351 if (v[p_cur_start].pos < p + shift) p_cur_start++;
353 hf.update(s[p+k-g-1-shift], s[p+k-1-shift]);
355 uint64_t h = hf.hash();
357 while (p_cur_end > p_cur_start && v[p_cur_end-1].hash > h) p_cur_end--;
376 for (; i > 0; i--)
operator++();
382 inline uint64_t getHash()
const {
return invalid ? 0 : v[p_cur_start].hash; }
384 inline int getPosition()
const {
return invalid ? 0 : v[p_cur_start].pos; }
386 inline int getNbMin()
const {
return p_cur_end - p_cur_start; }
388 inline int getKmerPosition()
const {
return p; }
394 const int shift = nh ? 1 : 0;
395 const int end = p+k-g-shift;
406 while ((hf_tmp.hash() <= mhr_discard.hash) && (j < end)){
408 hf_tmp.update(s[j],s[j+g]);
412 if ((j == end) && (hf_tmp.hash() <= mhr_discard.hash))
return mhr_discard;
418 hf_tmp.update(s[j],s[j+g]);
423 if ((h <= mhr.hash) && (h > mhr_discard.hash)){
429 mhr.hash = hf_tmp.hash();
435 mhr.hash = hf_tmp.hash();
441 if (mhr.hash <= mhr_discard.hash) cerr <<
"Problem here" << endl;
451 vector<minHashResult> v;
460 p_cur_start(o.p_cur_start), p_cur_end(o.p_cur_end), invalid(o.invalid), nh(o.nh) {}
463 p_cur_end(o.p_cur_end - o.p_cur_start), invalid(o.invalid), nh(o.nh) {
465 if (!invalid && (o.p + n <= o.n)){
467 v = std::vector<minHashResult>(o.v.begin() + o.p_cur_start, o.v.begin() + o.p_cur_end);
469 for (
auto& min_h : v) min_h.pos -= o.p;
483 if (o.invalid || invalid)
return o.invalid && invalid;
484 return p_pos == o.p_pos && p_s == o.p_s && p_it==o.p_it && p_it_end==o.p_it_end;
488 return !this->operator==(o);
493 if (invalid)
return *
this;
495 if ((p_s != p->s || p_pos != p->p || p_it_end != p->p_cur_end)
496 || (p_it >= p_it_end - 1)
497 || (p->v[p_it + 1].hash != p->v[p_it].hash)) {
518 p_it_end = o.p_it_end;
521 if (
operator!=(o)) invalid =
true;
526 const minHashResult& operator*()
const {
return p->v[p_it]; }
527 const minHashResult* operator->()
const {
return &(p->v[p_it]); }
555 minHashKmer(
const char* _s,
int _k,
int _g, HF _h,
bool neighbor_hash) : s(_s), k(_k), g(_g), hf(_h), h(0), p(-1), invalid(
true), nh(neighbor_hash) {
557 if ((s != NULL) && ((n = strlen(s)) >= k) && (k >= g)){
566 minHashKmer() : s(NULL), n(0), k(0), g(0), hf(HF(0)), h(0), p(-1), nb(0), invalid(
true), nh(
false) {}
572 h = o.v[o.p_cur_start].hash;
573 p = o.v[o.p_cur_start].pos - o.p;
579 if(invalid || o.invalid)
return invalid && o.invalid;
580 return s==o.s && n==o.n && g==o.g && k==o.k && nh==o.nh && h==o.h && p==o.p && nb==o.nb;
583 bool operator!=(
const minHashKmer& o) {
return !this->operator==(o); }
585 uint64_t getHash()
const {
return h; }
587 int getPosition()
const {
return p; }
589 int getNbMin()
const {
return nb; }
597 const int shift = nh ? 1 : 0;
605 for (
int j = shift; j < k-g-shift; j++) {
607 hf.update(s[j], s[j+g]);
611 if (hf.hash() == h) nb++;
634 #endif // MINHASHITERATOR_H Definition: minHashIterator.hpp:13
Definition: minHashIterator.hpp:28
Definition: minHashIterator.hpp:282
Definition: minHashIterator.hpp:23
Definition: minHashIterator.hpp:279
Definition: minHashIterator.hpp:551