BFGraph
KmerHashTable.h
1 #ifndef KALLISTO_KMERHASHTABLE_H
2 #define KALLISTO_KMERHASHTABLE_H
3 
4 #include <utility>
5 #include <string>
6 #include <iterator>
7 
8 #include "Kmer.hpp"
9 
10 template<typename T, typename Hash = KmerHash>
11 struct KmerHashTable {
12 
13  using value_type = std::pair<Kmer, T>;
14  using key_type = Kmer;
15  using mapped_type = T;
16 
17  Hash hasher;
18  value_type *table;
19  size_t size_, pop, num_empty;
20  value_type empty_val;
21  value_type deleted;
22 
23 // ---- iterator ----
24 
25  template<bool is_const_iterator = true>
26  class iterator_ : public std::iterator<std::forward_iterator_tag, value_type> {
27 
28  public:
29 
30  typedef typename std::conditional<is_const_iterator, const KmerHashTable *, KmerHashTable *>::type DataStructurePointerType;
31  typedef typename std::conditional<is_const_iterator, const value_type&, value_type&>::type ValueReferenceType;
32  typedef typename std::conditional<is_const_iterator, const value_type *, value_type *>::type ValuePointerType;
33 
34 
35  DataStructurePointerType ht;
36  size_t h;
37 
38  iterator_() : ht(nullptr), h(0) {}
39  iterator_(DataStructurePointerType ht_) : ht(ht_), h(ht_->size_) {}
40  iterator_(DataStructurePointerType ht_, size_t h_) : ht(ht_), h(h_) {}
41  iterator_(const iterator_<false>& o) : ht(o.ht), h(o.h) {}
42  iterator_& operator=(const iterator_& o) {ht=o.ht; h=o.h; return *this;}
43 
44  ValueReferenceType operator*() const {return ht->table[h];}
45  ValuePointerType operator->() const {return &(ht->table[h]);}
46 
47  size_t getHash() const { return h; }
48 
49  void find_first() {
50 
51  h = 0;
52 
53  if (ht->table != nullptr && ht->size_>0) {
54 
55  Kmer& km = ht->table[h].first;
56  if (km == ht->empty_val.first || km == ht->deleted.first) operator++();
57  }
58  }
59 
60  iterator_ operator++(int) {
61 
62  const iterator_ old(*this);
63  ++(*this);
64  return old;
65  }
66 
67  iterator_& operator++() {
68 
69  if (h == ht->size_) return *this;
70 
71  ++h;
72 
73  for (; h < ht->size_; ++h) {
74 
75  Kmer& km = ht->table[h].first;
76 
77  if (km != ht->empty_val.first && km != ht->deleted.first) break;
78  }
79 
80  return *this;
81  }
82 
83  bool operator==(const iterator_ &o) const {return (ht->table == o.ht->table) && (h == o.h);}
84  bool operator!=(const iterator_ &o) const {return !(this->operator==(o));}
85 
86  friend class iterator_<true>;
87  };
88 
90  typedef iterator_<false> iterator;
91 
92 
93  // --- hash table
94  KmerHashTable(const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0), num_empty(0) {
95 
96  empty_val.first.set_empty();
97  deleted.first.set_deleted();
98  init_table(1024);
99  }
100 
101  KmerHashTable(size_t sz, const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0), num_empty(0) {
102  empty_val.first.set_empty();
103  deleted.first.set_deleted();
104  init_table((size_t) (1.2*sz));
105  }
106 
108 
109  hasher = o.hasher;
110  size_ = o.size_;
111  pop = o.pop;
112  num_empty = o.num_empty;
113  table = o.table;
114  empty_val = o.empty_val;
115  deleted = o.deleted;
116 
117  o.table = nullptr;
118 
119  o.clear_table();
120  }
121 
122  KmerHashTable& operator=(KmerHashTable&& o){
123 
124  if (this != &o) {
125 
126  clear_table();
127 
128  hasher = o.hasher;
129  size_ = o.size_;
130  pop = o.pop;
131  num_empty = o.num_empty;
132  table = o.table;
133  empty_val = o.empty_val;
134  deleted = o.deleted;
135 
136  o.table = nullptr;
137 
138  o.clear_table();
139  }
140 
141  return *this;
142  }
143 
144  ~KmerHashTable() { clear_table(); }
145 
146  void clear_table() {
147 
148  if (table != nullptr) {
149 
150  delete[] table;
151  //free(table);
152  table = nullptr;
153  }
154 
155  size_ = 0;
156  pop = 0;
157  num_empty = 0;
158  }
159 
160  size_t size() const { return pop; }
161 
162  bool empty() const { return pop == 0; }
163 
164  void clear() {
165 
166  std::fill(table, table+size_, empty_val);
167 
168  pop = 0;
169  num_empty = size_;
170  }
171 
172  void init_table(size_t sz) {
173 
174  clear_table();
175 
176  size_ = rndup(sz);
177 
178  table = new value_type[size_];
179  //table = (value_type*) malloc(size_ * sizeof(value_type));
180 
181  clear();
182  }
183 
184  iterator find(const Kmer& key) {
185 
186  size_t h = hasher(key) & (size_-1);
187  size_t end_h = (h == 0) ? (size_-1) : h-1;
188 
189  for (;; h = (h+1!=size_ ? h+1 : 0)) {
190 
191  if (table[h].first == empty_val.first) return iterator(this); // empty slot, not in table
192  else if (table[h].first == key) return iterator(this, h); // same key, found
193  // if it is deleted, we still have to continue
194  if (h==end_h) return iterator(this); // we've gone throught the table, quit
195  }
196  }
197 
198  const_iterator find(const Kmer& key) const {
199 
200  size_t h = hasher(key) & (size_-1);
201  size_t end_h = (h == 0) ? (size_-1) : h-1;
202 
203  for (;; h = (h+1!=size_ ? h+1 : 0)) {
204 
205  if (table[h].first == empty_val.first) return const_iterator(this); // empty slot, not in table
206  else if (table[h].first == key) return const_iterator(this, h); // same key, found
207 
208  if (h==end_h) return const_iterator(this);
209  }
210  }
211 
212  iterator find(const size_t h) {
213 
214  if ((h < size_) && (table[h].first != empty_val.first) && (table[h].first != deleted.first))
215  return iterator(this, h);
216 
217  return iterator(this);
218  }
219 
220  const_iterator find(const size_t h) const {
221 
222  if ((h < size_) && (table[h].first != empty_val.first) && (table[h].first != deleted.first))
223  return const_iterator(this, h);
224 
225  return const_iterator(this);
226  }
227 
228  iterator erase(const_iterator pos) {
229 
230  if (pos == this->end()) return this->end();
231 
232  size_t h = pos.h;
233 
234  table[h] = deleted;
235  --pop;
236 
237  return ++iterator(this, h); // return pointer to next element
238  }
239 
240  iterator erase(const size_t h) {
241 
242  if (h >= size_) return this->end();
243 
244  table[h] = deleted;
245  --pop;
246 
247  return ++iterator(this, h); // return pointer to next element
248  }
249 
250  size_t erase(const Kmer& km) {
251 
252  const_iterator pos = find(km);
253  size_t oldpop = pop;
254 
255  if (pos != this->end()) erase(pos);
256 
257  return oldpop-pop;
258  }
259 
260  std::pair<iterator,bool> insert(const value_type& val) {
261 
262  if ((5*num_empty) < size_) reserve(2*size_); // if more than 80% full, resize
263 
264  bool is_deleted = false;
265 
266  for (size_t h = hasher(val.first) & (size_-1), h_tmp;; h = (h+1 != size_ ? h+1 : 0)) {
267 
268  if (table[h].first == empty_val.first) {
269 
270  if (!is_deleted) num_empty--;
271  else h = h_tmp;
272 
273  table[h] = val;
274  ++pop;
275 
276  return {iterator(this, h), true};
277  }
278  else if (table[h].first == val.first) return {iterator(this, h), false};
279  else if (!is_deleted && (table[h].first == deleted.first)) {
280  is_deleted = true;
281  h_tmp = h;
282  }
283  }
284  }
285 
286  void reserve(size_t sz) {
287 
288  if (sz <= size_) return;
289 
290  value_type *old_table = table;
291  size_t old_size_ = size_;
292 
293  size_ = rndup(sz);
294  pop = 0;
295  num_empty = size_;
296 
297  table = new value_type[size_];
298 
299  std::fill(table, table+size_, empty_val);
300 
301  for (size_t i = 0; i < old_size_; i++) {
302 
303  if (old_table[i].first != empty_val.first && old_table[i].first != deleted.first) insert(old_table[i]);
304  }
305 
306  delete[] old_table;
307  old_table = nullptr;
308 
309  /*if (sz <= size_) return;
310 
311  const size_t prev_size_ = size_;
312 
313  size_ = rndup(sz);
314  pop = 0;
315  num_empty = size_;
316 
317  table = (value_type*) realloc(table, size_ * sizeof(value_type));
318 
319  std::sort(table, table + prev_size_, sortKmerHashTable(*this));
320 
321  value_type* table_empty = table;
322 
323  for (; table_empty < table + prev_size_; table_empty++) {
324 
325  if (((*table_empty).first == empty_val.first) || ((*table_empty).first == deleted.first)) break;
326  }
327 
328  std::fill(table_empty, table + size_, empty_val);
329 
330  std::vector<value_type> v;
331 
332  for (int64_t i = table_empty - table - 1; i >= 0; i--) {
333 
334  size_t h = hasher(table[i].first) & (size_-1);
335 
336  if (h > i){
337 
338  for ( ; h < size_; h++) {
339 
340  if (table[h].first == empty_val.first) {
341 
342  num_empty--;
343  pop++;
344 
345  std::swap(table[h], table[i]);
346 
347  break;
348  }
349  }
350 
351  if (h == size_){
352 
353  v.push_back(table[i]);
354  table[i] = empty_val;
355  }
356  }
357  }
358 
359  for (auto& vt : v) insert(vt);*/
360  }
361 
// Rounds v up to the nearest power of two (values that already are a power of
// two are returned unchanged).
// rndup(0) returns 1: the bit trick alone wraps around to 0, which would yield
// a table with zero slots and an all-ones index mask (size_-1).
// NOTE: for v above the largest power of two representable in size_t the
// result still wraps to 0.
size_t rndup(size_t v) {

    if (v <= 1) return 1;

    --v;

    // Smear the highest set bit into all lower positions. The shift amounts
    // stay below the width of size_t, so this is also valid when size_t is
    // 32 bits wide (a fixed shift by 32 would be undefined there).
    for (size_t shift = 1; shift < sizeof(size_t) * 8; shift <<= 1) v |= v >> shift;

    return v + 1;
}
373 
374  iterator begin() {
375 
376  iterator it(this);
377  it.find_first();
378  return it;
379  }
380 
381  const_iterator begin() const {
382 
383  const_iterator it(this);
384  it.find_first();
385  return it;
386  }
387 
388  iterator end() { return iterator(this); }
389 
390  const_iterator end() const { return const_iterator(this); }
391 
392  /*private:
393 
394  struct sortKmerHashTable {
395 
396  sortKmerHashTable(const KmerHashTable& kht_) : kht(kht_) {}
397 
398  bool operator() (const value_type& a, const value_type& b) const {
399 
400  const size_t h_a = (a.first == kht.empty_val.first) || (a.first == kht.deleted.first) ? 0xffffffffffffffff : kht.hasher(a.first) & (kht.size_-1);
401  const size_t h_b = (b.first == kht.empty_val.first) || (b.first == kht.deleted.first) ? 0xffffffffffffffff : kht.hasher(b.first) & (kht.size_-1);
402 
403  return (h_a < h_b);
404  }
405 
406  const KmerHashTable& kht;
407  };*/
408 };
409 
410 template<typename T, typename Hash = MinimizerHash>
412 
413  using value_type = std::pair<Minimizer, T>;
414  using key_type = Minimizer;
415  using mapped_type = T;
416 
417  Hash hasher;
418 
419  size_t size_, pop, num_empty;
420 
421  value_type* table;
422 
423  value_type empty_val;
424  value_type deleted;
425 
426 // ---- iterator ----
427  template<bool is_const_iterator = true>
428  class iterator_ : public std::iterator<std::forward_iterator_tag, value_type> {
429 
430  public:
431 
432  typedef typename std::conditional<is_const_iterator, const MinimizerHashTable *, MinimizerHashTable *>::type DataStructurePointerType;
433  typedef typename std::conditional<is_const_iterator, const value_type&, value_type&>::type ValueReferenceType;
434  typedef typename std::conditional<is_const_iterator, const value_type *, value_type *>::type ValuePointerType;
435 
436  DataStructurePointerType ht;
437  size_t h;
438 
439  iterator_() : ht(nullptr), h(0) {}
440  iterator_(DataStructurePointerType ht_) : ht(ht_), h(ht_->size_) {}
441  iterator_(DataStructurePointerType ht_, size_t h_) : ht(ht_), h(h_) {}
442  iterator_(const iterator_<false>& o) : ht(o.ht), h(o.h) {}
443  iterator_& operator=(const iterator_& o) {ht=o.ht; h=o.h; return *this;}
444 
445  ValueReferenceType operator*() const {return ht->table[h];}
446  ValuePointerType operator->() const {return &(ht->table[h]);}
447 
448  size_t getHash() const { return h; }
449 
450  void find_first() {
451 
452  h = 0;
453 
454  if (ht->table != nullptr && ht->size_>0) {
455 
456  Minimizer& minz = ht->table[h].first;
457 
458  if (minz == ht->empty_val.first || minz == ht->deleted.first) operator++();
459  }
460  }
461 
462  iterator_ operator++(int) {
463 
464  const iterator_ old(*this);
465  ++(*this);
466  return old;
467  }
468 
469  iterator_& operator++() {
470 
471  if (h == ht->size_) return *this;
472 
473  ++h;
474 
475  for (; h < ht->size_; ++h) {
476 
477  Minimizer& minz = ht->table[h].first;
478 
479  if (minz != ht->empty_val.first && minz != ht->deleted.first) break;
480  }
481 
482  return *this;
483  }
484 
485  bool operator==(const iterator_ &o) const {return (ht->table == o.ht->table) && (h == o.h);}
486  bool operator!=(const iterator_ &o) const {return !(this->operator==(o));}
487  friend class iterator_<true>;
488  };
489 
491  typedef iterator_<false> iterator;
492 
493  // --- hash table
494  MinimizerHashTable(const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0), num_empty(0) {
495 
496  empty_val.first.set_empty();
497  deleted.first.set_deleted();
498  init_table(1024);
499  }
500 
501  MinimizerHashTable(size_t sz, const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0), num_empty(0) {
502 
503  empty_val.first.set_empty();
504  deleted.first.set_deleted();
505  init_table((size_t) (1.2*sz));
506  }
507 
509 
510  hasher = o.hasher;
511  size_ = o.size_;
512  pop = o.pop;
513  num_empty = o.num_empty;
514  table = o.table;
515  empty_val = o.empty_val;
516  deleted = o.deleted;
517 
518  o.table = nullptr;
519 
520  o.clear_table();
521  }
522 
523  MinimizerHashTable& operator=(MinimizerHashTable&& o){
524 
525  if (this != &o) {
526 
527  clear_table();
528 
529  hasher = o.hasher;
530  size_ = o.size_;
531  pop = o.pop;
532  num_empty = o.num_empty;
533  table = o.table;
534  empty_val = o.empty_val;
535  deleted = o.deleted;
536 
537  o.table = nullptr;
538 
539  o.clear_table();
540  }
541 
542  return *this;
543  }
544 
545  ~MinimizerHashTable() { clear_table(); }
546 
547  void clear_table() {
548 
549  if (table != nullptr) {
550 
551  delete[] table;
552  //free(table);
553  table = nullptr;
554  }
555 
556  size_ = 0;
557  pop = 0;
558  num_empty = 0;
559  }
560 
561  size_t size() const { return pop; }
562 
563  bool empty() const { return pop == 0; }
564 
565  void clear() {
566 
567  std::fill(table, table+size_, empty_val);
568 
569  pop = 0;
570  num_empty = size_;
571  }
572 
573  void init_table(size_t sz) {
574 
575  clear_table();
576 
577  size_ = rndup(sz);
578 
579  table = new value_type[size_];
580  //table = (value_type*) malloc(size_ * sizeof(value_type));
581 
582  clear();
583  }
584 
585  iterator find(const Minimizer& key) {
586 
587  size_t h = hasher(key) & (size_-1);
588  size_t end_h = (h == 0) ? (size_-1) : h-1;
589 
590  for (;; h = (h+1!=size_ ? h+1 : 0)) {
591 
592  if (table[h].first == empty_val.first) return iterator(this); // empty slot, not in table
593  else if (table[h].first == key) return iterator(this, h); // same key, found
594 
595  // if it is deleted, we still have to continue
596  if (h==end_h) return iterator(this); // we've gone throught the table, quit
597  }
598  }
599 
600  const_iterator find(const Minimizer& key) const {
601 
602  size_t h = hasher(key) & (size_-1);
603  size_t end_h = (h == 0) ? (size_-1) : h-1;
604 
605  for (;; h = (h+1!=size_ ? h+1 : 0)) {
606 
607  if (table[h].first == empty_val.first) return const_iterator(this); // empty slot, not in table
608  else if (table[h].first == key) return const_iterator(this, h); // same key, found
609 
610  if (h==end_h) return const_iterator(this);
611  }
612  }
613 
614  iterator find(const size_t h) {
615 
616  if ((h < size_) && (table[h].first != empty_val.first) && (table[h].first != deleted.first))
617  return iterator(this, h);
618 
619  return iterator(this);
620  }
621 
622  const_iterator find(const size_t h) const {
623 
624  if ((h < size_) && (table[h].first != empty_val.first) && (table[h].first != deleted.first))
625  return const_iterator(this, h);
626 
627  return const_iterator(this);
628  }
629 
630  iterator erase(const_iterator pos) {
631 
632  if (pos == this->end()) return this->end();
633 
634  table[pos.h] = deleted;
635  --pop;
636 
637  return ++iterator(this, pos.h); // return pointer to next element
638  }
639 
640  size_t erase(const Minimizer& minz) {
641 
642  const_iterator pos = find(minz);
643 
644  size_t oldpop = pop;
645  if (pos != this->end()) erase(pos);
646 
647  return oldpop-pop;
648  }
649 
650  std::pair<iterator,bool> insert(const value_type& val) {
651 
652  if ((5*num_empty) < size_) reserve(2*size_); // if more than 80% full, resize
653 
654  bool is_deleted = false;
655 
656  for (size_t h = hasher(val.first) & (size_-1), h_tmp;; h = (h+1 != size_ ? h+1 : 0)) {
657 
658  if (table[h].first == empty_val.first) {
659 
660  if (!is_deleted) num_empty--;
661  else h = h_tmp;
662 
663  table[h] = val;
664  ++pop;
665 
666  return {iterator(this, h), true};
667  }
668  else if (table[h].first == val.first) return {iterator(this, h), false};
669  else if (!is_deleted && (table[h].first == deleted.first)) {
670  is_deleted = true;
671  h_tmp = h;
672  }
673  }
674  }
675 
676  void reserve(size_t sz) {
677 
678  if (sz <= size_) return;
679 
680  value_type *old_table = table;
681  size_t old_size_ = size_;
682 
683  size_ = rndup(sz);
684  pop = 0;
685  num_empty = size_;
686 
687  table = new value_type[size_];
688 
689  std::fill(table, table+size_, empty_val);
690 
691  for (size_t i = 0; i < old_size_; i++) {
692 
693  if (old_table[i].first != empty_val.first && old_table[i].first != deleted.first) insert(old_table[i]);
694  }
695 
696  free(old_table);
697  old_table = nullptr;
698 
699  /*if (sz <= size_) return;
700 
701  const size_t prev_size_ = size_;
702 
703  size_ = rndup(sz);
704  pop = 0;
705  num_empty = size_;
706 
707  table = (value_type*) realloc(table, size_ * sizeof(value_type));
708 
709  std::sort(table, table + prev_size_, sortMinimizerHashTable(*this));
710 
711  value_type* table_empty = table;
712 
713  for (; table_empty < table + prev_size_; table_empty++) {
714 
715  if (((*table_empty).first == empty_val.first) || ((*table_empty).first == deleted.first)) break;
716  }
717 
718  std::fill(table_empty, table + size_, empty_val);
719 
720  std::vector<value_type> v;
721 
722  for (int64_t i = table_empty - table - 1; i >= 0; i--) {
723 
724  size_t h = hasher(table[i].first) & (size_-1);
725 
726  if (h > i){
727 
728  for ( ; h < size_; h++) {
729 
730  if (table[h].first == empty_val.first) {
731 
732  num_empty--;
733  pop++;
734 
735  std::swap(table[h], table[i]);
736 
737  break;
738  }
739  }
740 
741  if (h == size_){
742 
743  v.push_back(table[i]);
744  table[i] = empty_val;
745  }
746  }
747  }
748 
749  for (auto& vt : v) insert(vt);*/
750  }
751 
// Rounds v up to the nearest power of two (values that already are a power of
// two are returned unchanged).
// rndup(0) returns 1: the bit trick alone wraps around to 0, which would yield
// a table with zero slots and an all-ones index mask (size_-1).
// NOTE: for v above the largest power of two representable in size_t the
// result still wraps to 0.
size_t rndup(size_t v) {

    if (v <= 1) return 1;

    --v;

    // Smear the highest set bit into all lower positions. The shift amounts
    // stay below the width of size_t, so this is also valid when size_t is
    // 32 bits wide (a fixed shift by 32 would be undefined there).
    for (size_t shift = 1; shift < sizeof(size_t) * 8; shift <<= 1) v |= v >> shift;

    return v + 1;
}
765 
766  iterator begin() {
767 
768  iterator it(this);
769  it.find_first();
770  return it;
771  }
772 
773  const_iterator begin() const {
774 
775  const_iterator it(this);
776  it.find_first();
777  return it;
778  }
779 
780  iterator end() { return iterator(this); }
781 
782  const_iterator end() const { return const_iterator(this); }
783 
784  /*private:
785 
786  struct sortMinimizerHashTable {
787 
788  sortMinimizerHashTable(const MinimizerHashTable& mht_) : mht(mht_) {}
789 
790  bool operator() (const value_type& a, const value_type& b) const {
791 
792  const size_t h_a = (a.first == mht.empty_val.first) || (a.first == mht.deleted.first) ? 0xffffffffffffffff : mht.hasher(a.first) & (mht.size_-1);
793  const size_t h_b = (b.first == mht.empty_val.first) || (b.first == mht.deleted.first) ? 0xffffffffffffffff : mht.hasher(b.first) & (mht.size_-1);
794 
795  return (h_a < h_b);
796  }
797 
798  const MinimizerHashTable& mht;
799  };*/
800 };
801 
802 #endif // KALLISTO_KMERHASHTABLE_H
Definition: KmerHashTable.h:26
Definition: KmerHashTable.h:11
Definition: KmerHashTable.h:428
Definition: Kmer.hpp:114
Definition: Kmer.hpp:31
Definition: KmerHashTable.h:411