BFGraph
KmerIterator.hpp
1 #ifndef BFG_KMER_ITERATOR_HPP
2 #define BFG_KMER_ITERATOR_HPP
3 
4 #include <iostream>
5 #include <iterator>
6 #include "Kmer.hpp"
7 
8 
9 /* Short description:
10  * - Easily iterate through kmers in a read
11  * - If the read contains any N, then the N is skipped and checked whether
12  * there is a kmer to the right of the N
13  * - iter->first gives the kmer, iter->second gives the position within the reads
14  * */
15 class KmerIterator : public std::iterator<std::input_iterator_tag, std::pair<Kmer, int>, int> {
16 
17  public:
18 
19  KmerIterator() : s_(NULL), p_(), invalid_(true) {}
20  KmerIterator(const char *s) : s_(s), p_(), invalid_(false) { find_next(-1,-1,false);}
21  KmerIterator(const KmerIterator& o) : s_(o.s_), p_(o.p_), invalid_(o.invalid_) {}
22 
23  KmerIterator& operator++();
24  KmerIterator& operator+=(const int length);
25  KmerIterator operator++(int);
26  void raise(Kmer& km, Kmer& rep);
27 
28  bool operator==(const KmerIterator& o);
29  bool operator!=(const KmerIterator& o) { return !this->operator==(o);}
30 
31  std::pair<Kmer, int>& operator*();
32  std::pair<Kmer, int> *operator->();
33 
34  private:
35 
36  void find_next(size_t i, size_t j, bool last_valid);
37 
38  const char *s_;
39  std::pair<Kmer, int> p_;
40  bool invalid_;
41 };
42 
/** Iterates over the hashes of the k-mers of a sequence using a rolling hash.
 *
 * - iter->first is the hash of the current k-mer, iter->second its position
 *   in the sequence.
 * - Characters are accepted when, after clearing the lowercase bit, they are
 *   one of A, C, G, T. A k-mer window containing any other character is
 *   skipped: the iterator jumps to the first window starting after it.
 * - An exhausted (or never started) iterator has invalid == true and
 *   p_ == <0, -1>; all invalid iterators compare equal.
 *
 * Requirements on HF, as used by this class: constructible from an int
 * (the k-mer length), copyable, and providing init(const char*),
 * update(char out, char in) and hash() returning something assignable to
 * uint64_t.
 *
 * The sequence is not copied: the pointer passed to the constructor must
 * outlive the iterator.
 */
template<class HF>
class KmerHashIterator {

    public:

        /** Position the iterator on the first valid k-mer of a sequence.
         * @param _s      sequence to iterate over (may be NULL).
         * @param _length number of characters of _s to consider.
         * @param _k      length of the k-mers.
         * The iterator stays invalid if _s is NULL, _k is not positive, the
         * sequence is shorter than _k, or it contains no valid k-mer.
         */
        KmerHashIterator(const char* _s, const int _length, const int _k) : s(_s), n(_length), k(_k), hf(HF(_k)), p_(0,-1), invalid(true) {

            // Every condition must hold before s is dereferenced: a NULL
            // sequence can never be scanned, whatever the length says, and a
            // non-positive k would make the bounds check read s[-1].
            if ((_s != NULL) && (k > 0) && (n >= k)) {

                invalid = false;
                operator++();
            }
        }

        /** Build an invalid iterator, usable as the end-of-sequence sentinel. */
        KmerHashIterator() : s(NULL), n(0), k(0), hf(HF(0)), p_(0,-1), invalid(true) {}

        /** Member-wise copy, including the state of the rolling hash. */
        KmerHashIterator(const KmerHashIterator& o) : s(o.s), n(o.n), k(o.k), hf(o.hf), p_(o.p_), invalid(o.invalid) {}

        /** Two invalid iterators are equal; an invalid and a valid one never
         * are; two valid ones are equal when they refer to the same sequence,
         * length, k, hash and position.
         */
        bool operator==(const KmerHashIterator& o) const {

            if (invalid || o.invalid) return invalid && o.invalid;
            return s==o.s && n==o.n && k==o.k && p_.first==o.p_.first && p_.second==o.p_.second;
        }

        bool operator!=(const KmerHashIterator& o) const { return !this->operator==(o); }

        /** Advance to the next valid k-mer, or become invalid if there is none.
         * Has no effect on an invalid iterator.
         */
        KmerHashIterator& operator++() {

            if (invalid) return *this;

            ++(p_.second); // advance to next k-mer

            if (out_of_bounds()) {

                invalidate();
                return *this;
            }

            const int j = p_.second + k - 1; // last character of the new window

            // Past the very first window, a valid incoming character means the
            // previous hash can simply be rolled forward by one position.
            if ((p_.second != 0) && is_dna(s[j])) hf.update(s[j-k], s[j]);
            else {

                // First window, or the incoming character is not a nucleotide:
                // look for the next clean window and hash it from scratch.
                if (!seek_valid_window()) return *this;

                hf.init(&s[p_.second]);
            }

            p_.first = hf.hash();

            return *this;
        }

        /** Post-increment: advance, and return a copy of the previous state. */
        KmerHashIterator operator++(int) {

            KmerHashIterator tmp(*this);
            operator++();

            return tmp;
        }

        /** Move the iterator to the next VALID position >= p_.second + length.
         * The target is computed with signed arithmetic, so a non-positive
         * length leaves the iterator where it is (the current position
         * already satisfies the condition).
         */
        KmerHashIterator& operator+=(const int length){

            const int next_pos = p_.second + length;

            while (!invalid && p_.second < next_pos) operator++();

            return *this;
        }

        std::pair<uint64_t, int>& operator*() { return p_; }

        std::pair<uint64_t, int>* operator->() { return &(operator*()); }

        const char *s; // K-mers are from a sequence s
        int n; // Length of sequence s
        int k; // Length of k-mers
        HF hf; // Rolling hash function for k-mers of s
        std::pair<uint64_t, int> p_; // <hash, position> current k-mer
        bool invalid; // If sequence is invalid (iterating on k-mers out of bounds, etc.)

    private:

        /** True if c is A, C, G or T once its lowercase bit is masked out. */
        static bool is_dna(const char c) {

            const char u = c & 0xDF; // mask lowercase bit

            return (u == 'A') || (u == 'C') || (u == 'G') || (u == 'T');
        }

        /** True if the window starting at p_.second does not fit in the first
         * n characters, or if its last character is the string terminator.
         */
        bool out_of_bounds() const {

            return (p_.second >= n - k + 1) || (s[p_.second + k - 1] == '\0');
        }

        /** Put the iterator in its terminal state: invalid, p_ == <0, -1>. */
        void invalidate() {

            invalid = true;
            p_ = std::make_pair(0,-1);
        }

        /** Starting from the window at p_.second, move p_.second forward until
         * the window holds only nucleotides. Each window is scanned from its
         * last character backwards so that, on a bad character, the window
         * can restart right after it.
         * @return true if such a window was found; false if the sequence ran
         *         out first, in which case the iterator has been invalidated.
         */
        bool seek_valid_window() {

            int j = p_.second + k - 1;

            while (j >= p_.second) {

                if (is_dna(s[j])) --j;
                else {

                    p_.second = j + 1; // restart just after the bad character

                    if (out_of_bounds()) {

                        invalidate();
                        return false;
                    }

                    j = p_.second + k - 1;
                }
            }

            return true;
        }
};
186 
187 #endif // BFG_KMER_ITERATOR_HPP
Definition: Kmer.hpp:31
Definition: KmerIterator.hpp:44
Definition: KmerIterator.hpp:15