Bifrost
ColoredCDBG.hpp
Go to the documentation of this file.
1 #ifndef BFG_COLOREDCDBG_HPP
2 #define BFG_COLOREDCDBG_HPP
3 
4 #include <iostream>
5 #include <random>
6 #include <unordered_map>
7 #include <unordered_set>
8 
9 #include "CompactedDBG.hpp"
10 #include "DataManager.hpp"
11 
12 #include "getRSS.h"
13 
31 
33 
35 
36  CCDBG_Build_opt() : outputColors(true) {}
37 };
38 
39 template<typename U> using UnitigColorMap = UnitigMap<DataAccessor<U>, DataStorage<U>>;
40 template<typename U> using const_UnitigColorMap = const_UnitigMap<DataAccessor<U>, DataStorage<U>>;
41 
// Base class for user data associated with the unitigs of a ColoredCDBG.
// ColoredCDBG<Unitig_data_t> statically requires Unitig_data_t to be void or
// to derive from CCDBG_Data_t<Unitig_data_t>. Every member function below has
// a default body that does nothing (serialize() returns an empty string); none
// of them is virtual, so a derived class customizes behavior by declaring
// functions with the same signatures.
59 template<typename Unitig_data_t> //Curiously Recurring Template Pattern (CRTP)
60 class CCDBG_Data_t {
61 
    // Short alias for the derived data type; used in the signatures below.
62  typedef Unitig_data_t U;
63 
64  public:
65 
    // Clear the data associated with a unitig.
    // um_dest: mapping to the unitig whose data is to be cleared.
    // Default implementation: no-op.
72  void clear(const UnitigColorMap<U>& um_dest){}
73 
    // Join the data of two unitigs which are going to be concatenated.
    // um_dest, um_src: mappings to the two unitigs (by the parameter names,
    // presumably the result is stored with um_dest -- confirm against callers).
    // Default implementation: no-op.
88  void concat(const UnitigColorMap<U>& um_dest, const UnitigColorMap<U>& um_src){}
89 
    // Merge the data of one sub-unitig into the data of another sub-unitig.
    // um_dest: mapping to the sub-unitig receiving the data (presumably).
    // um_src: constant mapping to the sub-unitig whose data is merged in.
    // Default implementation: no-op.
100  void merge(const UnitigColorMap<U>& um_dest, const const_UnitigColorMap<U>& um_src){}
101 
    // Extract the data corresponding to a sub-unitig of a unitig.
    // uc_dest: color sets (UnitigColors) related to the extraction target.
    // um_src: mapping describing the sub-unitig to extract from.
    // last_extraction: presumably true for the final extraction performed on
    // the source unitig -- confirm against callers.
    // Default implementation: no-op.
117  void extract(const UnitigColors* uc_dest, const UnitigColorMap<U>& um_src, const bool last_extraction){}
118 
    // Serialize the data to a GFA-formatted string.
    // um_src: constant mapping to the unitig whose data is serialized.
    // Default implementation: returns an empty string.
126  string serialize(const const_UnitigColorMap<U>& um_src) const {
127 
128  return string();
129  }
130 };
131 
// Represent a Colored and Compacted de Bruijn graph.
// Specializes CompactedDBG with DataAccessor<Unitig_data_t> as per-unitig data
// and DataStorage<Unitig_data_t> as graph-level data. Unitig_data_t defaults to
// void (no user data). The member functions declared without a body here are
// defined in ColoredCDBG.tcc, which is included at the end of this header.
150 template<typename Unitig_data_t = void>
151 class ColoredCDBG : public CompactedDBG<DataAccessor<Unitig_data_t>, DataStorage<Unitig_data_t>> {
152 
    // Compile-time check: user data must be void or derive from
    // CCDBG_Data_t<Unitig_data_t>.
153  static_assert(is_void<Unitig_data_t>::value || is_base_of<CCDBG_Data_t<Unitig_data_t>, Unitig_data_t>::value,
154  "Type Unitig_data_t of data associated with vertices of class ColoredCDBG<Unitig_data_t> must "
155  " be void (no data) or a class extending class CCDBG_Data_t");
156 
    // Short alias for the user data type.
157  typedef Unitig_data_t U;
158 
    // Every DataAccessor specialization may access the private members of this class.
159  template<typename U> friend class DataAccessor;
160 
161  public:
162 
    // Constructor.
    // kmer_length: length of the k-mers (defaults to DEFAULT_K).
    // minimizer_length: length of the minimizers (defaults to DEFAULT_G).
167  ColoredCDBG(int kmer_length = DEFAULT_K, int minimizer_length = DEFAULT_G);
168 
    // Copy constructor: builds this graph from o.
174  ColoredCDBG(const ColoredCDBG& o);
175 
182 
    // Copy assignment from o; returns a reference to this graph.
189  ColoredCDBG& operator=(const ColoredCDBG& o);
190 
    // Move assignment from o; returns a reference to this graph.
197  ColoredCDBG& operator=(ColoredCDBG&& o);
198 
    // Equality comparison with another colored graph.
203  bool operator==(const ColoredCDBG& o) const;
204 
    // Inequality comparison with another colored graph.
209  inline bool operator!=(const ColoredCDBG& o) const;
210 
    // Compound addition of graph o to this graph; returns a reference to this
    // graph. (Presumably equivalent to merging o into this graph -- confirm
    // in ColoredCDBG.tcc.)
223  ColoredCDBG& operator+=(const ColoredCDBG& o);
224 
    // Clear the graph. (Exact extent of what is reset is defined in ColoredCDBG.tcc.)
227  void clear();
228 
    // Build the graph using the options in opt.
    // Returns a boolean status (presumably true on success -- confirm).
234  bool buildGraph(const CCDBG_Build_opt& opt);
235 
    // Build the colors of the graph using the options in opt.
    // Returns a boolean status (presumably true on success -- confirm).
241  bool buildColors(const CCDBG_Build_opt& opt);
242 
    // Write the graph to disk.
    // prefix_output_filename: prefix of the name(s) of the output file(s).
    // nb_threads: number of threads to use (default 1).
    // verbose: print information messages during execution if true.
    // Returns a boolean status (presumably true on success -- confirm).
251  bool write(const string& prefix_output_filename, const size_t nb_threads = 1, const bool verbose = false) const;
252 
    // Read a graph and its colors from disk.
    // input_graph_filename: name of the graph file to read.
    // input_colors_filename: name of the color file to read.
    // nb_threads: number of threads to use (default 1).
    // verbose: print information messages during execution if true.
    // Returns a boolean status (presumably true on success -- confirm).
262  bool read(const string& input_graph_filename, const string& input_colors_filename, const size_t nb_threads = 1, const bool verbose = false);
263 
    // Merge another colored graph o into this graph (o is taken by const reference).
    // nb_threads: number of threads to use (default 1).
    // verbose: print information messages during execution if true.
    // Returns a boolean status (presumably true on success -- confirm).
276  bool merge(const ColoredCDBG& o, const size_t nb_threads = 1, const bool verbose = false);
277 
    // Same as above, but o is taken by rvalue reference, so its content may be
    // consumed by the merge.
292  bool merge(ColoredCDBG&& o, const size_t nb_threads = 1, const bool verbose = false);
293 
    // Merge all the colored graphs of vector v into this graph (v is taken by
    // const reference).
304  bool merge(const vector<ColoredCDBG>& v, const size_t nb_threads = 1, const bool verbose = false);
305 
    // Same as above, but v is taken by rvalue reference, so its content may be
    // consumed by the merge.
318  bool merge(vector<ColoredCDBG>&& v, const size_t nb_threads = 1, const bool verbose = false);
319 
    // Get the name of the color with identifier color_id.
324  string getColorName (const size_t color_id) const;
325 
    // Get the names of the colors of the graph.
330  vector<string> getColorNames() const;
331 
    // Get the number of colors in the graph.
    // Forwards to getNbColors() on the object returned by this->getData().
335  inline size_t getNbColors() const { return this->getData()->getNbColors(); }
336 
    // Check the colors of the graph against the sequence files named in
    // filename_seq_in. (NOTE(review): looks like a consistency/debug check --
    // confirm in ColoredCDBG.tcc.)
337  void checkColors(const vector<string>& filename_seq_in) const;
338 
339  private:
340 
    // Internal helpers for color construction; see ColoredCDBG.tcc for details.
    // max_nb_hash defaults to 31 in both functions that take it.
341  void initUnitigColors(const CCDBG_Build_opt& opt, const size_t max_nb_hash = 31);
342  void buildUnitigColors(const size_t nb_threads);
343  //void buildUnitigColors2(const size_t nb_threads);
344 
345  void resizeDataUC(const size_t sz, const size_t nb_threads = 1, const size_t max_nb_hash = 31);
346 
    // Validity flag of the graph (presumably true when the graph is in an
    // invalid state -- confirm where it is set in ColoredCDBG.tcc).
347  bool invalid;
348 };
349 
350 #include "ColoredCDBG.tcc"
351 
352 #endif
This structure inherits from CDBG_Build_opt and introduces only a few new members which are color-related.
Definition: ColoredCDBG.hpp:30
Represent the k-mer color sets of a unitig.
Definition: ColorSet.hpp:21
vector< string > filename_seq_in
Vector of strings, each string is the name of a FASTA/FASTQ/GFA file to use for the graph construction.
Definition: CompactedDBG.hpp:138
void clear(const UnitigColorMap< U > &um)
Clear the colors and data associated with a colored unitig.
Most members of this structure are parameters for CompactedDBG<U, G>::build(), except for: ...
Definition: CompactedDBG.hpp:124
Represent a Compacted de Bruijn graph.
Definition: CompactedDBG.hpp:297
Represent a Colored and Compacted de Bruijn graph.
Definition: ColoredCDBG.hpp:151
void extract(const UnitigColors *uc_dest, const UnitigColorMap< U > &um_src, const bool last_extraction)
Extract data corresponding to a sub-unitig of a unitig A.
Definition: ColoredCDBG.hpp:117
bool verbose
Print information messages during execution if true.
Definition: CompactedDBG.hpp:126
void concat(const UnitigColorMap< U > &um_dest, const UnitigColorMap< U > &um_src)
Join data of two unitigs which are going to be concatenated.
Definition: ColoredCDBG.hpp:88
Definition: ColorSet.hpp:16
Contain all the information for the mapping of a k-mer or a sequence to a unitig of a Compacted de Bruijn graph.
Definition: NeighborIterator.hpp:12
string filename_colors_in
String containing the name of a Bifrost color file to read in ColoredCDBG<U>::read().
Definition: ColoredCDBG.hpp:32
Interface to access the colors and the data associated with a unitig of a ColoredCDBG.
Definition: ColorSet.hpp:15
string serialize(const const_UnitigColorMap< U > &um_src) const
Serialize the data to a GFA-formatted string.
Definition: ColoredCDBG.hpp:126
If data are to be associated with the unitigs of the colored and compacted de Bruijn graph...
Definition: ColoredCDBG.hpp:60
bool outputColors
Boolean indicating if the graph should be colored or not.
Definition: ColoredCDBG.hpp:34
void merge(const UnitigColorMap< U > &um_dest, const const_UnitigColorMap< U > &um_src)
Merge the data of a sub-unitig B to the data of a sub-unitig A.
Definition: ColoredCDBG.hpp:100
void merge(const UnitigColorMap< U > &um_dest, const const_UnitigColorMap< U > &um_src)
Merge the data and colors of a sub-unitig B to the data and colors of a sub-unitig A...
Interface for the Compacted de Bruijn graph API.
const U * getData(const const_UnitigColorMap< U > &um) const
Get the unitig data.
void clear(const UnitigColorMap< U > &um_dest)
Clear the data associated with a unitig.
Definition: ColoredCDBG.hpp:72
size_t nb_threads
Number of threads to use for building the graph.
Definition: CompactedDBG.hpp:127
size_t getNbColors() const
Get the number of colors in the graph.
Definition: ColoredCDBG.hpp:335