BFGraph
kseq.h
/* The MIT License

 Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor@live.co.uk>

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice shall be
 included in all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
*/
25 
/* Last Modified: 05MAR2012 */
27 
28 #ifndef AC_KSEQ_H
29 #define AC_KSEQ_H
30 
31 #include <ctype.h>
32 #include <string.h>
33 #include <stdlib.h>
34 
/* Delimiter classes understood by ks_getuntil()/ks_getuntil2(). A delimiter
 * value greater than KS_SEP_MAX is matched as a literal character code. */
#define KS_SEP_SPACE 0 /* any isspace() character: ' ', \t, \n, \v, \f, \r */
#define KS_SEP_TAB   1 /* any isspace() character except ' ' */
#define KS_SEP_LINE  2 /* line separator: "\n" (Unix) or "\r\n" (Windows) */
#define KS_SEP_MAX   2 /* largest value reserved for a delimiter class */
39 
/*
 * Declares kstream_t, a buffered reader over a handle of type `type_t`.
 *   buf    - read buffer (allocated by ks_init)
 *   begin  - index in buf of the next unread byte
 *   end    - number of valid bytes currently in buf
 *   is_eof - non-zero once the read callback has reported end of input
 *   f      - the handle handed to the read callback
 */
#define __KS_TYPE(type_t) \
	typedef struct __kstream_t { \
		unsigned char *buf; \
		int begin, end, is_eof; \
		type_t f; \
	} kstream_t;
46 
/* Non-zero when end of input has been seen and the buffer is fully consumed. */
#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
/* Clears the buffer bookkeeping and the EOF flag; evaluates to 0. Only the
 * stream's own fields are touched; the handle is not repositioned here. */
#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
49 
/*
 * Defines ks_init() and ks_destroy() for kstream_t.
 *
 * ks_init(f)
 *   Allocates a zero-initialised kstream_t that wraps handle `f`, plus a read
 *   buffer of `__bufsize` bytes. Returns NULL if either allocation fails
 *   (nothing is leaked in that case).
 * ks_destroy(ks)
 *   Frees the buffer and the stream object. `ks` may be NULL. The wrapped
 *   handle itself is left untouched.
 *
 * The casts on the allocation results are kept from the original code
 * (presumably so the expansion also builds as C++).
 */
#define __KS_BASIC(type_t, __bufsize) \
	static inline kstream_t *ks_init(type_t f) \
	{ \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
		if (ks == NULL) return NULL; \
		ks->buf = (unsigned char*)malloc(__bufsize); \
		if (ks->buf == NULL) { \
			free(ks); \
			return NULL; \
		} \
		ks->f = f; \
		return ks; \
	} \
	static inline void ks_destroy(kstream_t *ks) \
	{ \
		if (ks) { \
			free(ks->buf); \
			free(ks); \
		} \
	}
65 
/*
 * Defines ks_getc(): returns the next byte of the stream as a value in
 * 0..255, or -1 once no more input is available.
 *
 * `__read(handle, buf, size)` is expected to return the number of bytes it
 * stored in `buf`. A return of 0 means end of input; a negative return (the
 * error convention of read()/gzread()) is also treated as end of input so
 * that a failed read can never expose stale buffer contents or loop forever.
 */
#define __KS_GETC(__read, __bufsize) \
	static inline int ks_getc(kstream_t *ks) \
	{ \
		if (ks->is_eof && ks->begin >= ks->end) return -1; \
		if (ks->begin >= ks->end) { /* buffer drained: refill it */ \
			ks->begin = 0; \
			ks->end = __read(ks->f, ks->buf, __bufsize); \
			if (ks->end <= 0) { /* 0: end of input; <0: read error */ \
				ks->end = 0; /* keep begin >= end so ks_eof() holds */ \
				ks->is_eof = 1; \
				return -1; \
			} \
		} \
		return (int)ks->buf[ks->begin++]; \
	}
77 
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/*
 * Growable character buffer.
 *   l - number of bytes in use (the terminating NUL is not counted)
 *   m - number of bytes allocated for s
 *   s - storage; NULL until something has been allocated
 */
typedef struct __kstring_t {
	size_t l, m;
	char *s;
} kstring_t;
#endif
85 
#ifndef kroundup32
/*
 * Rounds the lvalue `x` up, in place, to the next power of two and evaluates
 * to the new value; a value that already is a power of two is unchanged.
 * Only shifts up to 16 are applied, so the power-of-two result is exact for
 * values that fit in 32 bits. `x` is evaluated several times: pass a plain
 * variable, never an expression with side effects.
 */
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
89 
/*
 * Defines ks_getuntil2() and its overwrite-mode wrapper ks_getuntil().
 *
 * ks_getuntil2(ks, delimiter, str, dret, append)
 *   Copies bytes from `ks` into `str` up to, but not including, the next
 *   delimiter, and consumes that delimiter.
 *     delimiter - KS_SEP_SPACE, KS_SEP_TAB, KS_SEP_LINE, or a literal
 *                 character code greater than KS_SEP_MAX
 *     str       - destination buffer; grown with realloc() as needed and
 *                 NUL-terminated whenever a non-negative value is returned
 *     dret      - if non-NULL, receives the delimiter byte that ended the
 *                 token, or 0 if the input ended first
 *     append    - non-zero: append after the existing str->l bytes;
 *                 zero: overwrite from the start
 *   Returns the resulting str->l, or -1 when the stream was already
 *   exhausted and nothing could be read.
 *   For KS_SEP_LINE a single trailing '\r' is dropped so "\r\n" line ends
 *   are handled, including on otherwise empty lines.
 *
 * `__read(handle, buf, size)` must return the number of bytes stored; zero
 * or a negative value (read error) is treated as end of input.
 *
 * NOTE(review): a failed realloc() is not handled here; confirm the
 * project's out-of-memory policy before relying on this under memory
 * pressure.
 *
 * ks_getuntil(ks, delimiter, str, dret) is ks_getuntil2() with append == 0.
 */
#define __KS_GETUNTIL(__read, __bufsize) \
	static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
	{ \
		int gotany = 0; \
		if (dret) *dret = 0; \
		str->l = append? str->l : 0; \
		for (;;) { \
			int i; \
			if (ks->begin >= ks->end) { /* buffer drained: refill or stop */ \
				if (ks->is_eof) break; \
				ks->begin = 0; \
				ks->end = __read(ks->f, ks->buf, __bufsize); \
				if (ks->end <= 0) { /* 0: end of input; <0: read error */ \
					ks->end = 0; /* keep begin >= end so ks_eof() holds */ \
					ks->is_eof = 1; \
					break; \
				} \
			} \
			/* scan the buffered bytes for the first delimiter; i == ks->end if none */ \
			if (delimiter == KS_SEP_LINE) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (ks->buf[i] == '\n') break; \
			} else if (delimiter > KS_SEP_MAX) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (ks->buf[i] == delimiter) break; \
			} else if (delimiter == KS_SEP_SPACE) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i])) break; \
			} else if (delimiter == KS_SEP_TAB) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
			} else i = 0; /* never come to here! */ \
			/* make room for the new bytes plus the terminating NUL */ \
			if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
				str->m = str->l + (i - ks->begin) + 1; \
				kroundup32(str->m); \
				str->s = (char*)realloc(str->s, str->m); \
			} \
			gotany = 1; \
			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
			str->l = str->l + (i - ks->begin); \
			ks->begin = i + 1; /* step over the delimiter (or past the end) */ \
			if (i < ks->end) { /* delimiter found inside the buffer */ \
				if (dret) *dret = ks->buf[i]; \
				break; \
			} \
		} \
		if (!gotany && ks_eof(ks)) return -1; \
		if (str->s == 0) { \
			str->m = 1; \
			str->s = (char*)calloc(1, 1); \
		} else if (delimiter == KS_SEP_LINE && str->l > 0 && str->s[str->l-1] == '\r') --str->l; \
		str->s[str->l] = '\0'; \
		return (int)str->l; \
	} \
	static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
142 
/*
 * Instantiates the whole kstream layer for one handle type: the kstream_t
 * type, ks_init()/ks_destroy(), ks_getc(), and ks_getuntil()/ks_getuntil2().
 *   type_t    - type of the wrapped handle
 *   __read    - callable invoked as __read(handle, buf, size)
 *   __bufsize - size in bytes of the internal read buffer
 */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
	__KS_TYPE(type_t) \
	__KS_BASIC(type_t, __bufsize) \
	__KS_GETC(__read, __bufsize) \
	__KS_GETUNTIL(__read, __bufsize)
148 
/* Resets a kseq_t parser: clears the remembered header character and the
 * buffer/EOF state of its stream. Evaluates to 0. Only these fields are
 * touched; the underlying handle is not repositioned here. */
#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
150 
/*
 * Defines kseq_init() and kseq_destroy() with storage class `SCOPE`.
 *
 * kseq_init(fd)
 *   Allocates a zero-initialised kseq_t whose stream wraps handle `fd`.
 *   Returns NULL if the record, the stream, or the stream's read buffer
 *   cannot be allocated; nothing is leaked in that case.
 * kseq_destroy(ks)
 *   Frees the name/comment/seq/qual buffers, the stream, and the record.
 *   `ks` may be NULL. The wrapped handle is not released here.
 */
#define __KSEQ_BASIC(SCOPE, type_t) \
	SCOPE kseq_t *kseq_init(type_t fd) \
	{ \
		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
		if (s == NULL) return NULL; \
		s->f = ks_init(fd); \
		if (s->f == NULL || s->f->buf == NULL) { /* stream or its buffer is missing */ \
			ks_destroy(s->f); /* accepts NULL */ \
			free(s); \
			return NULL; \
		} \
		return s; \
	} \
	SCOPE void kseq_destroy(kseq_t *ks) \
	{ \
		if (!ks) return; \
		free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
		ks_destroy(ks->f); \
		free(ks); \
	}
165 
/* Return value:
   >=0  length of the sequence (normal)
   -1   end-of-file
   -2   truncated quality string
   -4   memory allocation failed, or the required buffer size overflowed
 */
/*
 * Defines kseq_read() with storage class `SCOPE`.
 *
 * kseq_read(seq) parses the next FASTA ('>') or FASTQ ('@') record from
 * seq->f into seq->name, seq->comment, seq->seq and, for FASTQ, seq->qual.
 * Sequence and quality text may span several lines. seq->last_char carries
 * a header character that was already consumed while reading the previous
 * record. On a negative return the record fields should not be relied on.
 */
#define __KSEQ_READ(SCOPE) \
	SCOPE int kseq_read(kseq_t *seq) \
	{ \
		int c; \
		char *grown; /* result of malloc/realloc, checked before it replaces a buffer */ \
		kstream_t *ks = seq->f; \
		if (seq->last_char == 0) { /* then jump to the next header line */ \
			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
			if (c == -1) return -1; /* end of file */ \
			seq->last_char = c; \
		} /* else: the first header char has been read in the previous call */ \
		seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
		if (ks_getuntil(ks, KS_SEP_SPACE, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
		if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
		if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
			grown = (char*)malloc(256); \
			if (grown == NULL) return -4; \
			seq->seq.m = 256; \
			seq->seq.s = grown; \
		} \
		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
			if (c == '\n') continue; /* skip empty lines */ \
			seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
			ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
		} \
		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
		if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
			size_t want = seq->seq.l + 2; \
			kroundup32(want); /* rounded to the next closest 2^k */ \
			if (seq->seq.l + 1 >= want) return -4; /* rounding wrapped around */ \
			grown = (char*)realloc(seq->seq.s, want); \
			if (grown == NULL) return -4; /* seq->seq.s is still valid and owned by seq */ \
			seq->seq.s = grown; \
			seq->seq.m = want; \
		} \
		seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
		if (c != '+') return (int)seq->seq.l; /* FASTA */ \
		if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \
			grown = (char*)realloc(seq->qual.s, seq->seq.m); \
			if (grown == NULL) return -4; /* seq->qual.s is still valid and owned by seq */ \
			seq->qual.s = grown; \
			seq->qual.m = seq->seq.m; \
		} \
		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
		if (c == -1) return -2; /* error: no quality string */ \
		while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
		seq->last_char = 0; /* we have not come to the next header line */ \
		if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
		return (int)seq->seq.l; \
	}
212 
/*
 * Declares kseq_t, the state of one FASTA/FASTQ parser.
 *   name, comment, seq, qual - fields of the most recently parsed record
 *   last_char - header character ('>' or '@') already consumed while
 *               reading the previous record, or 0 if none is pending
 *   f         - the buffered input stream
 * The `type_t` argument is not used in the expansion.
 */
#define __KSEQ_TYPE(type_t) \
	typedef struct { \
		kstring_t name, comment, seq, qual; \
		int last_char; \
		kstream_t *f; \
	} kseq_t;
219 
/*
 * Instantiates the complete reader for one handle type: the kstream layer
 * with a 16384-byte buffer, the kseq_t type, and kseq_init(),
 * kseq_destroy() and kseq_read() declared with storage class `SCOPE`.
 *   SCOPE  - storage-class specifier placed before the three kseq_* functions
 *   type_t - type of the wrapped handle
 *   __read - callable invoked as __read(handle, buf, size)
 */
#define KSEQ_INIT2(SCOPE, type_t, __read) \
	KSTREAM_INIT(type_t, __read, 16384) \
	__KSEQ_TYPE(type_t) \
	__KSEQ_BASIC(SCOPE, type_t) \
	__KSEQ_READ(SCOPE)
225 
/* Same as KSEQ_INIT2 with the kseq_* functions declared `static`. */
#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
227 
/*
 * Declares the kstream_t and kseq_t types and the prototypes of kseq_init(),
 * kseq_destroy() and kseq_read() without defining the functions.
 * NOTE(review): presumably paired with a KSEQ_INIT2 instantiation using a
 * non-static SCOPE in another translation unit; confirm against the callers.
 * The expansion already ends in ';'.
 */
#define KSEQ_DECLARE(type_t) \
	__KS_TYPE(type_t) \
	__KSEQ_TYPE(type_t) \
	extern kseq_t *kseq_init(type_t fd); \
	void kseq_destroy(kseq_t *ks); \
	int kseq_read(kseq_t *seq);
234 
235 #endif
Definition: kseq.h:80