Codebase list seqtk / ba53853
New upstream version 1.3 Andreas Tille 5 years ago
6 changed file(s) with 249 addition(s) and 694 deletion(s). Raw diff Collapse all Expand all
00 CC=gcc
11 CFLAGS=-g -Wall -O2 -Wno-unused-function
2 BINDIR=/usr/local/bin
23
34 all:seqtk
45
56 seqtk:seqtk.c khash.h kseq.h
67 $(CC) $(CFLAGS) seqtk.c -o $@ -lz -lm
78
9 install:all
10 install seqtk $(BINDIR)
11
812 clean:
913 rm -fr gmon.out *.o ext/*.o a.out seqtk trimadap *~ *.a *.dSYM session*
+115
-107
kseq.h less more
2222 SOFTWARE.
2323 */
2424
25 /* Last Modified: 05MAR2012 */
25 /* Last Modified: 2017-02-11 */
2626
2727 #ifndef AC_KSEQ_H
2828 #define AC_KSEQ_H
3636 #define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)
3737 #define KS_SEP_MAX 2
3838
39 #define __KS_TYPE(type_t) \
40 typedef struct __kstream_t { \
41 unsigned char *buf; \
42 int begin, end, is_eof; \
43 type_t f; \
39 #define __KS_TYPE(type_t) \
40 typedef struct __kstream_t { \
41 unsigned char *buf; \
42 int begin, end, is_eof; \
43 type_t f; \
4444 } kstream_t;
4545
46 #define ks_err(ks) ((ks)->end < 0)
4647 #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
4748 #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
4849
49 #define __KS_BASIC(type_t, __bufsize) \
50 static inline kstream_t *ks_init(type_t f) \
51 { \
52 kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
53 ks->f = f; \
54 ks->buf = (unsigned char*)malloc(__bufsize); \
55 return ks; \
56 } \
57 static inline void ks_destroy(kstream_t *ks) \
58 { \
59 if (ks) { \
60 free(ks->buf); \
61 free(ks); \
62 } \
63 }
64
65 #define __KS_GETC(__read, __bufsize) \
66 static inline int ks_getc(kstream_t *ks) \
67 { \
68 if (ks->is_eof && ks->begin >= ks->end) return -1; \
69 if (ks->begin >= ks->end) { \
70 ks->begin = 0; \
71 ks->end = __read(ks->f, ks->buf, __bufsize); \
72 if (ks->end == 0) { ks->is_eof = 1; return -1;} \
73 } \
74 return (int)ks->buf[ks->begin++]; \
50 #define __KS_BASIC(type_t, __bufsize) \
51 static inline kstream_t *ks_init(type_t f) \
52 { \
53 kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
54 ks->f = f; \
55 ks->buf = (unsigned char*)malloc(__bufsize); \
56 return ks; \
57 } \
58 static inline void ks_destroy(kstream_t *ks) \
59 { \
60 if (ks) { \
61 free(ks->buf); \
62 free(ks); \
63 } \
64 }
65
66 #define __KS_GETC(__read, __bufsize) \
67 static inline int ks_getc(kstream_t *ks) \
68 { \
69 if (ks_err(ks)) return -3; \
70 if (ks_eof(ks)) return -1; \
71 if (ks->begin >= ks->end) { \
72 ks->begin = 0; \
73 ks->end = __read(ks->f, ks->buf, __bufsize); \
74 if (ks->end == 0) { ks->is_eof = 1; return -1; } \
75 else if (ks->end < 0) { ks->is_eof = 1; return -3; } \
76 } \
77 return (int)ks->buf[ks->begin++]; \
7578 }
7679
7780 #ifndef KSTRING_T
8689 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
8790 #endif
8891
89 #define __KS_GETUNTIL(__read, __bufsize) \
92 #define __KS_GETUNTIL(__read, __bufsize) \
9093 static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
91 { \
92 int gotany = 0; \
93 if (dret) *dret = 0; \
94 str->l = append? str->l : 0; \
95 for (;;) { \
96 int i; \
97 if (ks->begin >= ks->end) { \
98 if (!ks->is_eof) { \
99 ks->begin = 0; \
100 ks->end = __read(ks->f, ks->buf, __bufsize); \
101 if (ks->end == 0) { ks->is_eof = 1; break; } \
102 } else break; \
103 } \
94 { \
95 int gotany = 0; \
96 if (dret) *dret = 0; \
97 str->l = append? str->l : 0; \
98 for (;;) { \
99 int i; \
100 if (ks_err(ks)) return -3; \
101 if (ks->begin >= ks->end) { \
102 if (!ks->is_eof) { \
103 ks->begin = 0; \
104 ks->end = __read(ks->f, ks->buf, __bufsize); \
105 if (ks->end == 0) { ks->is_eof = 1; break; } \
106 if (ks->end == -1) { ks->is_eof = 1; return -3; } \
107 } else break; \
108 } \
104109 if (delimiter == KS_SEP_LINE) { \
105110 for (i = ks->begin; i < ks->end; ++i) \
106111 if (ks->buf[i] == '\n') break; \
107 } else if (delimiter > KS_SEP_MAX) { \
108 for (i = ks->begin; i < ks->end; ++i) \
109 if (ks->buf[i] == delimiter) break; \
110 } else if (delimiter == KS_SEP_SPACE) { \
111 for (i = ks->begin; i < ks->end; ++i) \
112 if (isspace(ks->buf[i])) break; \
113 } else if (delimiter == KS_SEP_TAB) { \
114 for (i = ks->begin; i < ks->end; ++i) \
112 } else if (delimiter > KS_SEP_MAX) { \
113 for (i = ks->begin; i < ks->end; ++i) \
114 if (ks->buf[i] == delimiter) break; \
115 } else if (delimiter == KS_SEP_SPACE) { \
116 for (i = ks->begin; i < ks->end; ++i) \
117 if (isspace(ks->buf[i])) break; \
118 } else if (delimiter == KS_SEP_TAB) { \
119 for (i = ks->begin; i < ks->end; ++i) \
115120 if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
116 } else i = 0; /* never come to here! */ \
117 if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
118 str->m = str->l + (i - ks->begin) + 1; \
119 kroundup32(str->m); \
120 str->s = (char*)realloc(str->s, str->m); \
121 } \
122 gotany = 1; \
121 } else i = 0; /* never come to here! */ \
122 if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
123 str->m = str->l + (i - ks->begin) + 1; \
124 kroundup32(str->m); \
125 str->s = (char*)realloc(str->s, str->m); \
126 } \
127 gotany = 1; \
123128 memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
124 str->l = str->l + (i - ks->begin); \
125 ks->begin = i + 1; \
126 if (i < ks->end) { \
127 if (dret) *dret = ks->buf[i]; \
128 break; \
129 } \
130 } \
131 if (!gotany && ks_eof(ks)) return -1; \
132 if (str->s == 0) { \
133 str->m = 1; \
134 str->s = (char*)calloc(1, 1); \
129 str->l = str->l + (i - ks->begin); \
130 ks->begin = i + 1; \
131 if (i < ks->end) { \
132 if (dret) *dret = ks->buf[i]; \
133 break; \
134 } \
135 } \
136 if (!gotany && ks_eof(ks)) return -1; \
137 if (str->s == 0) { \
138 str->m = 1; \
139 str->s = (char*)calloc(1, 1); \
135140 } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
136 str->s[str->l] = '\0'; \
137 return str->l; \
141 str->s[str->l] = '\0'; \
142 return str->l; \
138143 } \
139144 static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
140145 { return ks_getuntil2(ks, delimiter, str, dret, 0); }
141146
142147 #define KSTREAM_INIT(type_t, __read, __bufsize) \
143 __KS_TYPE(type_t) \
144 __KS_BASIC(type_t, __bufsize) \
145 __KS_GETC(__read, __bufsize) \
148 __KS_TYPE(type_t) \
149 __KS_BASIC(type_t, __bufsize) \
150 __KS_GETC(__read, __bufsize) \
146151 __KS_GETUNTIL(__read, __bufsize)
147152
148153 #define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
149154
150 #define __KSEQ_BASIC(SCOPE, type_t) \
151 SCOPE kseq_t *kseq_init(type_t fd) \
152 { \
153 kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
154 s->f = ks_init(fd); \
155 return s; \
156 } \
157 SCOPE void kseq_destroy(kseq_t *ks) \
158 { \
159 if (!ks) return; \
155 #define __KSEQ_BASIC(SCOPE, type_t) \
156 SCOPE kseq_t *kseq_init(type_t fd) \
157 { \
158 kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
159 s->f = ks_init(fd); \
160 return s; \
161 } \
162 SCOPE void kseq_destroy(kseq_t *ks) \
163 { \
164 if (!ks) return; \
160165 free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
161 ks_destroy(ks->f); \
162 free(ks); \
166 ks_destroy(ks->f); \
167 free(ks); \
163168 }
164169
165170 /* Return value:
166171 >=0 length of the sequence (normal)
167172 -1 end-of-file
168173 -2 truncated quality string
174 -3 error reading stream
169175 */
170176 #define __KSEQ_READ(SCOPE) \
171177 SCOPE int kseq_read(kseq_t *seq) \
172178 { \
173 int c; \
179 int c,r; \
174180 kstream_t *ks = seq->f; \
175181 if (seq->last_char == 0) { /* then jump to the next header line */ \
176 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
177 if (c == -1) return -1; /* end of file */ \
182 while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '@'); \
183 if (c < 0) return c; /* end of file or error*/ \
178184 seq->last_char = c; \
179185 } /* else: the first header char has been read in the previous call */ \
180186 seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
181 if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
187 if ((r=ks_getuntil(ks, 0, &seq->name, &c)) < 0) return r; /* normal exit: EOF or error */ \
182188 if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
183189 if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
184190 seq->seq.m = 256; \
185191 seq->seq.s = (char*)malloc(seq->seq.m); \
186192 } \
187 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
193 while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '+' && c != '@') { \
188194 if (c == '\n') continue; /* skip empty lines */ \
189195 seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
190196 ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
191197 } \
192 if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
198 if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
193199 if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
194200 seq->seq.m = seq->seq.l + 2; \
195201 kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
196202 seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
197203 } \
198204 seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
199 if (c != '+') return seq->seq.l; /* FASTA */ \
205 seq->is_fastq = (c == '+'); \
206 if (!seq->is_fastq) return seq->seq.l; /* FASTA */ \
200207 if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \
201208 seq->qual.m = seq->seq.m; \
202209 seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
203210 } \
204 while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
211 while ((c = ks_getc(ks)) >= 0 && c != '\n'); /* skip the rest of '+' line */ \
205212 if (c == -1) return -2; /* error: no quality string */ \
206 while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
213 while ((c = ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l)); \
214 if (c == -3) return -3; /* stream error */ \
207215 seq->last_char = 0; /* we have not come to the next header line */ \
208216 if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
209217 return seq->seq.l; \
210218 }
211219
212 #define __KSEQ_TYPE(type_t) \
213 typedef struct { \
214 kstring_t name, comment, seq, qual; \
215 int last_char; \
216 kstream_t *f; \
220 #define __KSEQ_TYPE(type_t) \
221 typedef struct { \
222 kstring_t name, comment, seq, qual; \
223 int last_char, is_fastq; \
224 kstream_t *f; \
217225 } kseq_t;
218226
219 #define KSEQ_INIT2(SCOPE, type_t, __read) \
220 KSTREAM_INIT(type_t, __read, 16384) \
221 __KSEQ_TYPE(type_t) \
222 __KSEQ_BASIC(SCOPE, type_t) \
227 #define KSEQ_INIT2(SCOPE, type_t, __read) \
228 KSTREAM_INIT(type_t, __read, 16384) \
229 __KSEQ_TYPE(type_t) \
230 __KSEQ_BASIC(SCOPE, type_t) \
223231 __KSEQ_READ(SCOPE)
224232
225233 #define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
+0
-298
ksort.h less more
0 /* The MIT License
1
2 Copyright (c) 2008, 2011 Attractive Chaos <attractor@live.co.uk>
3
4 Permission is hereby granted, free of charge, to any person obtaining
5 a copy of this software and associated documentation files (the
6 "Software"), to deal in the Software without restriction, including
7 without limitation the rights to use, copy, modify, merge, publish,
8 distribute, sublicense, and/or sell copies of the Software, and to
9 permit persons to whom the Software is furnished to do so, subject to
10 the following conditions:
11
12 The above copyright notice and this permission notice shall be
13 included in all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23 */
24
25 /*
26 2011-04-10 (0.1.6):
27
28 * Added sample
29
30 2011-03 (0.1.5):
31
32 * Added shuffle/permutation
33
34 2008-11-16 (0.1.4):
35
36 * Fixed a bug in introsort() that happens in rare cases.
37
38 2008-11-05 (0.1.3):
39
40 * Fixed a bug in introsort() for complex comparisons.
41
42 * Fixed a bug in mergesort(). The previous version is not stable.
43
44 2008-09-15 (0.1.2):
45
46 * Accelerated introsort. On my Mac (not on another Linux machine),
47 my implementation is as fast as std::sort on random input.
48
49 * Added combsort and in introsort, switch to combsort if the
50 recursion is too deep.
51
52 2008-09-13 (0.1.1):
53
54 * Added k-small algorithm
55
56 2008-09-05 (0.1.0):
57
58 * Initial version
59
60 */
61
62 #ifndef AC_KSORT_H
63 #define AC_KSORT_H
64
65 #include <stdlib.h>
66 #include <string.h>
67
68 typedef struct {
69 void *left, *right;
70 int depth;
71 } ks_isort_stack_t;
72
73 #define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
74
75 #define KSORT_INIT(name, type_t, __sort_lt) \
76 void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) \
77 { \
78 type_t *a2[2], *a, *b; \
79 int curr, shift; \
80 \
81 a2[0] = array; \
82 a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \
83 for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) { \
84 a = a2[curr]; b = a2[1-curr]; \
85 if (shift == 0) { \
86 type_t *p = b, *i, *eb = a + n; \
87 for (i = a; i < eb; i += 2) { \
88 if (i == eb - 1) *p++ = *i; \
89 else { \
90 if (__sort_lt(*(i+1), *i)) { \
91 *p++ = *(i+1); *p++ = *i; \
92 } else { \
93 *p++ = *i; *p++ = *(i+1); \
94 } \
95 } \
96 } \
97 } else { \
98 size_t i, step = 1ul<<shift; \
99 for (i = 0; i < n; i += step<<1) { \
100 type_t *p, *j, *k, *ea, *eb; \
101 if (n < i + step) { \
102 ea = a + n; eb = a; \
103 } else { \
104 ea = a + i + step; \
105 eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
106 } \
107 j = a + i; k = a + i + step; p = b + i; \
108 while (j < ea && k < eb) { \
109 if (__sort_lt(*k, *j)) *p++ = *k++; \
110 else *p++ = *j++; \
111 } \
112 while (j < ea) *p++ = *j++; \
113 while (k < eb) *p++ = *k++; \
114 } \
115 } \
116 curr = 1 - curr; \
117 } \
118 if (curr == 1) { \
119 type_t *p = a2[0], *i = a2[1], *eb = array + n; \
120 for (; p < eb; ++i) *p++ = *i; \
121 } \
122 if (temp == 0) free(a2[1]); \
123 } \
124 void ks_heapadjust_##name(size_t i, size_t n, type_t l[]) \
125 { \
126 size_t k = i; \
127 type_t tmp = l[i]; \
128 while ((k = (k << 1) + 1) < n) { \
129 if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \
130 if (__sort_lt(l[k], tmp)) break; \
131 l[i] = l[k]; i = k; \
132 } \
133 l[i] = tmp; \
134 } \
135 void ks_heapmake_##name(size_t lsize, type_t l[]) \
136 { \
137 size_t i; \
138 for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \
139 ks_heapadjust_##name(i, lsize, l); \
140 } \
141 void ks_heapsort_##name(size_t lsize, type_t l[]) \
142 { \
143 size_t i; \
144 for (i = lsize - 1; i > 0; --i) { \
145 type_t tmp; \
146 tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \
147 } \
148 } \
149 static inline void __ks_insertsort_##name(type_t *s, type_t *t) \
150 { \
151 type_t *i, *j, swap_tmp; \
152 for (i = s + 1; i < t; ++i) \
153 for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \
154 swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \
155 } \
156 } \
157 void ks_combsort_##name(size_t n, type_t a[]) \
158 { \
159 const double shrink_factor = 1.2473309501039786540366528676643; \
160 int do_swap; \
161 size_t gap = n; \
162 type_t tmp, *i, *j; \
163 do { \
164 if (gap > 2) { \
165 gap = (size_t)(gap / shrink_factor); \
166 if (gap == 9 || gap == 10) gap = 11; \
167 } \
168 do_swap = 0; \
169 for (i = a; i < a + n - gap; ++i) { \
170 j = i + gap; \
171 if (__sort_lt(*j, *i)) { \
172 tmp = *i; *i = *j; *j = tmp; \
173 do_swap = 1; \
174 } \
175 } \
176 } while (do_swap || gap > 2); \
177 if (gap != 1) __ks_insertsort_##name(a, a + n); \
178 } \
179 void ks_introsort_##name(size_t n, type_t a[]) \
180 { \
181 int d; \
182 ks_isort_stack_t *top, *stack; \
183 type_t rp, swap_tmp; \
184 type_t *s, *t, *i, *j, *k; \
185 \
186 if (n < 1) return; \
187 else if (n == 2) { \
188 if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
189 return; \
190 } \
191 for (d = 2; 1ul<<d < n; ++d); \
192 stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
193 top = stack; s = a; t = a + (n-1); d <<= 1; \
194 while (1) { \
195 if (s < t) { \
196 if (--d == 0) { \
197 ks_combsort_##name(t - s + 1, s); \
198 t = s; \
199 continue; \
200 } \
201 i = s; j = t; k = i + ((j-i)>>1) + 1; \
202 if (__sort_lt(*k, *i)) { \
203 if (__sort_lt(*k, *j)) k = j; \
204 } else k = __sort_lt(*j, *i)? i : j; \
205 rp = *k; \
206 if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \
207 for (;;) { \
208 do ++i; while (__sort_lt(*i, rp)); \
209 do --j; while (i <= j && __sort_lt(rp, *j)); \
210 if (j <= i) break; \
211 swap_tmp = *i; *i = *j; *j = swap_tmp; \
212 } \
213 swap_tmp = *i; *i = *t; *t = swap_tmp; \
214 if (i-s > t-i) { \
215 if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
216 s = t-i > 16? i+1 : t; \
217 } else { \
218 if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
219 t = i-s > 16? i-1 : s; \
220 } \
221 } else { \
222 if (top == stack) { \
223 free(stack); \
224 __ks_insertsort_##name(a, a+n); \
225 return; \
226 } else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
227 } \
228 } \
229 } \
230 /* This function is adapted from: http://ndevilla.free.fr/median/ */ \
231 /* 0 <= kk < n */ \
232 type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \
233 { \
234 type_t *low, *high, *k, *ll, *hh, *mid; \
235 low = arr; high = arr + n - 1; k = arr + kk; \
236 for (;;) { \
237 if (high <= low) return *k; \
238 if (high == low + 1) { \
239 if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
240 return *k; \
241 } \
242 mid = low + (high - low) / 2; \
243 if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
244 if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
245 if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \
246 KSORT_SWAP(type_t, *mid, *(low+1)); \
247 ll = low + 1; hh = high; \
248 for (;;) { \
249 do ++ll; while (__sort_lt(*ll, *low)); \
250 do --hh; while (__sort_lt(*low, *hh)); \
251 if (hh < ll) break; \
252 KSORT_SWAP(type_t, *ll, *hh); \
253 } \
254 KSORT_SWAP(type_t, *low, *hh); \
255 if (hh <= k) low = ll; \
256 if (hh >= k) high = hh - 1; \
257 } \
258 } \
259 void ks_shuffle_##name(size_t n, type_t a[]) \
260 { \
261 int i, j; \
262 for (i = n; i > 1; --i) { \
263 type_t tmp; \
264 j = (int)(drand48() * i); \
265 tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; \
266 } \
267 } \
268 void ks_sample_##name(size_t n, size_t r, type_t a[]) /* FIXME: NOT TESTED!!! */ \
269 { /* reference: http://code.activestate.com/recipes/272884/ */ \
270 int i, k, pop = n; \
271 for (i = (int)r, k = 0; i >= 0; --i) { \
272 double z = 1., x = drand48(); \
273 type_t tmp; \
274 while (x < z) z -= z * i / (pop--); \
275 if (k != n - pop - 1) tmp = a[k], a[k] = a[n-pop-1], a[n-pop-1] = tmp; \
276 ++k; \
277 } \
278 }
279
280 #define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)
281 #define ks_introsort(name, n, a) ks_introsort_##name(n, a)
282 #define ks_combsort(name, n, a) ks_combsort_##name(n, a)
283 #define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)
284 #define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)
285 #define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)
286 #define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
287 #define ks_shuffle(name, n, a) ks_shuffle_##name(n, a)
288
289 #define ks_lt_generic(a, b) ((a) < (b))
290 #define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
291
292 typedef const char *ksstr_t;
293
294 #define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
295 #define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
296
297 #endif
+0
-169
kstring.h less more
0 /* The MIT License
1
2 Copyright (c) by Attractive Chaos <attractor@live.co.uk>
3
4 Permission is hereby granted, free of charge, to any person obtaining
5 a copy of this software and associated documentation files (the
6 "Software"), to deal in the Software without restriction, including
7 without limitation the rights to use, copy, modify, merge, publish,
8 distribute, sublicense, and/or sell copies of the Software, and to
9 permit persons to whom the Software is furnished to do so, subject to
10 the following conditions:
11
12 The above copyright notice and this permission notice shall be
13 included in all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23 */
24
25 #ifndef KSTRING_H
26 #define KSTRING_H
27
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdint.h>
31
32 #ifndef kroundup32
33 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
34 #endif
35
36 #ifndef KSTRING_T
37 #define KSTRING_T kstring_t
38 typedef struct __kstring_t {
39 uint32_t l, m;
40 char *s;
41 } kstring_t;
42 #endif
43
44 typedef struct {
45 uint64_t tab[4];
46 int sep, finished;
47 const char *p; // end of the current token
48 } ks_tokaux_t;
49
50 #ifdef __cplusplus
51 extern "C" {
52 #endif
53
54 int ksprintf(kstring_t *s, const char *fmt, ...);
55 int ksprintf_fast(kstring_t *s, const char *fmt, ...);
56 int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
57 char *kstrstr(const char *str, const char *pat, int **_prep);
58 char *kstrnstr(const char *str, const char *pat, int n, int **_prep);
59 void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep);
60
61 /* kstrtok() is similar to strtok_r() except that str is not
62 * modified and both str and sep can be NULL. For efficiency, it is
63 * actually recommended to set both to NULL in the subsequent calls
64 * if sep is not changed. */
65 char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux);
66
67 #ifdef __cplusplus
68 }
69 #endif
70
71 static inline void ks_resize(kstring_t *s, size_t size)
72 {
73 if (s->m < size) {
74 s->m = size;
75 kroundup32(s->m);
76 s->s = (char*)realloc(s->s, s->m);
77 }
78 }
79
80 static inline int kputsn(const char *p, int l, kstring_t *s)
81 {
82 if (s->l + l + 1 >= s->m) {
83 s->m = s->l + l + 2;
84 kroundup32(s->m);
85 s->s = (char*)realloc(s->s, s->m);
86 }
87 memcpy(s->s + s->l, p, l);
88 s->l += l;
89 s->s[s->l] = 0;
90 return l;
91 }
92
93 static inline int kputs(const char *p, kstring_t *s)
94 {
95 return kputsn(p, strlen(p), s);
96 }
97
98 static inline int kputc(int c, kstring_t *s)
99 {
100 if (s->l + 1 >= s->m) {
101 s->m = s->l + 2;
102 kroundup32(s->m);
103 s->s = (char*)realloc(s->s, s->m);
104 }
105 s->s[s->l++] = c;
106 s->s[s->l] = 0;
107 return c;
108 }
109
110 static inline int kputw(int c, kstring_t *s)
111 {
112 char buf[16];
113 int l, x;
114 if (c == 0) return kputc('0', s);
115 for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
116 if (c < 0) buf[l++] = '-';
117 if (s->l + l + 1 >= s->m) {
118 s->m = s->l + l + 2;
119 kroundup32(s->m);
120 s->s = (char*)realloc(s->s, s->m);
121 }
122 for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
123 s->s[s->l] = 0;
124 return 0;
125 }
126
127 static inline int kputuw(unsigned c, kstring_t *s)
128 {
129 char buf[16];
130 int l, i;
131 unsigned x;
132 if (c == 0) return kputc('0', s);
133 for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0';
134 if (s->l + l + 1 >= s->m) {
135 s->m = s->l + l + 2;
136 kroundup32(s->m);
137 s->s = (char*)realloc(s->s, s->m);
138 }
139 for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
140 s->s[s->l] = 0;
141 return 0;
142 }
143
144 static inline int kputl(long c, kstring_t *s)
145 {
146 char buf[32];
147 long l, x;
148 if (c == 0) return kputc('0', s);
149 for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
150 if (c < 0) buf[l++] = '-';
151 if (s->l + l + 1 >= s->m) {
152 s->m = s->l + l + 2;
153 kroundup32(s->m);
154 s->s = (char*)realloc(s->s, s->m);
155 }
156 for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
157 s->s[s->l] = 0;
158 return 0;
159 }
160
161 static inline int *ksplit(kstring_t *s, int delimiter, int *n)
162 {
163 int max = 0, *offsets = 0;
164 *n = ksplit_core(s->s, delimiter, &max, &offsets);
165 return offsets;
166 }
167
168 #endif
+0
-90
kvec.h less more
0 /* The MIT License
1
2 Copyright (c) 2008, by Attractive Chaos <attractor@live.co.uk>
3
4 Permission is hereby granted, free of charge, to any person obtaining
5 a copy of this software and associated documentation files (the
6 "Software"), to deal in the Software without restriction, including
7 without limitation the rights to use, copy, modify, merge, publish,
8 distribute, sublicense, and/or sell copies of the Software, and to
9 permit persons to whom the Software is furnished to do so, subject to
10 the following conditions:
11
12 The above copyright notice and this permission notice shall be
13 included in all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23 */
24
25 /*
26 An example:
27
28 #include "kvec.h"
29 int main() {
30 kvec_t(int) array;
31 kv_init(array);
32 kv_push(int, array, 10); // append
33 kv_a(int, array, 20) = 5; // dynamic
34 kv_A(array, 20) = 4; // static
35 kv_destroy(array);
36 return 0;
37 }
38 */
39
40 /*
41 2008-09-22 (0.1.0):
42
43 * The initial version.
44
45 */
46
47 #ifndef AC_KVEC_H
48 #define AC_KVEC_H
49
50 #include <stdlib.h>
51
52 #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
53
54 #define kvec_t(type) struct { size_t n, m; type *a; }
55 #define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
56 #define kv_destroy(v) free((v).a)
57 #define kv_A(v, i) ((v).a[(i)])
58 #define kv_pop(v) ((v).a[--(v).n])
59 #define kv_size(v) ((v).n)
60 #define kv_max(v) ((v).m)
61
62 #define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m))
63
64 #define kv_copy(type, v1, v0) do { \
65 if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \
66 (v1).n = (v0).n; \
67 memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \
68 } while (0) \
69
70 #define kv_push(type, v, x) do { \
71 if ((v).n == (v).m) { \
72 (v).m = (v).m? (v).m<<1 : 2; \
73 (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
74 } \
75 (v).a[(v).n++] = (x); \
76 } while (0)
77
78 #define kv_pushp(type, v) (((v).n == (v).m)? \
79 ((v).m = ((v).m? (v).m<<1 : 2), \
80 (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
81 : 0), ((v).a + ((v).n++))
82
83 #define kv_a(type, v, i) (((v).m <= (size_t)(i)? \
84 ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
85 (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
86 : (v).n <= (size_t)(i)? (v).n = (i) + 1 \
87 : 0), (v).a[(i)])
88
89 #endif
00 /* The MIT License
11
2 Copyright (c) 20082-2012 by Heng Li <lh3@me.com>
2 Copyright (c) 2008-2016 Broad Institute
33
44 Permission is hereby granted, free of charge, to any person obtaining
55 a copy of this software and associated documentation files (the
5656 int dret;
5757 kstring_t *str;
5858 // read the list
59 fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
60 if (fp == 0) return 0;
61 ks = ks_init(fp);
5962 str = calloc(1, sizeof(kstring_t));
60 fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
61 ks = ks_init(fp);
6263 while (ks_getuntil(ks, 0, str, &dret) >= 0) {
6364 int beg = -1, end = -1;
6465 reglist_t *p;
297298 fprintf(stderr, " -b INT trim INT bp from left (non-zero to disable -q/-l) [0]\n");
298299 fprintf(stderr, " -e INT trim INT bp from right (non-zero to disable -q/-l) [0]\n");
299300 fprintf(stderr, " -L INT retain at most INT bp from the 5'-end (non-zero to disable -q/-l) [0]\n");
301 fprintf(stderr, " -Q force FASTQ output\n");
300302 fprintf(stderr, "\n");
301303 return 1;
302304 }
303305 fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
306 if (fp == 0) {
307 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
308 return 1;
309 }
304310 seq = kseq_init(fp);
305311 for (i = 0; i < 128; ++i)
306312 q_int2real[i] = pow(10., -(i - 33) / 10.);
335341 end = beg + min_len;
336342 }
337343 } else beg = 0, end = seq->seq.l;
338 putchar(seq->qual.l? '@' : '>'); fputs(seq->name.s, stdout);
344 putchar(seq->is_fastq? '@' : '>'); fputs(seq->name.s, stdout);
339345 if (seq->comment.l) {
340346 putchar(' '); puts(seq->comment.s);
341347 } else putchar('\n');
342348 fwrite(seq->seq.s + beg, 1, end - beg, stdout); putchar('\n');
343 if (seq->qual.l) {
349 if (seq->is_fastq) {
344350 puts("+");
345351 fwrite(seq->qual.s + beg, 1, end - beg, stdout); putchar('\n');
346352 }
371377 return 1;
372378 }
373379 fp = optind < argc && strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
380 if (fp == 0) {
381 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
382 return 1;
383 }
374384 seq = kseq_init(fp);
375385 dummy.n= dummy.m = 1; dummy.a = calloc(1, 8);
376386 while ((l = kseq_read(seq)) >= 0) {
436446 return 1;
437447 }
438448 fp = (strcmp(argv[1], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[1], "r");
449 if (fp == 0) {
450 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
451 return 1;
452 }
439453 seq = kseq_init(fp);
440454 while ((l = kseq_read(seq)) >= 0) {
441455 int i;
488502 }
489503 }
490504 fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
505 if (fp == 0) {
506 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
507 return 1;
508 }
491509 seq = kseq_init(fp);
492510 win_step = win_size / n_start;
493511 buf = calloc(win_size, 1);
546564 return 1;
547565 }
548566 h = stk_reg_read(argv[optind+1]);
567 if (h == 0) {
568 fprintf(stderr, "[E::%s] failed to read the list of regions in file '%s'\n", __func__, argv[optind+1]);
569 return 1;
570 }
549571 // subseq
550572 fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
573 if (fp == 0) {
574 fprintf(stderr, "[E::%s] failed to open the input file/stream\n", __func__);
575 return 1;
576 }
551577 seq = kseq_init(fp);
552578 while ((l = kseq_read(seq)) >= 0) {
553579 reglist_t *p;
624650 for (i = 0; i < 2; ++i) {
625651 fp[i] = strcmp(argv[optind+i], "-")? gzopen(argv[optind+i], "r") : gzdopen(fileno(stdin), "r");
626652 seq[i] = kseq_init(fp[i]);
653 }
654 if (fp[0] == 0 || fp[1] == 0) {
655 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
656 return 1;
627657 }
628658 cnt[0] = cnt[1] = cnt[2] = cnt[3] = cnt[4] = 0;
629659 srand48(11);
706736 fp[i] = strcmp(argv[optind+i], "-")? gzopen(argv[optind+i], "r") : gzdopen(fileno(stdin), "r");
707737 seq[i] = kseq_init(fp[i]);
708738 }
739 if (fp[0] == 0 || fp[1] == 0) {
740 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
741 return 1;
742 }
709743 while (kseq_read(seq[0]) >= 0) {
710744 int min_l, c[2];
711745 kseq_read(seq[1]);
745779 // read the list
746780 str = calloc(1, sizeof(kstring_t));
747781 fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
782 if (fp == 0) {
783 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
784 return 1;
785 }
748786 ks = ks_init(fp);
749787 while (ks_getuntil(ks, 0, str, &dret) >= 0) {
750788 char *s = strdup(str->s);
774812 free(str->s); free(str);
775813 // mutfa
776814 fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
815 if (fp == 0) {
816 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
817 return 1;
818 }
777819 seq = kseq_init(fp);
778820 while ((l = kseq_read(seq)) >= 0) {
779821 reglist_t *p;
816858 return 1;
817859 }
818860 fp = (strcmp(argv[1], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[1], "r");
861 if (fp == 0) {
862 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
863 return 1;
864 }
819865 seq = kseq_init(fp);
820866 while ((l = kseq_read(seq)) >= 0) {
821867 for (i = 0; i < l; ++i) {
901947 return 1;
902948 }
903949 fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
950 if (fp == 0) {
951 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
952 return 1;
953 }
904954 ks = kseq_init(fp);
905955 while ((l = kseq_read(ks)) >= 0) {
906956 int k = 0, begin = 0, end = 0;
929979 }
930980 if (argc == optind + 2) min_len = atoi(argv[optind+1]);
931981 fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
982 if (fp == 0) {
983 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
984 return 1;
985 }
932986 ks = kseq_init(fp);
933987 while (kseq_read(ks) >= 0) {
934988 c = ks->seq.s[0]; l = 1; beg = 0;
9881042 return 1;
9891043 }
9901044 frac = atof(argv[optind+1]);
991 if (frac > 1.) num = (uint64_t)(frac + .499), frac = 0.;
1045 if (frac >= 1.0) num = (uint64_t)(frac + .499), frac = 0.;
9921046 else if (twopass) {
9931047 fprintf(stderr, "[W::%s] when sampling a fraction, option -2 is ignored.", __func__);
9941048 twopass = 0;
10051059 }
10061060
10071061 fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
1062 if (fp == 0) {
1063 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1064 return 1;
1065 }
10081066 seq = kseq_init(fp);
10091067 n_seqs = 0;
10101068 while (kseq_read(seq) >= 0) {
10361094 buf = malloc(num * 8);
10371095 for (i = 0; i < num; ++i) buf[i] = UINT64_MAX;
10381096 fp = gzopen(argv[optind], "r");
1097 if (fp == 0) {
1098 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1099 return 1;
1100 }
10391101 seq = kseq_init(fp);
10401102 n_seqs = 0;
10411103 while (kseq_read(seq) >= 0) {
11161178 {
11171179 gzFile fp;
11181180 kseq_t *seq;
1119 int c, qual_thres = 0, flag = 0, qual_shift = 33, mask_chr = 0, min_len = 0, max_q = 255;
1181 int c, qual_thres = 0, flag = 0, qual_shift = 33, mask_chr = 0, min_len = 0, max_q = 255, fake_qual = -1;
11201182 unsigned i, line_len = 0;
11211183 int64_t n_seqs = 0;
11221184 double frac = 1.;
11231185 khash_t(reg) *h = 0;
11241186 krand_t *kr = 0;
11251187
1126 while ((c = getopt(argc, argv, "N12q:l:Q:aACrn:s:f:M:L:cVUX:S")) >= 0) {
1188 while ((c = getopt(argc, argv, "N12q:l:Q:aACrn:s:f:M:L:cVUX:SF:")) >= 0) {
11271189 switch (c) {
11281190 case 'a':
11291191 case 'A': flag |= 1; break;
11451207 case 'L': min_len = atoi(optarg); break;
11461208 case 's': kr = kr_srand(atol(optarg)); break;
11471209 case 'f': frac = atof(optarg); break;
1210 case 'F': fake_qual = *optarg; break;
11481211 }
11491212 }
11501213 if (kr == 0) kr = kr_srand(11);
11601223 fprintf(stderr, " -f FLOAT sample FLOAT fraction of sequences [1]\n");
11611224 fprintf(stderr, " -M FILE mask regions in BED or name list FILE [null]\n");
11621225 fprintf(stderr, " -L INT drop sequences with length shorter than INT [0]\n");
1226 fprintf(stderr, " -F CHAR fake FASTQ quality []\n");
11631227 fprintf(stderr, " -c mask complement region (effective with -M)\n");
11641228 fprintf(stderr, " -r reverse complement\n");
11651229 fprintf(stderr, " -A force FASTA output (discard quality)\n");
11761240 }
11771241 if (line_len == 0) line_len = UINT_MAX;
11781242 fp = optind < argc && strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
1243 if (fp == 0) {
1244 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1245 return 1;
1246 }
11791247 seq = kseq_init(fp);
11801248 qual_thres += qual_shift;
11811249 while (kseq_read(seq) >= 0) {
12141282 for (i = 0; i < seq->seq.l; ++i)
12151283 seq->seq.s[i] = toupper(seq->seq.s[i]);
12161284 if (flag & 1) seq->qual.l = 0; // option -a: fastq -> fasta
1285 else if (fake_qual >= 33 && fake_qual <= 127) {
1286 if (seq->qual.m < seq->seq.m) {
1287 seq->qual.m = seq->seq.m;
1288 seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);
1289 }
1290 seq->qual.l = seq->seq.l;
1291 memset(seq->qual.s, fake_qual, seq->qual.l);
1292 seq->qual.s[seq->qual.l] = 0;
1293 }
12171294 if (flag & 2) seq->comment.l = 0; // option -C: drop fasta/q comments
12181295 if (h) stk_mask(seq, h, flag&8, mask_chr); // masking
12191296 if (flag & 4) { // option -r: reverse complement
12731350 q = (1.0f - frac) / frac;
12741351
12751352 fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
1353 if (fp == 0) {
1354 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1355 return 1;
1356 }
12761357 seq = kseq_init(fp);
12771358 while (kseq_read(seq) >= 0) {
12781359 int i, start = 0, max_i = 0, n_hits = 0, start_hits = 0, max_hits = 0;
13161397 }
13171398 fp1 = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
13181399 fp2 = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r");
1400 if (fp1 == 0 || fp2 == 0) {
1401 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1402 return 1;
1403 }
13191404 seq[0] = kseq_init(fp1);
13201405 seq[1] = kseq_init(fp2);
13211406 while (kseq_read(seq[0]) >= 0) {
13431428 return 1;
13441429 }
13451430 fp = argc > 1 && strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
1431 if (fp == 0) {
1432 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1433 return 1;
1434 }
13461435 seq = kseq_init(fp);
13471436
13481437 memset(&last, 0, sizeof(kseq_t));
13801469 return 1;
13811470 }
13821471 fp = argc > 1 && strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
1472 if (fp == 0) {
1473 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1474 return 1;
1475 }
13831476 seq = kseq_init(fp);
13841477 if (argc > 2) prefix = argv[2];
13851478
14431536 }
14441537
14451538 fp = argc == 2 || strcmp(argv[2], "-") == 0? gzdopen(fileno(stdin), "r") : gzopen(argv[2], "r");
1539 if (fp == 0) {
1540 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1541 return 1;
1542 }
14461543 ks = kseq_init(fp);
14471544 while (kseq_read(ks) >= 0) {
14481545 int k, x[2], cnt[2], cnt_nei[2], which;
15161613 return 1;
15171614 }
15181615 fp = (strcmp(argv[optind], "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(argv[optind], "r");
1616 if (fp == 0) {
1617 fprintf(stderr, "[E::%s] failed to open the input file/stream.\n", __func__);
1618 return 1;
1619 }
15191620 seq = kseq_init(fp);
15201621 for (k = 0; k <= 93; ++k)
15211622 perr[k] = pow(10., -.1 * k);
15731674 {
15741675 fprintf(stderr, "\n");
15751676 fprintf(stderr, "Usage: seqtk <command> <arguments>\n");
1576 fprintf(stderr, "Version: 1.2-r94\n\n");
1677 fprintf(stderr, "Version: 1.3-r106\n\n");
15771678 fprintf(stderr, "Command: seq common transformation of FASTA/Q\n");
15781679 fprintf(stderr, " comp get the nucleotide composition of FASTA/Q\n");
15791680 fprintf(stderr, " sample subsample sequences\n");
15981699 int main(int argc, char *argv[])
15991700 {
16001701 if (argc == 1) return usage();
1601 if (strcmp(argv[1], "comp") == 0) stk_comp(argc-1, argv+1);
1602 else if (strcmp(argv[1], "fqchk") == 0) stk_fqchk(argc-1, argv+1);
1603 else if (strcmp(argv[1], "hety") == 0) stk_hety(argc-1, argv+1);
1604 else if (strcmp(argv[1], "gc") == 0) stk_gc(argc-1, argv+1);
1605 else if (strcmp(argv[1], "subseq") == 0) stk_subseq(argc-1, argv+1);
1606 else if (strcmp(argv[1], "mutfa") == 0) stk_mutfa(argc-1, argv+1);
1607 else if (strcmp(argv[1], "mergefa") == 0) stk_mergefa(argc-1, argv+1);
1608 else if (strcmp(argv[1], "mergepe") == 0) stk_mergepe(argc-1, argv+1);
1609 else if (strcmp(argv[1], "dropse") == 0) stk_dropse(argc-1, argv+1);
1610 else if (strcmp(argv[1], "randbase") == 0) stk_randbase(argc-1, argv+1);
1611 else if (strcmp(argv[1], "cutN") == 0) stk_cutN(argc-1, argv+1);
1612 else if (strcmp(argv[1], "listhet") == 0) stk_listhet(argc-1, argv+1);
1613 else if (strcmp(argv[1], "famask") == 0) stk_famask(argc-1, argv+1);
1614 else if (strcmp(argv[1], "trimfq") == 0) stk_trimfq(argc-1, argv+1);
1615 else if (strcmp(argv[1], "hrun") == 0) stk_hrun(argc-1, argv+1);
1616 else if (strcmp(argv[1], "sample") == 0) stk_sample(argc-1, argv+1);
1617 else if (strcmp(argv[1], "seq") == 0) stk_seq(argc-1, argv+1);
1618 else if (strcmp(argv[1], "kfreq") == 0) stk_kfreq(argc-1, argv+1);
1619 else if (strcmp(argv[1], "rename") == 0) stk_rename(argc-1, argv+1);
1702 if (strcmp(argv[1], "comp") == 0) return stk_comp(argc-1, argv+1);
1703 else if (strcmp(argv[1], "fqchk") == 0) return stk_fqchk(argc-1, argv+1);
1704 else if (strcmp(argv[1], "hety") == 0) return stk_hety(argc-1, argv+1);
1705 else if (strcmp(argv[1], "gc") == 0) return stk_gc(argc-1, argv+1);
1706 else if (strcmp(argv[1], "subseq") == 0) return stk_subseq(argc-1, argv+1);
1707 else if (strcmp(argv[1], "mutfa") == 0) return stk_mutfa(argc-1, argv+1);
1708 else if (strcmp(argv[1], "mergefa") == 0) return stk_mergefa(argc-1, argv+1);
1709 else if (strcmp(argv[1], "mergepe") == 0) return stk_mergepe(argc-1, argv+1);
1710 else if (strcmp(argv[1], "dropse") == 0) return stk_dropse(argc-1, argv+1);
1711 else if (strcmp(argv[1], "randbase") == 0) return stk_randbase(argc-1, argv+1);
1712 else if (strcmp(argv[1], "cutN") == 0) return stk_cutN(argc-1, argv+1);
1713 else if (strcmp(argv[1], "listhet") == 0) return stk_listhet(argc-1, argv+1);
1714 else if (strcmp(argv[1], "famask") == 0) return stk_famask(argc-1, argv+1);
1715 else if (strcmp(argv[1], "trimfq") == 0) return stk_trimfq(argc-1, argv+1);
1716 else if (strcmp(argv[1], "hrun") == 0) return stk_hrun(argc-1, argv+1);
1717 else if (strcmp(argv[1], "sample") == 0) return stk_sample(argc-1, argv+1);
1718 else if (strcmp(argv[1], "seq") == 0) return stk_seq(argc-1, argv+1);
1719 else if (strcmp(argv[1], "kfreq") == 0) return stk_kfreq(argc-1, argv+1);
1720 else if (strcmp(argv[1], "rename") == 0) return stk_rename(argc-1, argv+1);
16201721 else {
16211722 fprintf(stderr, "[main] unrecognized command '%s'. Abort!\n", argv[1]);
16221723 return 1;
16231724 }
1624 return 0;
1625 }
1725 }