Codebase list miniasm / fe33263
Import upstream version 0.3+dfsg+git20191007.1.ce615d1 Debian Janitor 2 years ago
16 changed file(s) with 5339 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
77 |--:|:----:|:-----------------------------------------|
88 |1 |string|Query sequence name |
99 |2 |int |Query sequence length |
10 |3 |int |Query start (0-based) |
11 |4 |int |Query end (0-based) |
10 |3 |int |Query start (0-based; BED-like; closed) |
11 |4 |int |Query end (0-based; BED-like; open) |
1212 |5 |char |Relative strand: "+" or "-" |
1313 |6 |string|Target sequence name |
1414 |7 |int |Target sequence length |
66 # Install minimap2 and miniasm (requiring gcc and zlib)
77 git clone https://github.com/lh3/minimap2 && (cd minimap2 && make)
88 git clone https://github.com/lh3/miniasm && (cd miniasm && make)
9 # Overlap for PacBio reads (or use "-x map-ont" for nanopore read overlapping)
10 minimap2/minimap2 -x map-pb -t8 pb-reads.fq pb-reads.fq | gzip -1 > reads.paf.gz
9 # Overlap for PacBio reads (or use "-x ava-ont" for nanopore read overlapping)
10 minimap2/minimap2 -x ava-pb -t8 pb-reads.fq pb-reads.fq | gzip -1 > reads.paf.gz
1111 # Layout
1212 miniasm/miniasm -f reads.fq reads.paf.gz > reads.gfa
1313 ```
8181 ma_utg_t *p = &ug->u.a[i];
8282 sprintf(name, "utg%.6d%c", i + 1, "lc"[p->circ]);
8383 fprintf(fp, "S\t%s\t%s\tLN:i:%d\n", name, p->s? p->s : "*", p->len);
84 if (p->circ) { // make circularising links (both forward and reverse directions) for circular unitigs
85 fprintf(fp, "L\t%s\t+\t%s\t+\t0M\n", name, name);
86 fprintf(fp, "L\t%s\t-\t%s\t-\t0M\n", name, name);
87 }
8488 for (j = l = 0; j < p->n; l += (uint32_t)p->a[j++]) {
8589 uint32_t x = p->a[j]>>33;
8690 if (sub) fprintf(fp, "a\t%s\t%d\t%s:%d-%d\t%c\t%d\n", name, l, d->seq[x].name, sub[x].s + 1, sub[x].e, "+-"[p->a[j]>>32&1], (uint32_t)p->a[j]);
0 .SUFFIXES: .gp .tex .eps .pdf .eps.gz
1
2 .eps.pdf:
3 epstopdf --outfile $@ $<
4
5 .eps.gz.pdf:
6 gzip -dc $< | epstopdf --filter > $@
7
8 .pdf.eps:
9 pdftops -eps $< $@
10
11 all:miniasm.pdf
12
13 miniasm.pdf:miniasm.tex miniasm.bib
14 pdflatex miniasm; bibtex miniasm; pdflatex miniasm; pdflatex miniasm;
15
16 clean:
17 rm -fr *.toc *.aux *.bbl *.blg *.idx *.log *.out *~ miniasm.pdf
0 \newcommand\classname{bioinfo}
1 \newcommand\lastmodifieddate{2003/02/08}
2 \newcommand\versionnumber{0.1}
3
4 % Are we printing crop marks?
5 \newif\if@cropmarkson \@cropmarksontrue
6
7 \NeedsTeXFormat{LaTeX2e}[2001/06/01]
8 \ProvidesClass{\classname}[\lastmodifieddate\space\versionnumber]
9
10 \setlength{\paperheight}{11truein}
11 \setlength{\paperwidth}{8.5truein}
12
13 \newif\if@final
14
15 \DeclareOption{draft}{\PassOptionsToPackage{draft}{graphicx}}
16 \DeclareOption{a4paper}{\PassOptionsToPackage{a4}{crop}}
17 \DeclareOption{centre}{\PassOptionsToPackage{center}{crop}}
18 \DeclareOption{crop}{\PassOptionsToPackage{cam}{crop}\global\@cropmarksontrue}
19 \DeclareOption{nocrop}{\PassOptionsToPackage{off}{crop}\global\@cropmarksonfalse}
20 \DeclareOption{info}{\PassOptionsToPackage{info}{crop}}
21 \DeclareOption{noinfo}{\PassOptionsToPackage{noinfo}{crop}}
22 \DeclareOption{final}{\global\@finaltrue}
23
24 \ExecuteOptions{a4paper,nocrop,centre,info}
25
26 \ProcessOptions
27
28 % Load all necessary packages
29 \RequirePackage{inputenc,crop,graphicx,amsmath,array,color,amssymb,flushend,stfloats,amsthm,chngpage,times}
30 %\RequirePackage[LY1]{fontenc}
31 %\RequirePackage[LY1,mtbold]{mathtime}
32 \def\helvetica{\fontfamily{phv}\selectfont}
33 \def\helveticaitalic{\fontfamily{phv}\itshape\selectfont}
34 \def\helveticabold{\fontfamily{phv}\bfseries\selectfont}
35 \def\helveticabolditalic{\fontfamily{phv}\bfseries\itshape\selectfont}
36
37 % Not sure if needed.
38 \newcommand\@ptsize{0}
39
40 % Set twoside printing
41 \@twosidetrue
42
43 % Marginal notes are on the outside edge
44 \@mparswitchfalse
45
46 \reversemarginpar
47
48 \renewcommand\normalsize{%
49 \@setfontsize\normalsize{9}{11}%
50 \abovedisplayskip 10\p@ \@plus2\p@ \@minus5\p@
51 \abovedisplayshortskip \z@ \@plus3\p@
52 \belowdisplayshortskip 6\p@ \@plus3\p@ \@minus3\p@
53 \belowdisplayskip \abovedisplayskip
54 \let\@listi\@listI}
55 \normalsize
56 \let\@bls\baselineskip
57
58 \newcommand\small{%
59 \@setfontsize\small{9}{11}%
60 \abovedisplayskip 11\p@ minus 3\p@
61 \belowdisplayskip \abovedisplayskip
62 \abovedisplayshortskip \z@ plus 2\p@
63 \belowdisplayshortskip 4\p@ plus 2\p@ minus2\p@
64 \def\@listi{\topsep 4.5\p@ plus 2\p@ minus 1\p@
65 \itemsep \parsep
66 \topsep 4\p@ plus 2\p@ minus 2\p@}}
67
68 \newcommand\footnotesize{%
69 \@setfontsize\footnotesize{8}{10}%
70 \abovedisplayskip 6\p@ minus 3\p@
71 \belowdisplayskip\abovedisplayskip
72 \abovedisplayshortskip \z@ plus 3\p@
73 \belowdisplayshortskip 6\p@ plus 3\p@ minus 3\p@
74 \def\@listi{\topsep 3\p@ plus 1\p@ minus 1\p@
75 \parsep 2\p@ plus 1\p@ minus 1\p@\itemsep \parsep}}
76
77 \def\scriptsize{\@setfontsize\scriptsize{7pt}{9pt}}
78 \def\tiny{\@setfontsize\tiny{5pt}{7pt}}
79 \def\large{\@setfontsize\large{11.5pt}{12pt}}
80 \def\Large{\@setfontsize\Large{14pt}{16}}
81 \def\LARGE{\@setfontsize\LARGE{15pt}{17pt}}
82 \def\huge{\@setfontsize\huge{22pt}{22pt}}
83 \def\Huge{\@setfontsize\Huge{30pt}{30pt}}
84
85 \DeclareOldFontCommand{\rm}{\normalfont\rmfamily}{\mathrm}
86 \DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf}
87 \DeclareOldFontCommand{\tt}{\normalfont\ttfamily}{\mathtt}
88 \DeclareOldFontCommand{\bf}{\normalfont\bfseries}{\mathbf}
89 \DeclareOldFontCommand{\it}{\normalfont\itshape}{\mathit}
90 \DeclareOldFontCommand{\sl}{\normalfont\slshape}{\@nomath\sl}
91 \DeclareOldFontCommand{\sc}{\normalfont\scshape}{\@nomath\sc}
92
93 % Line spacing
94 \setlength\lineskip{1\p@}
95 \setlength\normallineskip{1\p@}
96 \renewcommand\baselinestretch{}
97
98 % Paragraph dimensions and inter-para spacing
99 \setlength\parskip{0\p@}
100 \setlength\parindent{3mm}
101
102 % Set inter-para skips
103 \setlength\smallskipamount{3\p@ \@plus 1\p@ \@minus 1\p@}
104 \setlength\medskipamount{6\p@ \@plus 2\p@}
105 \setlength\bigskipamount{12\p@ \@plus 4\p@ \@minus 4\p@}
106
107 % Page break penalties
108 \@lowpenalty 51
109 \@medpenalty 151
110 \@highpenalty 301
111
112 % Disallow widows and orphans
113 \clubpenalty 10000
114 \widowpenalty 10000
115
116 % Disable page breaks before equations, allow pagebreaks after
117 % equations and discourage widow lines before equations.
118 \displaywidowpenalty 100
119 \predisplaypenalty 10000
120 \postdisplaypenalty 2500
121
122 % Allow breaking the page in the middle of a paragraph
123 \interlinepenalty 0
124
125 % Disallow breaking the page after a hyphenated line
126 \brokenpenalty 10000
127
128 % Hyphenation; don't split words into less than three characters
129 \lefthyphenmin=3
130 \righthyphenmin=3
131
132 %
133 % Set page layout dimensions
134 %
135 \setlength\headheight{16\p@} % height of running head
136 \setlength\topmargin{2.9pc} % head margin
137 \addtolength\topmargin{-1in} % subtract out the 1 inch driver margin
138
139 \setlength\topskip{10\p@} % height of first line of text
140 \setlength\headsep{19\p@} % space below running head --
141
142 \setlength\footskip{34\p@} % space above footer line
143 \setlength\maxdepth{.5\topskip} % pages can be short or deep by half a line?
144
145 \setlength\textwidth{42pc} % text measure excluding margins
146
147 \setlength\textheight{58\baselineskip} % 59 lines on a full page,
148 \addtolength\textheight{\topskip} % including the first
149 % line on the page
150
151 % Set the margins
152 \setlength\marginparsep{3\p@}
153 \setlength\marginparpush{3\p@}
154 \setlength\marginparwidth{35\p@}
155
156 \setlength\oddsidemargin{4.5pc}
157 \addtolength\oddsidemargin{-1in} % subtract out the 1 inch driver margin
158 \setlength\@tempdima{\paperwidth}
159 \addtolength\@tempdima{-\textwidth}
160 \addtolength\@tempdima{-4.5pc}
161 \setlength\evensidemargin{\@tempdima}
162 \addtolength\evensidemargin{-1in}
163
164 \setlength\columnsep{1.5pc} % space between columns for double-column text
165 \setlength\columnseprule{0\p@} % width of rule between two columns
166
167 % Footnotes
168 \setlength\footnotesep{9\p@} % space between footnotes
169 % space between text and footnote
170 \setlength{\skip\footins}{12\p@ \@plus 6\p@ \@minus 1\p@}
171
172 % Float placement parameters
173
174 % The total number of floats that can be allowed on a page.
175 \setcounter{totalnumber}{10}
176 % The maximum number of floats at the top and bottom of a page.
177 \setcounter{topnumber}{5}
178 \setcounter{bottomnumber}{5}
179 % The maximum part of the top or bottom of a text page that can be
180 % occupied by floats. This is set so that at least four lines of text
181 % fit on the page.
182 \renewcommand\topfraction{.9}
183 \renewcommand\bottomfraction{.9}
184 % The minimum amount of a text page that must be occupied by text.
185 % This should accommodate four lines of text.
186 \renewcommand\textfraction{.06}
187 % The minimum amount of a float page that must be occupied by floats.
188 \renewcommand\floatpagefraction{.94}
189
190 % The same parameters repeated for double column output
191 \renewcommand\dbltopfraction{.9}
192 \renewcommand\dblfloatpagefraction{.9}
193
194 % Space between floats
195 \setlength\floatsep {12\p@ \@plus 2\p@ \@minus 2\p@}
196 % Space between floats and text
197 \setlength\textfloatsep{20\p@ \@plus 2\p@ \@minus 4\p@}
198 % Space above and below an inline figure
199 \setlength\intextsep {18\p@ \@plus 2\p@ \@minus 2\p@}
200
201 % For double column floats
202 \setlength\dblfloatsep {12\p@ \@plus 2\p@ \@minus 2\p@}
203 \setlength\dbltextfloatsep{20\p@ \@plus 2\p@ \@minus 4\p@}
204
205 % Space left at top, bottom and in between floats on a float page.
206 \setlength\@fptop{0\p@} % no space above float page figures
207 \setlength\@fpsep{12\p@ \@plus 1fil}
208 \setlength\@fpbot{0\p@}
209
210 % The same for double column
211 \setlength\@dblfptop{0\p@}
212 \setlength\@dblfpsep{12\p@ \@plus 1fil}
213 \setlength\@dblfpbot{0\p@}
214
215 % Override settings in mathtime back to TeX defaults
216 \DeclareMathSizes{5} {5} {5} {5}
217 \DeclareMathSizes{6} {6} {5} {5}
218 \DeclareMathSizes{7} {7} {5} {5}
219 \DeclareMathSizes{8} {8} {6} {5}
220 \DeclareMathSizes{9} {9} {6.5} {5}
221 \DeclareMathSizes{10} {10} {7.5} {5}
222 \DeclareMathSizes{12} {12} {9} {7}
223
224 % Page styles
225 \def\ps@headings
226 {%
227 \def\@oddfoot{\vbox to 12.5\p@{\hbox{\rule{\textwidth}{0.5\p@}}\vss
228 \hbox to \textwidth{\hfill\helveticabold\small\thepage}%
229 }}%
230 \def\@evenfoot{\vbox to 12.5\p@{\rule{\textwidth}{0.5\p@}\vss
231 \hbox to \textwidth{\helveticabold\small\thepage\hfill}%
232 }}%
233 \def\@evenhead{\vbox{\hbox to \textwidth{\fontsize{8}{10}\selectfont
234 \helveticabold{\fontshape{it}\selectfont
235 \strut\leftmark}\hfill}\vspace{6.5\p@}\rule{\textwidth}{0.5\p@}}}%
236 \def\@oddhead{\vbox{\hbox to \textwidth{\hfill\fontsize{8}{10}\selectfont
237 \helveticabold{\fontshape{it}\selectfont\strut\rightmark}}%
238 \vspace{6.5\p@}\rule{\textwidth}{0.5\p@}}}%
239 \def\titlemark##1{\markboth{##1}{##1}}%
240 \def\authormark##1{\gdef\leftmark{##1}}%
241 }
242
243 \def\ps@opening
244 {% "opening" page style: ruled footers carrying the page number, no running heads
245 \def\@oddfoot{\vbox to 13\p@{\hbox{\rule{\textwidth}{1\p@}}\vss
246 \hbox to \textwidth{\helvetica
247 \fontsize{7}{9}\fontshape{n}\selectfont%
248 \hfill\small\helveticabold\thepage}%
249 }}%
250 \def\@evenfoot{\vbox to 13\p@{\rule{\textwidth}{1\p@}\vss
251 \hbox to \textwidth{\helvetica\thepage\hfill
252 \fontsize{7}{9}\fontshape{n}\selectfont}%
253 }}%
254 \let\@evenhead\relax
255 \let\@oddhead\relax}
256
257 % Page range
258 \newif\iflastpagegiven \lastpagegivenfalse
259 \newcommand\firstpage[1]{%
260 \gdef\@firstpage{#1}%
261 \ifnum\@firstpage>\c@page
262 \setcounter{page}{#1}%
263 \ClassWarning{BIO}{Increasing pagenumber to \@firstpage}%
264 \else \ifnum\@firstpage<\c@page
265 \ClassWarning{BIO}{Firstpage lower than pagenumber}\fi\fi
266 \xdef\@firstpage{\the\c@page}%
267 }
268 \def\@firstpage{1}
269 \def\pagenumbering#1{%
270 \global\c@page \@ne
271 \gdef\thepage{\csname @#1\endcsname \c@page}%
272 \gdef\thefirstpage{%
273 \csname @#1\endcsname \@firstpage}%
274 \gdef\thelastpage{%
275 \csname @#1\endcsname \@lastpage}%
276 }
277
278 \newcommand\lastpage[1]{\xdef\@lastpage{#1}%
279 \global\lastpagegiventrue}
280 \def\@lastpage{0}
281 \def\setlastpage{\iflastpagegiven\else
282 \edef\@tempa{@lastpage@}%
283 \expandafter
284 \ifx \csname \@tempa \endcsname \relax
285 \gdef\@lastpage{0}%
286 \else
287 \xdef\@lastpage{\@nameuse{@lastpage@}}%
288 \fi
289 \fi }
290 \def\writelastpage{%
291 \iflastpagegiven \else
292 \immediate\write\@auxout%
293 {\string\global\string\@namedef{@lastpage@}{\the\c@page}}%
294 \fi
295 }
296 \def\thepagerange{%
297 \ifnum\@lastpage =0 {\ \bf ???} \else
298 \ifnum\@lastpage = \@firstpage \ \thefirstpage\else
299 \thefirstpage--\thelastpage \fi\fi}
300
301 \AtBeginDocument{\setlastpage
302 \pagenumbering{arabic}%
303 }
304 \AtEndDocument{%
305 \writelastpage
306 \if@final
307 \clearemptydoublepage
308 \else
309 \clearpage
310 \fi}
311
312 %
313 % Sectional units
314 %
315
316 % Counters
317 \newcounter{section}
318 \newcounter{subsection}[section]
319 \newcounter{subsubsection}[subsection]
320 \newcounter{paragraph}[subsubsection]
321 \newcounter{subparagraph}[paragraph]
322 \newcounter{figure}
323 \newcounter{table}
324
325 % Form of the numbers
326 \newcommand\thepage{\arabic{page}}
327 \renewcommand\thesection{\arabic{section}}
328 \renewcommand\thesubsection{{\thesection.\arabic{subsection}}}
329 \renewcommand\thesubsubsection{{\thesubsection.\arabic{subsubsection}}}
330 \renewcommand\theparagraph{\thesubsubsection.\arabic{paragraph}}
331 \renewcommand\thesubparagraph{\theparagraph.\arabic{subparagraph}}
332 \renewcommand\theequation{\arabic{equation}}
333
334 % Form of the words
335 \newcommand\contentsname{Contents}
336 \newcommand\listfigurename{List of Figures}
337 \newcommand\listtablename{List of Tables}
338 \newcommand\partname{Part}
339 \newcommand\appendixname{Appendix}
340 \newcommand\abstractname{Abstract}
341 \newcommand\refname{References}
342 \newcommand\bibname{References}
343 \newcommand\indexname{Index}
344 \newcommand\figurename{Fig.}
345 \newcommand\tablename{Table}
346
347 % Clearemptydoublepage should really clear the running heads too
348 \newcommand{\clearemptydoublepage}{\newpage{\pagestyle{empty}\cleardoublepage}}
349
350 % Frontmatter, mainmatter and backmatter
351
352 \newif\if@mainmatter \@mainmattertrue
353
354 \newcommand\frontmatter{%
355 \clearpage
356 \@mainmatterfalse
357 \pagenumbering{roman}}
358
359 \newcommand\mainmatter{%
360 \clearpage
361 \@mainmattertrue
362 \pagenumbering{arabic}}
363
364 \newcommand\backmatter{%
365 \clearpage
366 \@mainmatterfalse}
367
368 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% TITLE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
369 \newlength{\dropfromtop}
370 \setlength{\dropfromtop}{\z@}
371
372 % Application Notes
373 \newif\if@appnotes
374 \newcommand{\application}{%
375 % \setlength{\dropfromtop}{-2.25pc}%
376 \global\@appnotestrue}
377
378 \long\def\title{\@ifnextchar[{\short@title}{\@@title}}
379 \def\short@title[#1]{\titlemark{#1}\@@@title}
380 \def\@@title#1{\authormark{#1}\@@@title{#1}}
381 \long\def\@@@title#1{\gdef\@title{#1}}
382
383 \long\def\author{\@ifnextchar[{\short@uthor}{\@uthor}}
384 \def\short@uthor[#1]{\authormark{#1}\@@author}
385 \def\@uthor#1{\authormark{#1}\@@author{#1}}
386 \long\def\@@author#1{\gdef\@author{#1}}
387
388 \def\vol#1{\global\def\@vol{#1}} % each setter here stores its argument globally in the matching \@<name> macro
389 \def\issue#1{\global\def\@issue{#1}}
391 \def\history#1{\global\def\@history{#1}}
392 \def\editor#1{\global\def\@editor{#1}}
393 \def\pubyear#1{\global\def\@pubyear{#1}}
394 \def\copyrightyear#1{\global\def\@copyrightyear{#1}}
395 \def\address#1{\global\def\@address{#1}}
396 \def\DOI#1{\global\def\@DOI{#1}}
397
398 \definecolor{gray}{cmyk}{0, 0, 0, 0.15}
399 \newlength{\extraspace}
400 \setlength{\extraspace}{\z@}
401
402 \newcommand\maketitle{\par
403 \begingroup
404 \renewcommand\thefootnote{\@fnsymbol\c@footnote}%
405 \def\@makefnmark{\rlap{\@textsuperscript{\normalfont\@thefnmark}}}%
406 \long\def\@makefntext##1{\parindent 3mm\noindent
407 % \@textsuperscript{\normalfont\@thefnmark}\raggedright##1}%
408 \@textsuperscript{\normalfont\@thefnmark}##1}%
409 \if@twocolumn
410 \ifnum \col@number=\@ne
411 \@maketitle
412 \else
413 \twocolumn[\@maketitle]%
414 \fi
415 \else
416 \newpage
417 \global\@topnum\z@ % Prevents figures from going at top of page.
418 \@maketitle
419 \fi
420 \thispagestyle{opening}\@thanks
421 \endgroup
422 \setcounter{footnote}{0}%
423 \global\let\thanks\relax
424 \global\let\maketitle\relax
425 \global\let\@maketitle\relax
426 \global\let\@address\@empty
427 \global\let\@history\@empty
428 \global\let\@editor\@empty
429 \global\let\@thanks\@empty
430 \global\let\@author\@empty
431 \global\let\@date\@empty
432 \global\let\@title\@empty
433 \global\let\@pubyear\@empty
434 \global\let\address\relax
435 \global\let\history\relax
436 \global\let\editor\relax
437 \global\let\title\relax
438 \global\let\author\relax
439 \global\let\date\relax
440 \global\let\pubyear\relax
441 \global\let\@copyrightline\@empty
442 \global\let\and\relax
443 \@afterindentfalse\@afterheading
444 }
445
446 \newlength{\aboveskipchk}%for checking oddpage or evenpage top skip
447 \setlength{\aboveskipchk}{\z@}%
448
449 \def\@maketitle{%
450 \let\footnote\thanks
451 \clearemptydoublepage
452 \checkoddpage\ifcpoddpage\setlength{\aboveskipchk}{-3pc}\else\setlength{\aboveskipchk}{-5pc}\fi%for checking oddpage or evenpage top skip%%
453 \vspace*{\aboveskipchk}%
454 \vspace{\dropfromtop}%
455 \hbox to \textwidth{%
456 {\helvetica\itshape\bfseries\fontsize{19}{12}\selectfont {\color{gray}MANUSCRIPT}
457 \hfil
458 \if@appnotes APPLICATIONS NOTE\hfil\fi
459 }%
460 \enskip \parbox[b]{11.3pc}{%
461 \helvetica
462 \flushright\fontsize{8}{10}\fontshape{it}\selectfont
463 \hfill
464 }}
465 \rule{\textwidth}{1\p@}\par%
466 \helvetica
467 \hbox to \textwidth{%
468 \parbox[t]{36.5pc}{%
469 \vspace*{1sp}
470 {\helveticabold\fontsize{16}{21}\selectfont\raggedright \@title \par}%
471 \vspace{4.5\p@}
472 {\helvetica\fontsize{13}{15}\selectfont\raggedright \@author \par}%
473 \vspace{4\p@}
474 {\helvetica\fontsize{10}{12}\selectfont\raggedright \@address \par}%
475 \vspace{4\p@}
476 %{\helvetica\fontsize{8}{10}\selectfont\raggedright \@history \par}
477 %\vspace{24\p@}
478 %{\helvetica\fontsize{10}{12}\selectfont\raggedright \@editor \par}
479 %\vspace{20\p@}
480 }%
481 }
482 \vspace{14.5\p@}%
483 \rule{\textwidth}{1\p@}%
484 \vspace{12\p@ plus 6\p@ minus 6\p@}%
485 \vspace{\extraspace}
486 }
487 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
488
489 %%%%%%%%%%%%%%%%%%%%%%%%%%%% Abstract %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
490 \newcommand{\absection}[1]{%
491 \par\noindent{\bfseries #1}\space\ignorespaces}
492
493 \newenvironment{abstract}{%
494 \begingroup
495 \let\section\absection
496 \fontfamily{\sfdefault}\fontsize{8}{11}\sffamily\selectfont
497 {\fontseries{b}\selectfont ABSTRACT}\par}
498 {\endgroup\bigskip\@afterheading\@afterindentfalse\vskip 12pt plus 3pt minus 1pt}
499
500 % Section macros
501
502 % Lowest level heading that takes a number by default
503 \setcounter{secnumdepth}{3}
504
505 \renewcommand{\@seccntformat}[1]{\csname the#1\endcsname\quad}
506
507 \def\section{%
508 \@startsection{section}{1}{\z@}
509 {-22\p@ plus -3\p@}{3\p@}
510 {\reset@font\raggedright\helveticabold\fontsize{10}{12}\selectfont\MakeUppercase}}
511
512 \def\subsection{%
513 \@startsection{subsection}{2}{\z@}
514 {-11\p@ plus -2\p@}{3\p@}
515 {\reset@font\raggedright\mathversion{bold}\fontseries{b}\fontsize{10}{12}\selectfont}}
516
517 \def\subsubsection{%
518 \@startsection{subsubsection}{3}{\z@}
519 {-11\p@ plus -1\p@}{-1em}
520 {\reset@font\normalfont\normalsize\itshape}}
521
522 \def\textcolon{\text{\rm :}}
523
524 \def\paragraph{%
525 \@startsection{paragraph}{4}{\z@}
526 {-6\p@}
527 {-.4em}
528 {\reset@font\itshape}}
529
530 % ********************
531 % Figures and tables *
532 % ********************
533
534 % Table and array parameters
535 \setlength\arraycolsep{.5em}
536 \setlength\tabcolsep{.5em}
537 \setlength\arrayrulewidth{.5pt}
538 \setlength\doublerulesep{2.5pt}
539 \setlength\extrarowheight{\z@}
540 \renewcommand\arraystretch{1}
541
542 \newlength{\abovecaptionskip}
543 \newlength{\belowcaptionskip}
544 \setlength{\abovecaptionskip}{13pt}
545 \setlength{\belowcaptionskip}{10.5pt}
546
547 \long\def\@makecaption#1#2{\vspace{\abovecaptionskip}%
548 \begingroup
549 \footnotesize
550 \textbf{#1.}\enskip{#2}\par
551 \endgroup}
552
553 \long\def\@tablecaption#1#2{%
554 \begingroup
555 \footnotesize
556 \textbf{#1.}\enskip{#2\strut\par}
557 \endgroup\vspace{\belowcaptionskip}}
558
559 % Table rules
560 \def\toprule{\noalign{\ifnum0=`}\fi\hrule \@height 0.5pt \hrule \@height 6pt \@width 0pt \futurelet
561 \@tempa\@xhline}
562 \def\midrule{\noalign{\ifnum0=`}\fi \hrule \@height 6.75pt \@width 0pt \hrule \@height 0.5pt
563 \hrule \@height 6pt \@width 0pt \futurelet \@tempa\@xhline}
564 \def\botrule{\noalign{\ifnum0=`}\fi \hrule \@height 5.75pt \@width 0pt \hrule \@height 0.5pt \futurelet
565 \@tempa\@xhline}
566 \def\hrulefill{\leavevmode\leaders\hrule height .5pt\hfill\kern\z@}
567
568 \def\thefigure{\@arabic\c@figure}
569 \def\fps@figure{tbp}
570 \def\ftype@figure{1}
571 \def\ext@figure{lof}
572 \def\fnum@figure{\figurename~\thefigure}
573 \def\figure{\@float{figure}}
574 \let\endfigure\end@float
575 \@namedef{figure*}{\@dblfloat{figure}}
576 \@namedef{endfigure*}{\end@dblfloat}
577 \def\thetable{\@arabic\c@table}
578 \def\fps@table{tbp}
579 \def\ftype@table{2}
580 \def\ext@table{lot}
581 \def\fnum@table{Table~\thetable}
582 \def\table{\let\@makecaption\@tablecaption\let\source\tablesource\@float{table}}
583 \def\endtable{\end@float}
584 \@namedef{table*}{\let\@makecaption\@tablecaption\@dblfloat{table}}
585 \@namedef{endtable*}{\end@dblfloat}
586
587 \newif\if@rotate \@rotatefalse
588 \newif\if@rotatecenter \@rotatecenterfalse
589 \def\rotatecenter{\global\@rotatecentertrue}
590 \def\rotateendcenter{\global\@rotatecenterfalse}
591 \def\rotate{\global\@rotatetrue}
592 \def\endrotate{\global\@rotatefalse}
593 \newdimen\rotdimen
594 \def\rotstart#1{\special{ps: gsave currentpoint currentpoint translate
595 #1 neg exch neg exch translate}}
596 \def\rotfinish{\special{ps: currentpoint grestore moveto}}
597 \def\rotl#1{\rotdimen=\ht#1\advance\rotdimen by \dp#1
598 \hbox to \rotdimen{\vbox to\wd#1{\vskip \wd#1
599 \rotstart{270 rotate}\box #1\vss}\hss}\rotfinish}
600 \def\rotr#1{\rotdimen=\ht #1\advance\rotdimen by \dp#1
601 \hbox to \rotdimen{\vbox to \wd#1{\vskip \wd#1
602 \rotstart{90 rotate}\box #1\vss}\hss}\rotfinish}
603
604 \newdimen\tempdime
605 \newbox\temptbox
606
607 % From ifmtarg.sty
608 % Copyright Peter Wilson and Donald Arseneau, 2000
609 \begingroup
610 \catcode`\Q=3
611 \long\gdef\@ifmtarg#1{\@xifmtarg#1QQ\@secondoftwo\@firstoftwo\@nil}
612 \long\gdef\@xifmtarg#1#2Q#3#4#5\@nil{#4}
613 \long\gdef\@ifnotmtarg#1{\@xifmtarg#1QQ\@firstofone\@gobble\@nil}
614 \endgroup
615
616 \def\tablesize{\@setfontsize\tablesize{8\p@}{10\p@}}
617
618 \newenvironment{processtable}[3]{\setbox\temptbox=\hbox{{\tablesize #2}}%
619 \tempdime\wd\temptbox\@processtable{#1}{#2}{#3}{\tempdime}}
620 {\relax}
621
622 \newcommand{\@processtable}[4]{%
623 \if@rotate
624 \setbox4=\vbox to \hsize{\vss\hbox to \textheight{%
625 \begin{minipage}{#4}%
626 \@ifmtarg{#1}{}{\caption{#1}}{\tablesize #2}%
627 \vskip7\p@\noindent
628 \parbox{#4}{\fontsize{7}{9}\selectfont #3\par}%
629 \end{minipage}}\vss}%
630 \rotr{4}
631 \else
632 \hbox to \hsize{\hss\begin{minipage}[t]{#4}%
633 \vskip2.9pt
634 \@ifmtarg{#1}{}{\caption{#1}}{\tablesize #2}%
635 \vskip6\p@\noindent
636 \parbox{#4}{\fontsize{7}{9}\selectfont #3\par}%
637 \end{minipage}\hss}\fi}%
638
639 \newcolumntype{P}[1]{>{\raggedright\let\\\@arraycr\hangindent1em}p{#1}}
640
641 % ******************************
642 % List numbering and lettering *
643 % ******************************
644 \def\labelenumi{{\rm\arabic{enumi}.}}
645 \def\theenumi{\arabic{enumi}}
646 \def\labelenumii{{\rm\alph{enumii}.}}
647 \def\theenumii{\alph{enumii}}
648 \def\p@enumii{\theenumi}
649 \def\labelenumiii{{\rm(\arabic{enumiii})}}
650 \def\theenumiii{\roman{enumiii}}
651 \def\p@enumiii{\theenumi(\theenumii)}
652 \def\labelenumiv{{\rm(\arabic{enumiv})}}
653 \def\theenumiv{\Alph{enumiv}}
654 \def\p@enumiv{\p@enumiii\theenumiii}
655 \def\labelitemi{{\small$\bullet$}}
656 \def\labelitemii{{\small$\bullet$}}
657 \def\labelitemiii{{\small$\bullet$}}
658 \def\labelitemiv{{\small$\bullet$}}
659
660 \def\@listI{\leftmargin\leftmargini \topsep\medskipamount}
661 \let\@listi\@listI
662 \@listi
663 \def\@listii{\topsep\z@\leftmargin\leftmarginii}
664 \def\@listiii{\leftmargin\leftmarginiii \topsep\z@}
665 \def\@listiv{\leftmargin\leftmarginiv \topsep\z@}
666 \def\@listv{\leftmargin\leftmarginv \topsep\z@}
667 \def\@listvi{\leftmargin\leftmarginvi \topsep\z@}
668
669 \setlength{\leftmargini}{3mm}
670 \setlength{\leftmarginii}{\z@}
671 \setlength{\leftmarginiii}{\z@}
672 \setlength{\leftmarginiv}{\z@}
673
674 % Changes to the list parameters for enumerate
675 \def\enumargs{%
676 \partopsep \z@
677 \itemsep 3\p@
678 \parsep \z@
679 \labelsep 0.5em
680 \listparindent \parindent
681 \itemindent \z@
682 \topsep 11\p@
683 }
684
685 \def\enumerate{%
686 \@ifnextchar[{\@numerate}{\@numerate[0]}}
687
688 \def\@numerate[#1]{%
689 \ifnum \@enumdepth >3 \@toodeep\else
690 \advance\@enumdepth \@ne
691 \edef\@enumctr{enum\romannumeral\the\@enumdepth}
692 \list{\csname label\@enumctr\endcsname}{%
693 \enumargs
694 \setlength{\leftmargin}{\csname leftmargin\romannumeral\the\@enumdepth\endcsname}
695 \usecounter{\@enumctr}
696 \settowidth\labelwidth{#1}
697 \addtolength{\leftmargin}{\labelwidth}
698 \addtolength{\leftmargin}{\labelsep}
699 \def\makelabel##1{\hss \llap{##1}}}%
700 \fi
701 }
702 \let\endenumerate\endlist
703
704 % Changes to the list parameters for itemize
705 \def\itemargs{%
706 \partopsep \z@
707 \itemsep 3\p@
708 \parsep \z@
709 \labelsep 0.5em
710 \rightmargin \z@
711 \listparindent \parindent
712 \itemindent \z@
713 \topsep11\p@
714 }
715
716 \def\itemize{%
717 \@ifnextchar[{\@itemize}{\@itemize[$\bullet$]}}
718
719 \def\@itemize[#1]{%
720 \ifnum \@itemdepth >3 \@toodeep\else
721 \advance\@itemdepth \@ne
722 \edef\@itemctr{item\romannumeral\the\@itemdepth}
723 \list{\csname label\@itemctr\endcsname}{%
724 \itemargs
725 \setlength{\leftmargin}{\csname leftmargin\romannumeral\the\@itemdepth\endcsname}
726 \settowidth\labelwidth{#1}
727 \addtolength{\leftmargin}{\labelwidth}
728 \addtolength{\leftmargin}{\labelsep}
729 \def\makelabel##1{\hss \llap{##1}}}%
730 \fi
731 }
732 \let\enditemize\endlist
733
734 \newenvironment{unlist}{%
735 \begin{list}{}%
736 {\setlength{\labelwidth}{\z@}%
737 \setlength{\labelsep}{\z@}%
738 \setlength{\topsep}{\medskipamount}%
739 \setlength{\itemsep}{3\p@}%
740 \setlength{\leftmargin}{2em}%
741 \setlength{\itemindent}{-2em}}}
742 {\end{list}}
743
744
745 % ***********************
746 % Quotes and Quotations *
747 % ***********************
748 \def\quotation{\par\begin{list}{}{
749 \setlength{\topsep}{\medskipamount}
750 \setlength{\leftmargin}{2em}%
751 \setlength{\rightmargin}{\z@}%
752 \setlength\labelwidth{0pt}%
753 \setlength\labelsep{0pt}%
754 \listparindent\parindent}%
755 \item[]}
756 \def\endquotation{\end{list}}
757 \let\quote\quotation
758 \let\endquote\endquotation
759
760 \skip\@mpfootins = \skip\footins
761 \fboxsep=6\p@
762 \fboxrule=1\p@
763
764 % *******************
765 % Table of contents *
766 % *******************
767 \newcommand\@pnumwidth{4em}
768 \newcommand\@tocrmarg{2.55em plus 1fil}
769 \newcommand\@dotsep{1000}
770 \setcounter{tocdepth}{4}
771
772 \def\numberline#1{\hbox to \@tempdima{{#1}}}
773
774 \def\@authortocline#1#2#3#4#5{%
775 \vskip 1.5\p@
776 \ifnum #1>\c@tocdepth \else
777 {\leftskip #2\relax \rightskip \@tocrmarg \parfillskip -\rightskip
778 \parindent #2\relax\@afterindenttrue
779 \interlinepenalty\@M
780 \leavevmode
781 \@tempdima #3\relax
782 \advance\leftskip \@tempdima \null\nobreak\hskip -\leftskip
783 {\itshape #4}\nobreak
784 \leaders\hbox{$\m@th
785 \mkern \@dotsep mu\hbox{.}\mkern \@dotsep
786 mu$}\hfill
787 \nobreak
788 \hb@xt@\@pnumwidth{\hfil}%
789 \par}%
790 \fi}
791
792 \newcommand*\l@author{\@authortocline{2}{0pt}{30pt}}
793 \newcommand*\l@section{\@dottedtocline{3}{11pt}{20pt}}
794 \newcommand*\l@subsection{\@dottedtocline{4}{31pt}{29pt}}
795 \newcommand*\l@subsubsection[2]{}
796
797
798
799 % ***********
800 % Footnotes *
801 % ***********
802
803 \def\footnoterule{\noindent\rule{\columnwidth}{0.5pt}}
804 \def\@makefnmark{\@textsuperscript{\normalfont\@thefnmark}}%
805 \newcommand\@makefntext[1]{\noindent{\@makefnmark}\enskip#1}
806
807 % ***********
808 % References *
809 % ***********
810
811 \providecommand{\newblock}{}
812 \newenvironment{thebibliography}{% reference list: unlabelled items set in \small with a 4mm hanging indent
813 \section{\bibname}%
814 \begingroup
815 \small
816 \begin{list}{}{%
817 \setlength{\topsep}{\z@}%
818 \setlength{\labelsep}{\z@}%
819 \setlength{\labelwidth}{\z@}%
820 \setlength{\leftmargin}{4mm}%
821 \setlength{\itemindent}{-4mm}}\small}
822 {\end{list}\endgroup}
823
824 \RequirePackage{natbib}
825
826 % **********
827 % Appendix *
828 % **********
829 \newif\ifappend % Are we in the Appendix?
830 \def\appendix{\par
831 \setcounter{section}{0}
832 \setcounter{subsection}{0}
833 \appendtrue
834 }
835
836 %Math parameters
837
838 \setlength{\jot}{5\p@}
839 \mathchardef\@m=1500 % adapted value
840
841 \def\frenchspacing{\sfcode`\.\@m \sfcode`\?\@m \sfcode`\!\@m
842 \sfcode`\:\@m \sfcode`\;\@m \sfcode`\,\@m}
843
844 % Theorems
845 \def\th@plain{%
846 %% \let\thm@indent\noindent % no indent
847 \thm@headfont{\quad\scshape}% heading font: small caps, preceded by a \quad
848 \thm@notefont{\upshape\mdseries}% note font: upright, medium series
849 \thm@headpunct{.}% period after heading
850 \thm@headsep 5\p@ plus\p@ minus\p@\relax
851 %% \let\thm@swap\@gobble
852 %% \thm@preskip\topsep
853 %% \thm@postskip\theorempreskipamount
854 \itshape % body font
855 }
856
857 \vbadness=9999 % only report underfull vboxes whose badness exceeds 9999
858 \tolerance=9999 % accept line breaks up to badness 9999
860 \doublehyphendemerits 640000 % corresponds to badness 800
861 \finalhyphendemerits 1000000 % corresponds to badness 1000
862
863 \flushbottom
864 \frenchspacing
865 \ps@headings
866 \twocolumn
867
868 % Screen PDF compatibility
869 \newcommand{\medline}[1]{%
870 \unskip\unskip\ignorespaces}
871
872
873 %%%%for smaller size text
874 \newenvironment{methods}{%
875 \begingroup
876 \def\section{%
877 \@startsection{section}{1}{\z@}
878 {-24\p@ plus -3\p@}{4\p@}
879 {\reset@font\raggedright\helveticabold\fontsize{10}{12}\selectfont\MakeUppercase}}
880 \def\subsection{%
881 \@startsection{subsection}{2}{\z@}
882 {-5\p@ plus -2\p@}{4\p@}
883 {\reset@font\raggedright\mathversion{bold}\fontseries{b}\fontsize{10}{12}\selectfont}}
884 \def\subsubsection{%
885 \@startsection{subsubsection}{3}{\z@}
886 {-6\p@ plus -1\p@}{-1em}
887 {\reset@font\normalfont\normalsize\itshape}}
888 \footnotesize
889 \par}
890 {\par\endgroup\bigskip\@afterheading\@afterindentfalse}
891
892
893
894 \graphicspath{{g:/artwork/oup/bioinfo/}}
895
896 \language=2
897
898 \hyphenation{Figure Table Figures Tables}
899
900 \newcommand{\href}[2]{#2}
901
902 \renewenvironment{proof}[1][\proofname]{\par
903 \normalfont \topsep6\p@\@plus6\p@\relax
904 \labelsep 0.5em
905 \trivlist
906 \item[\hskip\labelsep\hskip1em\textsc{#1}.]\ignorespaces
907 }{\endtrivlist\@endpefalse}
908
909 %%Different Bonds
910
911 \def\sbond{\ensuremath{\raise.25ex\hbox{${-}\!\!\!\!{-}$}}\kern -.9pt}
912 \def\dbond{\ensuremath{\raise.25ex\hbox{=$\!$=}}}
913 \def\tbond{\ensuremath{\raise.20ex\hbox{${\equiv}\!\!\!{\equiv}$}}}
914
915 % Author queries
916 %\fboxsep=4\p@
917 %\fboxrule=0.5\p@
918 \newcommand{\query}[2][0pt]{}%
919 % \marginpar{\vspace*{#1}%
920 % {\parbox{\marginparwidth}{%
921 % \raggedright\fontsize{6}{8}\selectfont
922 % #2}}}}
923
924 \renewcommand{\dag}{{\mathversion{normal}$^{\dagger}$}}
925
926 \endinput
Binary diff not shown
0 <?xml version="1.0" encoding="UTF-8"?>
1 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
2 <plist version="1.0">
3 <dict>
4 <key>ActiveLayerIndex</key>
5 <integer>0</integer>
6 <key>ApplicationVersion</key>
7 <array>
8 <string>com.omnigroup.OmniGraffle</string>
9 <string>139.18.0.187838</string>
10 </array>
11 <key>AutoAdjust</key>
12 <true/>
13 <key>BackgroundGraphic</key>
14 <dict>
15 <key>Bounds</key>
16 <string>{{0, 0}, {576, 733}}</string>
17 <key>Class</key>
18 <string>SolidGraphic</string>
19 <key>ID</key>
20 <integer>2</integer>
21 <key>Style</key>
22 <dict>
23 <key>shadow</key>
24 <dict>
25 <key>Draws</key>
26 <string>NO</string>
27 </dict>
28 <key>stroke</key>
29 <dict>
30 <key>Draws</key>
31 <string>NO</string>
32 </dict>
33 </dict>
34 </dict>
35 <key>BaseZoom</key>
36 <integer>0</integer>
37 <key>CanvasOrigin</key>
38 <string>{0, 0}</string>
39 <key>ColumnAlign</key>
40 <integer>1</integer>
41 <key>ColumnSpacing</key>
42 <real>36</real>
43 <key>CreationDate</key>
44 <string>2015-11-16 14:46:34 +0000</string>
45 <key>Creator</key>
46 <string>Heng Li</string>
47 <key>DisplayScale</key>
48 <string>1 0/72 in = 1.0000 in</string>
49 <key>GraphDocumentVersion</key>
50 <integer>8</integer>
51 <key>GraphicsList</key>
52 <array>
53 <dict>
54 <key>Bounds</key>
55 <string>{{161, 173.5}, {9, 14}}</string>
56 <key>Class</key>
57 <string>ShapedGraphic</string>
58 <key>FitText</key>
59 <string>YES</string>
60 <key>Flow</key>
61 <string>Resize</string>
62 <key>ID</key>
63 <integer>23</integer>
64 <key>Shape</key>
65 <string>Rectangle</string>
66 <key>Style</key>
67 <dict>
68 <key>fill</key>
69 <dict>
70 <key>Draws</key>
71 <string>NO</string>
72 </dict>
73 <key>shadow</key>
74 <dict>
75 <key>Draws</key>
76 <string>NO</string>
77 </dict>
78 <key>stroke</key>
79 <dict>
80 <key>Draws</key>
81 <string>NO</string>
82 </dict>
83 </dict>
84 <key>Text</key>
85 <dict>
86 <key>Pad</key>
87 <integer>0</integer>
88 <key>Text</key>
89 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
90 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
91 {\colortbl;\red255\green255\blue255;}
92 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
93
94 \f0\i\fs24 \cf0 w}</string>
95 <key>VerticalPad</key>
96 <integer>0</integer>
97 </dict>
98 <key>Wrap</key>
99 <string>NO</string>
100 </dict>
101 <dict>
102 <key>Bounds</key>
103 <string>{{100, 137.5}, {6, 14}}</string>
104 <key>Class</key>
105 <string>ShapedGraphic</string>
106 <key>FitText</key>
107 <string>YES</string>
108 <key>Flow</key>
109 <string>Resize</string>
110 <key>ID</key>
111 <integer>22</integer>
112 <key>Shape</key>
113 <string>Rectangle</string>
114 <key>Style</key>
115 <dict>
116 <key>fill</key>
117 <dict>
118 <key>Draws</key>
119 <string>NO</string>
120 </dict>
121 <key>shadow</key>
122 <dict>
123 <key>Draws</key>
124 <string>NO</string>
125 </dict>
126 <key>stroke</key>
127 <dict>
128 <key>Draws</key>
129 <string>NO</string>
130 </dict>
131 </dict>
132 <key>Text</key>
133 <dict>
134 <key>Pad</key>
135 <integer>0</integer>
136 <key>Text</key>
137 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
138 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
139 {\colortbl;\red255\green255\blue255;}
140 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
141
142 \f0\i\fs24 \cf0 v}</string>
143 <key>VerticalPad</key>
144 <integer>0</integer>
145 </dict>
146 <key>Wrap</key>
147 <string>NO</string>
148 </dict>
149 <dict>
150 <key>Bounds</key>
151 <string>{{464, 185}, {18, 14}}</string>
152 <key>Class</key>
153 <string>ShapedGraphic</string>
154 <key>FitText</key>
155 <string>YES</string>
156 <key>Flow</key>
157 <string>Resize</string>
158 <key>FontInfo</key>
159 <dict>
160 <key>Font</key>
161 <string>Helvetica</string>
162 <key>Size</key>
163 <real>12</real>
164 </dict>
165 <key>ID</key>
166 <integer>21</integer>
167 <key>Shape</key>
168 <string>Rectangle</string>
169 <key>Style</key>
170 <dict>
171 <key>fill</key>
172 <dict>
173 <key>Draws</key>
174 <string>NO</string>
175 </dict>
176 <key>shadow</key>
177 <dict>
178 <key>Draws</key>
179 <string>NO</string>
180 </dict>
181 <key>stroke</key>
182 <dict>
183 <key>Draws</key>
184 <string>NO</string>
185 </dict>
186 </dict>
187 <key>Text</key>
188 <dict>
189 <key>Pad</key>
190 <integer>0</integer>
191 <key>Text</key>
192 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
193 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
194 {\colortbl;\red255\green255\blue255;}
195 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
196
197 \f0\i\b\fs24 \cf0 l
198 \i0 [2]}</string>
199 <key>VerticalPad</key>
200 <integer>0</integer>
201 </dict>
202 <key>Wrap</key>
203 <string>NO</string>
204 </dict>
205 <dict>
206 <key>Bounds</key>
207 <string>{{399, 125}, {18, 14}}</string>
208 <key>Class</key>
209 <string>ShapedGraphic</string>
210 <key>FitText</key>
211 <string>YES</string>
212 <key>Flow</key>
213 <string>Resize</string>
214 <key>FontInfo</key>
215 <dict>
216 <key>Font</key>
217 <string>Helvetica</string>
218 <key>Size</key>
219 <real>12</real>
220 </dict>
221 <key>ID</key>
222 <integer>20</integer>
223 <key>Shape</key>
224 <string>Rectangle</string>
225 <key>Style</key>
226 <dict>
227 <key>fill</key>
228 <dict>
229 <key>Draws</key>
230 <string>NO</string>
231 </dict>
232 <key>shadow</key>
233 <dict>
234 <key>Draws</key>
235 <string>NO</string>
236 </dict>
237 <key>stroke</key>
238 <dict>
239 <key>Draws</key>
240 <string>NO</string>
241 </dict>
242 </dict>
243 <key>Text</key>
244 <dict>
245 <key>Pad</key>
246 <integer>0</integer>
247 <key>Text</key>
248 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
249 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
250 {\colortbl;\red255\green255\blue255;}
251 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
252
253 \f0\i\b\fs24 \cf0 l
254 \i0 [1]}</string>
255 <key>VerticalPad</key>
256 <integer>0</integer>
257 </dict>
258 <key>Wrap</key>
259 <string>NO</string>
260 </dict>
261 <dict>
262 <key>Bounds</key>
263 <string>{{331, 185}, {22, 14}}</string>
264 <key>Class</key>
265 <string>ShapedGraphic</string>
266 <key>FitText</key>
267 <string>YES</string>
268 <key>Flow</key>
269 <string>Resize</string>
270 <key>FontInfo</key>
271 <dict>
272 <key>Font</key>
273 <string>Helvetica</string>
274 <key>Size</key>
275 <real>12</real>
276 </dict>
277 <key>ID</key>
278 <integer>19</integer>
279 <key>Shape</key>
280 <string>Rectangle</string>
281 <key>Style</key>
282 <dict>
283 <key>fill</key>
284 <dict>
285 <key>Draws</key>
286 <string>NO</string>
287 </dict>
288 <key>shadow</key>
289 <dict>
290 <key>Draws</key>
291 <string>NO</string>
292 </dict>
293 <key>stroke</key>
294 <dict>
295 <key>Draws</key>
296 <string>NO</string>
297 </dict>
298 </dict>
299 <key>Text</key>
300 <dict>
301 <key>Pad</key>
302 <integer>0</integer>
303 <key>Text</key>
304 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
305 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
306 {\colortbl;\red255\green255\blue255;}
307 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
308
309 \f0\i\b\fs24 \cf0 e
310 \i0 [2]}</string>
311 <key>VerticalPad</key>
312 <integer>0</integer>
313 </dict>
314 <key>Wrap</key>
315 <string>NO</string>
316 </dict>
317 <dict>
318 <key>Bounds</key>
319 <string>{{209, 185}, {22, 14}}</string>
320 <key>Class</key>
321 <string>ShapedGraphic</string>
322 <key>FitText</key>
323 <string>YES</string>
324 <key>Flow</key>
325 <string>Resize</string>
326 <key>FontInfo</key>
327 <dict>
328 <key>Font</key>
329 <string>Helvetica</string>
330 <key>Size</key>
331 <real>12</real>
332 </dict>
333 <key>ID</key>
334 <integer>18</integer>
335 <key>Shape</key>
336 <string>Rectangle</string>
337 <key>Style</key>
338 <dict>
339 <key>fill</key>
340 <dict>
341 <key>Draws</key>
342 <string>NO</string>
343 </dict>
344 <key>shadow</key>
345 <dict>
346 <key>Draws</key>
347 <string>NO</string>
348 </dict>
349 <key>stroke</key>
350 <dict>
351 <key>Draws</key>
352 <string>NO</string>
353 </dict>
354 </dict>
355 <key>Text</key>
356 <dict>
357 <key>Pad</key>
358 <integer>0</integer>
359 <key>Text</key>
360 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
361 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
362 {\colortbl;\red255\green255\blue255;}
363 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
364
365 \f0\i\b\fs24 \cf0 b
366 \i0 [2]}</string>
367 <key>VerticalPad</key>
368 <integer>0</integer>
369 </dict>
370 <key>Wrap</key>
371 <string>NO</string>
372 </dict>
373 <dict>
374 <key>Bounds</key>
375 <string>{{330.99999359250069, 124.99999651312828}, {22, 14}}</string>
376 <key>Class</key>
377 <string>ShapedGraphic</string>
378 <key>FitText</key>
379 <string>YES</string>
380 <key>Flow</key>
381 <string>Resize</string>
382 <key>FontInfo</key>
383 <dict>
384 <key>Font</key>
385 <string>Helvetica</string>
386 <key>Size</key>
387 <real>12</real>
388 </dict>
389 <key>ID</key>
390 <integer>17</integer>
391 <key>Shape</key>
392 <string>Rectangle</string>
393 <key>Style</key>
394 <dict>
395 <key>fill</key>
396 <dict>
397 <key>Draws</key>
398 <string>NO</string>
399 </dict>
400 <key>shadow</key>
401 <dict>
402 <key>Draws</key>
403 <string>NO</string>
404 </dict>
405 <key>stroke</key>
406 <dict>
407 <key>Draws</key>
408 <string>NO</string>
409 </dict>
410 </dict>
411 <key>Text</key>
412 <dict>
413 <key>Pad</key>
414 <integer>0</integer>
415 <key>Text</key>
416 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
417 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
418 {\colortbl;\red255\green255\blue255;}
419 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
420
421 \f0\i\b\fs24 \cf0 e
422 \i0 [1]}</string>
423 <key>VerticalPad</key>
424 <integer>0</integer>
425 </dict>
426 <key>Wrap</key>
427 <string>NO</string>
428 </dict>
429 <dict>
430 <key>Bounds</key>
431 <string>{{209, 125}, {22, 14}}</string>
432 <key>Class</key>
433 <string>ShapedGraphic</string>
434 <key>FitText</key>
435 <string>YES</string>
436 <key>Flow</key>
437 <string>Resize</string>
438 <key>FontInfo</key>
439 <dict>
440 <key>Font</key>
441 <string>Helvetica</string>
442 <key>Size</key>
443 <real>12</real>
444 </dict>
445 <key>ID</key>
446 <integer>16</integer>
447 <key>Shape</key>
448 <string>Rectangle</string>
449 <key>Style</key>
450 <dict>
451 <key>fill</key>
452 <dict>
453 <key>Draws</key>
454 <string>NO</string>
455 </dict>
456 <key>shadow</key>
457 <dict>
458 <key>Draws</key>
459 <string>NO</string>
460 </dict>
461 <key>stroke</key>
462 <dict>
463 <key>Draws</key>
464 <string>NO</string>
465 </dict>
466 </dict>
467 <key>Text</key>
468 <dict>
469 <key>Pad</key>
470 <integer>0</integer>
471 <key>Text</key>
472 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
473 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
474 {\colortbl;\red255\green255\blue255;}
475 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
476
477 \f0\i\b\fs24 \cf0 b
478 \i0 [1]}</string>
479 <key>VerticalPad</key>
480 <integer>0</integer>
481 </dict>
482 <key>Wrap</key>
483 <string>NO</string>
484 </dict>
485 <dict>
486 <key>Class</key>
487 <string>LineGraphic</string>
488 <key>ID</key>
489 <integer>15</integer>
490 <key>Points</key>
491 <array>
492 <string>{317, 117}</string>
493 <string>{378, 162}</string>
494 </array>
495 <key>Style</key>
496 <dict>
497 <key>stroke</key>
498 <dict>
499 <key>HeadArrow</key>
500 <string>FilledArrow</string>
501 <key>Legacy</key>
502 <true/>
503 <key>LineType</key>
504 <integer>1</integer>
505 <key>Pattern</key>
506 <integer>1</integer>
507 <key>TailArrow</key>
508 <string>0</string>
509 </dict>
510 </dict>
511 </dict>
512 <dict>
513 <key>Class</key>
514 <string>LineGraphic</string>
515 <key>ID</key>
516 <integer>13</integer>
517 <key>Points</key>
518 <array>
519 <string>{223, 117}</string>
520 <string>{189, 162}</string>
521 </array>
522 <key>Style</key>
523 <dict>
524 <key>stroke</key>
525 <dict>
526 <key>HeadArrow</key>
527 <string>FilledArrow</string>
528 <key>Legacy</key>
529 <true/>
530 <key>LineType</key>
531 <integer>1</integer>
532 <key>Pattern</key>
533 <integer>1</integer>
534 <key>TailArrow</key>
535 <string>0</string>
536 </dict>
537 </dict>
538 </dict>
539 <dict>
540 <key>Bounds</key>
541 <string>{{225, 104}, {90, 14}}</string>
542 <key>Class</key>
543 <string>ShapedGraphic</string>
544 <key>FitText</key>
545 <string>YES</string>
546 <key>Flow</key>
547 <string>Resize</string>
548 <key>ID</key>
549 <integer>11</integer>
550 <key>Shape</key>
551 <string>Rectangle</string>
552 <key>Style</key>
553 <dict>
554 <key>fill</key>
555 <dict>
556 <key>Draws</key>
557 <string>NO</string>
558 </dict>
559 <key>shadow</key>
560 <dict>
561 <key>Draws</key>
562 <string>NO</string>
563 </dict>
564 <key>stroke</key>
565 <dict>
566 <key>Draws</key>
567 <string>NO</string>
568 </dict>
569 </dict>
570 <key>Text</key>
571 <dict>
572 <key>Pad</key>
573 <integer>0</integer>
574 <key>Text</key>
575 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
576 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
577 {\colortbl;\red255\green255\blue255;}
578 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
579
580 \f0\fs24 \cf0 Overhang region}</string>
581 <key>VerticalPad</key>
582 <integer>0</integer>
583 </dict>
584 <key>Wrap</key>
585 <string>NO</string>
586 </dict>
587 <dict>
588 <key>Class</key>
589 <string>LineGraphic</string>
590 <key>ID</key>
591 <integer>6</integer>
592 <key>Points</key>
593 <array>
594 <string>{180, 180}</string>
595 <string>{468, 180}</string>
596 </array>
597 <key>Style</key>
598 <dict>
599 <key>stroke</key>
600 <dict>
601 <key>HeadArrow</key>
602 <string>0</string>
603 <key>Legacy</key>
604 <true/>
605 <key>LineType</key>
606 <integer>1</integer>
607 <key>TailArrow</key>
608 <string>0</string>
609 <key>Width</key>
610 <real>3</real>
611 </dict>
612 </dict>
613 </dict>
614 <dict>
615 <key>Class</key>
616 <string>LineGraphic</string>
617 <key>ID</key>
618 <integer>5</integer>
619 <key>Points</key>
620 <array>
621 <string>{117, 144}</string>
622 <string>{405, 144}</string>
623 </array>
624 <key>Style</key>
625 <dict>
626 <key>stroke</key>
627 <dict>
628 <key>HeadArrow</key>
629 <string>0</string>
630 <key>Legacy</key>
631 <true/>
632 <key>LineType</key>
633 <integer>1</integer>
634 <key>TailArrow</key>
635 <string>0</string>
636 <key>Width</key>
637 <real>3</real>
638 </dict>
639 </dict>
640 </dict>
641 <dict>
642 <key>Bounds</key>
643 <string>{{216, 144}, {126, 36}}</string>
644 <key>Class</key>
645 <string>ShapedGraphic</string>
646 <key>ID</key>
647 <integer>4</integer>
648 <key>Shape</key>
649 <string>Rectangle</string>
650 <key>Style</key>
651 <dict>
652 <key>fill</key>
653 <dict>
654 <key>Color</key>
655 <dict>
656 <key>b</key>
657 <string>0.6</string>
658 <key>g</key>
659 <string>0.6</string>
660 <key>r</key>
661 <string>0.6</string>
662 </dict>
663 </dict>
664 <key>shadow</key>
665 <dict>
666 <key>Draws</key>
667 <string>NO</string>
668 </dict>
669 </dict>
670 <key>Text</key>
671 <dict>
672 <key>Text</key>
673 <string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
674 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
675 {\colortbl;\red255\green255\blue255;}
676 \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
677
678 \f0\fs24 \cf0 mapped region}</string>
679 </dict>
680 </dict>
681 <dict>
682 <key>Bounds</key>
683 <string>{{342, 144}, {63, 36}}</string>
684 <key>Class</key>
685 <string>ShapedGraphic</string>
686 <key>ID</key>
687 <integer>10</integer>
688 <key>Shape</key>
689 <string>Rectangle</string>
690 <key>Style</key>
691 <dict>
692 <key>fill</key>
693 <dict>
694 <key>Color</key>
695 <dict>
696 <key>b</key>
697 <string>0.901961</string>
698 <key>g</key>
699 <string>0.901961</string>
700 <key>r</key>
701 <string>0.901961</string>
702 </dict>
703 </dict>
704 <key>shadow</key>
705 <dict>
706 <key>Draws</key>
707 <string>NO</string>
708 </dict>
709 <key>stroke</key>
710 <dict>
711 <key>Pattern</key>
712 <integer>1</integer>
713 </dict>
714 </dict>
715 </dict>
716 <dict>
717 <key>Bounds</key>
718 <string>{{180, 144}, {36, 36}}</string>
719 <key>Class</key>
720 <string>ShapedGraphic</string>
721 <key>ID</key>
722 <integer>8</integer>
723 <key>Shape</key>
724 <string>Rectangle</string>
725 <key>Style</key>
726 <dict>
727 <key>fill</key>
728 <dict>
729 <key>Color</key>
730 <dict>
731 <key>b</key>
732 <string>0.901961</string>
733 <key>g</key>
734 <string>0.901961</string>
735 <key>r</key>
736 <string>0.901961</string>
737 </dict>
738 </dict>
739 <key>shadow</key>
740 <dict>
741 <key>Draws</key>
742 <string>NO</string>
743 </dict>
744 <key>stroke</key>
745 <dict>
746 <key>Pattern</key>
747 <integer>1</integer>
748 </dict>
749 </dict>
750 </dict>
751 </array>
752 <key>GridInfo</key>
753 <dict/>
754 <key>GuidesLocked</key>
755 <string>NO</string>
756 <key>GuidesVisible</key>
757 <string>YES</string>
758 <key>HPages</key>
759 <integer>1</integer>
760 <key>ImageCounter</key>
761 <integer>1</integer>
762 <key>KeepToScale</key>
763 <false/>
764 <key>Layers</key>
765 <array>
766 <dict>
767 <key>Lock</key>
768 <string>NO</string>
769 <key>Name</key>
770 <string>Layer 1</string>
771 <key>Print</key>
772 <string>YES</string>
773 <key>View</key>
774 <string>YES</string>
775 </dict>
776 </array>
777 <key>LayoutInfo</key>
778 <dict>
779 <key>Animate</key>
780 <string>NO</string>
781 <key>circoMinDist</key>
782 <real>18</real>
783 <key>circoSeparation</key>
784 <real>0.0</real>
785 <key>layoutEngine</key>
786 <string>dot</string>
787 <key>neatoSeparation</key>
788 <real>0.0</real>
789 <key>twopiSeparation</key>
790 <real>0.0</real>
791 </dict>
792 <key>LinksVisible</key>
793 <string>NO</string>
794 <key>MagnetsVisible</key>
795 <string>NO</string>
796 <key>MasterSheets</key>
797 <array/>
798 <key>ModificationDate</key>
799 <string>2015-12-16 20:32:49 +0000</string>
800 <key>Modifier</key>
801 <string>Heng Li</string>
802 <key>NotesVisible</key>
803 <string>NO</string>
804 <key>Orientation</key>
805 <integer>2</integer>
806 <key>OriginVisible</key>
807 <string>NO</string>
808 <key>PageBreaks</key>
809 <string>YES</string>
810 <key>PrintInfo</key>
811 <dict>
812 <key>NSBottomMargin</key>
813 <array>
814 <string>float</string>
815 <string>41</string>
816 </array>
817 <key>NSHorizonalPagination</key>
818 <array>
819 <string>coded</string>
820 <string>BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG</string>
821 </array>
822 <key>NSLeftMargin</key>
823 <array>
824 <string>float</string>
825 <string>18</string>
826 </array>
827 <key>NSPaperSize</key>
828 <array>
829 <string>size</string>
830 <string>{612, 792}</string>
831 </array>
832 <key>NSPrintReverseOrientation</key>
833 <array>
834 <string>int</string>
835 <string>0</string>
836 </array>
837 <key>NSRightMargin</key>
838 <array>
839 <string>float</string>
840 <string>18</string>
841 </array>
842 <key>NSTopMargin</key>
843 <array>
844 <string>float</string>
845 <string>18</string>
846 </array>
847 </dict>
848 <key>PrintOnePage</key>
849 <false/>
850 <key>ReadOnly</key>
851 <string>NO</string>
852 <key>RowAlign</key>
853 <integer>1</integer>
854 <key>RowSpacing</key>
855 <real>36</real>
856 <key>SheetTitle</key>
857 <string>Canvas 1</string>
858 <key>SmartAlignmentGuidesActive</key>
859 <string>YES</string>
860 <key>SmartDistanceGuidesActive</key>
861 <string>YES</string>
862 <key>UniqueID</key>
863 <integer>1</integer>
864 <key>UseEntirePage</key>
865 <false/>
866 <key>VPages</key>
867 <integer>1</integer>
868 <key>WindowInfo</key>
869 <dict>
870 <key>CurrentSheet</key>
871 <integer>0</integer>
872 <key>ExpandedCanvases</key>
873 <array>
874 <dict>
875 <key>name</key>
876 <string>Canvas 1</string>
877 </dict>
878 </array>
879 <key>Frame</key>
880 <string>{{367, 6}, {710, 872}}</string>
881 <key>ListView</key>
882 <true/>
883 <key>OutlineWidth</key>
884 <integer>142</integer>
885 <key>RightSidebar</key>
886 <false/>
887 <key>ShowRuler</key>
888 <true/>
889 <key>Sidebar</key>
890 <true/>
891 <key>SidebarWidth</key>
892 <integer>120</integer>
893 <key>VisibleRegion</key>
894 <string>{{0, 0}, {575, 733}}</string>
895 <key>Zoom</key>
896 <real>1</real>
897 <key>ZoomValues</key>
898 <array>
899 <array>
900 <string>Canvas 1</string>
901 <real>1</real>
902 <real>1</real>
903 </array>
904 </array>
905 </dict>
906 </dict>
907 </plist>
0 @inproceedings{DBLP:conf/wabi/Myers14,
1 Author = {Gene Myers},
2 Booktitle = {Algorithms in Bioinformatics - 14th International Workshop, {WABI} 2014, Wroclaw, Poland, September 8-10, 2014. Proceedings},
3 Crossref = {DBLP:conf/wabi/2014},
4 Pages = {52--67},
5 Title = {Efficient Local Alignment Discovery amongst Noisy Long Reads},
6 Year = {2014}}
7
8 @proceedings{DBLP:conf/wabi/2014,
9 Editor = {Daniel G. Brown and Burkhard Morgenstern},
10 Publisher = {Springer},
11 Title = {Algorithms in Bioinformatics - 14th International Workshop, {WABI} 2014, Wroclaw, Poland, September 8-10, 2014. Proceedings},
12 Volume = {8701},
13 Year = {2014}}
14
15 @inproceedings{DBLP:conf/sigmod/SchleimerWA03,
16 Author = {Saul Schleimer and Daniel Shawcross Wilkerson and Alexander Aiken},
17 Booktitle = {Proceedings of the 2003 {ACM} {SIGMOD} International Conference on Management of Data, San Diego, California, USA, June 9-12, 2003},
18 Crossref = {DBLP:conf/sigmod/2003},
19 Pages = {76--85},
20 Title = {Winnowing: Local Algorithms for Document Fingerprinting},
21 Year = {2003}}
22
23 @proceedings{DBLP:conf/sigmod/2003,
24 Editor = {Alon Y. Halevy and Zachary G. Ives and AnHai Doan},
25 Publisher = {{ACM}},
26 Title = {Proceedings of the 2003 {ACM} {SIGMOD} International Conference on Management of Data, San Diego, California, USA, June 9-12, 2003},
27 Year = {2003}}
28
29 @article{Myers:2000kl,
30 Author = {Myers, E W and others},
31 Journal = {Science},
32 Pages = {2196-204},
33 Title = {A whole-genome assembly of {Drosophila}},
34 Volume = {287},
35 Year = {2000}}
36
37 @article{Roberts:2004fv,
38 Author = {Roberts, Michael and others},
39 Journal = {Bioinformatics},
40 Pages = {3363-9},
41 Title = {Reducing storage requirements for biological sequence comparison},
42 Volume = {20},
43 Year = {2004}}
44
45 @article{Myers:2005bh,
46 Author = {Myers, Eugene W},
47 Journal = {Bioinformatics},
48 Pages = {ii79-85},
49 Title = {The fragment assembly string graph},
50 Volume = {21 Suppl 2},
51 Year = {2005}}
52
53 @article{Zerbino:2008uq,
54 Author = {Zerbino, Daniel R and Birney, Ewan},
55 Journal = {Genome Res},
56 Pages = {821-9},
57 Title = {Velvet: algorithms for de novo short read assembly using de {Bruijn} graphs},
58 Volume = {18},
59 Year = {2008}}
60
61 @article{Kahn62aa,
62 Author = {Kahn, Arthur B},
63 Journal = {Communications of the ACM},
64 Pages = {558--562},
65 Title = {Topological sorting of large networks},
66 Volume = {5},
67 Year = {1962}}
68
69 @article{Li:2012fk,
70 Author = {Li, Heng},
71 Journal = {Bioinformatics},
72 Pages = {1838-44},
73 Title = {Exploring single-sample {SNP} and {INDEL} calling with whole-genome de novo assembly},
74 Volume = {28},
75 Year = {2012}}
76
77 @article{Berlin:2015xy,
78 Author = {Berlin, Konstantin and others},
79 Journal = {Nat Biotechnol},
80 Pages = {623-30},
81 Title = {Assembling large genomes with single-molecule sequencing and locality-sensitive hashing},
82 Volume = {33},
83 Year = {2015}}
84
85 @article{sovic:2015aa,
86 Author = {Ivan Sovic and others},
87 Journal = {bioRxiv},
88 Title = {Fast and sensitive mapping of error-prone nanopore sequencing reads with {GraphMap}},
89 Year = {2015}}
90
91 @article{TCS15,
92 author = {Ljiljana Brankovic and others},
93 title = {Linear-Time Superbubble Identification Algorithm for Genome Assembly},
94 journal = {Theoretical Computer Science},
95 year = {2015},
96 doi = {10.1016/j.tcs.2015.10.021}
97 }
98
99 @inproceedings{DBLP:conf/wabi/OnoderaSS13,
100 Author = {Taku Onodera and Kunihiko Sadakane and Tetsuo Shibuya},
101 Booktitle = {{WABI}},
102 Crossref = {DBLP:conf/wabi/2013},
103 Pages = {338--348},
104 Title = {Detecting Superbubbles in Assembly Graphs},
105 Year = {2013}}
106
107 @proceedings{DBLP:conf/wabi/2013,
108 editor = {Aaron E. Darling and
109 Jens Stoye},
110 title = {Algorithms in Bioinformatics - 13th International Workshop, {WABI}
111 2013, Sophia Antipolis, France, September 2-4, 2013. Proceedings},
112 series = {Lecture Notes in Computer Science},
113 volume = {8126},
114 publisher = {Springer},
115 year = {2013},
116 }
117
118 @article{Wick:2015qf,
119 Author = {Wick, Ryan R and others},
120 Journal = {Bioinformatics},
121 Pages = {3350-2},
122 Title = {Bandage: interactive visualization of de novo genome assemblies},
123 Volume = {31},
124 Year = {2015}}
125
126 @article{Li:2009ys,
127 Author = {Li, Heng and others},
128 Journal = {Bioinformatics},
129 Pages = {2078-9},
130 Title = {The Sequence Alignment/Map format and SAMtools},
131 Volume = {25},
132 Year = {2009}}
133
134 @article{Alkan:2011zr,
135 Author = {Alkan, Can and others},
136 Journal = {Nat Methods},
137 Pages = {61-5},
138 Title = {Limitations of next-generation genome sequence assembly},
139 Volume = {8},
140 Year = {2011}}
141
142 @article{Chaisson:2015wj,
143 Author = {Chaisson, Mark J P and others},
144 Journal = {Nat Rev Genet},
145 Pages = {627-40},
146 Title = {Genetic variation and the de novo assembly of human genomes},
147 Volume = {16},
148 Year = {2015}}
149
150 @article{Bashir:2012gb,
151 Author = {Bashir, Ali and others},
152 Journal = {Nat Biotechnol},
153 Pages = {701-7},
154 Title = {A hybrid approach for the automated finishing of bacterial genomes},
155 Volume = {30},
156 Year = {2012}}
157
158 @article{Ribeiro:2012bx,
159 Author = {Ribeiro, Filipe J and others},
160 Journal = {Genome Res},
161 Pages = {2270-7},
162 Title = {Finished bacterial genomes from shotgun sequence data},
163 Volume = {22},
164 Year = {2012}}
165
166 @article{Koren:2012pt,
167 Author = {Koren, Sergey and others},
168 Journal = {Nat Biotechnol},
169 Pages = {693-700},
170 Title = {Hybrid error correction and de novo assembly of single-molecule sequencing reads},
171 Volume = {30},
172 Year = {2012}}
173
174 @article{Chin:2013qr,
175 Author = {Chin, Chen-Shan and others},
176 Journal = {Nat Methods},
177 Pages = {563-9},
178 Title = {Nonhybrid, finished microbial genome assemblies from long-read {SMRT} sequencing data},
179 Volume = {10},
180 Year = {2013}}
181
182 @article{Koren:2013fc,
183 Author = {Koren, Sergey and others},
184 Journal = {Genome Biol},
185 Pages = {R101},
186 Title = {Reducing assembly complexity of microbial genomes with single-molecule sequencing},
187 Volume = {14},
188 Year = {2013}}
189
190 @article{Quick:2014uf,
191 Author = {Quick, Joshua and others},
192 Journal = {Gigascience},
193 Pages = {22},
194 Title = {A reference bacterial genome dataset generated on the {MinION}{\texttrademark} portable single-molecule nanopore sequencer},
195 Volume = {3},
196 Year = {2014}}
197
198 @article{Loman:2015xu,
199 Author = {Loman, Nicholas J and others},
200 Journal = {Nat Methods},
201 Pages = {733-5},
202 Title = {A complete bacterial genome assembled de novo using only nanopore sequencing data},
203 Volume = {12},
204 Year = {2015}}
205
206 @article{Chaisson:2012aa,
207 Author = {Chaisson, Mark J and Tesler, Glenn},
208 Journal = {BMC Bioinformatics},
209 Pages = {238},
210 Title = {Mapping single molecule sequencing reads using basic local alignment with successive refinement ({BLASR}): application and theory},
211 Volume = {13},
212 Year = {2012}}
213
214 @inproceedings{Broder:1997aa,
215 Author = {Broder, Andrei Z},
216 Booktitle = {Compression and Complexity of Sequences},
217 Pages = {21-29},
218 Title = {On the resemblance and containment of documents},
219 Year = {1997}}
220
221 @article{Altschul:1997vn,
222 Author = {Altschul, S F and others},
223 Journal = {Nucleic Acids Res},
224 Pages = {3389-402},
225 Title = {Gapped {BLAST} and {PSI-BLAST}: a new generation of protein database search programs},
226 Volume = {25},
227 Year = {1997}}
228
229 @article{Kent:2002jk,
230 Author = {Kent, W James},
231 Journal = {Genome Res},
232 Pages = {656-64},
233 Title = {{BLAT}--the {BLAST}-like alignment tool},
234 Volume = {12},
235 Year = {2002}}
236
237 @phdthesis{harris:2007aa,
238 Author = {Harris, R.S.},
239 School = {The Pennsylvania State University},
240 Title = {Improved pairwise alignment of genomic {DNA}},
241 Year = {2007}}
242
243 @article{Kiebasa:2011aa,
244 Author = {Kie{\l}basa, Szymon M and others},
245 Journal = {Genome Res},
246 Pages = {487-93},
247 Title = {Adaptive seeds tame genomic sequence comparison},
248 Volume = {21},
249 Year = {2011}}
250
251 @article{Li:2013aa,
252 Author = {Li, Heng},
253 Journal = {arXiv:1303.3997},
254 Title = {Aligning sequence reads, clone sequences and assembly contigs with {BWA-MEM}},
255 Year = {2013}}
0 \documentclass{bioinfo}
1 \copyrightyear{2015}
2 \pubyear{2015}
3
4 \usepackage{amsmath}
5 \usepackage[ruled,vlined]{algorithm2e}
6 \newcommand\mycommfont[1]{\footnotesize\rmfamily{\it #1}}
7 \SetCommentSty{mycommfont}
8 \SetKwComment{Comment}{$\triangleright$\ }{}
9
10 \usepackage{natbib}
11 \bibliographystyle{apalike}
12
13 \begin{document}
14 \firstpage{1}
15
16 \title[Long-read mapping and assembly]{Minimap and miniasm: fast mapping and de novo assembly for noisy long sequences}
17 \author[Li]{Heng Li}
18 \address{Broad Institute, 75 Ames Street, Cambridge, MA 02142, USA}
19 \maketitle
20
21 \begin{abstract}
22
23 \section{Motivation:} Single Molecule Real-Time (SMRT) sequencing technology and Oxford
24 Nanopore Technologies (ONT) produce reads over 10kbp in length, which have
25 enabled high-quality genome assembly at an affordable cost. However, at
26 present, long reads have an error rate as high as 10--15\%. Complex and
27 computationally intensive pipelines are required to assemble such reads.
28
29 \section{Results:} We present a new mapper, minimap, and a \emph{de novo}
30 assembler, miniasm, for efficiently mapping and assembling SMRT and ONT reads
31 without an error correction stage. They can often assemble a sequencing run of
32 bacterial data into a single contig in a few minutes, and assemble 45-fold
33 \emph{C. elegans} data in 9 minutes, orders of magnitude faster than the
34 existing pipelines, though the consensus sequence error rate is as high as that of raw
35 reads. We also introduce a pairwise read mapping format (PAF) and
36 a graphical fragment assembly format (GFA), and demonstrate the
37 interoperability between our tools and existing ones.
38
39 \section{Availability and implementation:} https://github.com/lh3/minimap and
40 https://github.com/lh3/miniasm
41
42 \section{Contact:} hengli@broadinstitute.org
43
44 \end{abstract}
45
46 \section{Introduction}
47
48 High-throughput short-read sequencing technologies, such as Illumina, have
49 empowered a variety of biological research and clinical applications that
50 would not be practical with the older Sanger sequencing. However, the short
51 read length (typically a few hundred basepairs) has posed a great challenge to
52 \emph{de novo} assembly as many repetitive sequences and segmental duplications
53 are longer than the read length and can hardly be resolved by short reads even
54 with paired-end data~\citep{Alkan:2011zr}. Although with increased read length
55 and improved algorithms we are now able to produce much better short-read
56 assemblies than a few years ago, the contiguity and completeness of the
57 assemblies are still not as good as Sanger assemblies~\citep{Chaisson:2015wj}.
58
59 PacBio's SMRT technology was developed partly as an answer to the
60 problem with short-read \emph{de novo} assembly. However, due to the high
61 per-base error rate, around 15\%, these reads were only used as a complement to
62 short reads initially~\citep{Bashir:2012gb,Ribeiro:2012bx,Koren:2012pt},
63 until~\citet{Chin:2013qr} and~\citet{Koren:2013fc} demonstrated the feasibility
64 of SMRT-only assembly. Since then, SMRT has become the preferred technology
65 for finishing small genomes and producing high-quality Eukaryotic
66 genomes~\citep{Berlin:2015xy}.
67
68 Oxford Nanopore Technologies (ONT) has recently offered another long-read
69 sequencing technology. Although the per-base error rate was high at the
70 early access phase~\citep{Quick:2014uf}, the latest data quality has been
71 greatly improved. \citet{Loman:2015xu} confirmed that we can achieve
72 high-quality bacterial assembly with ONT data alone.
73
74 Published long-read assembly pipelines all include four stages: (i) all-vs-all
75 raw read mapping, (ii) raw read error correction, (iii) assembly of error
76 corrected reads and (iv) contig consensus polish. Stage (iii) may involve
77 all-vs-all read mapping again, but as the error rate is much reduced at this
78 step, it is easier and faster than stage (i). Table~\ref{tab:tools} shows the tools used for
79 each stage. Notably, our tool minimap is a raw read overlapper and miniasm is
80 an assembler. We do not correct sequencing errors, but instead directly produce
81 unpolished and uncorrected contig sequences from raw read overlaps. The idea of
82 correction-free assembly was inspired by talks given by Gene Myers.
83 Sikic et al (personal communication) are also independently exploring such an
84 approach.
85
86 \begin{table}[b]
87 \processtable{Tools for noisy long-read assembly}
88 {\footnotesize\label{tab:tools}
89 \begin{tabular}{p{2.4cm}p{2cm}l}
90 \toprule
91 Functionality & Program & Reference \\
92 \midrule
93 Raw read overlap & BLASR & \citet{Chaisson:2012aa}\\
94 & DALIGNER & \citet{DBLP:conf/wabi/Myers14} \\
95 & MHAP & \citet{Berlin:2015xy} \\
96 & GraphMap & \citet{sovic:2015aa} \\
97 & minimap & this article \\
98 Error correction & pbdagcon & http://bit.ly/pbdagcon \\
99 & falcon\_sense & http://bit.ly/pbfcasm \\
100 & nanocorrect & \citet{Loman:2015xu} \\
101 Assembly & wgs-assembler & \citet{Myers:2000kl} \\
102 & Falcon & http://bit.ly/pbfcasm \\
103 & ra-integrate & http://bit.ly/raitgasm \\
104 & miniasm & this article \\
105 Consensus polish & Quiver & http://bit.ly/pbquiver \\
106 & nanopolish & \citet{Loman:2015xu} \\
107 \botrule
108 \end{tabular}
109 }{}
110 \end{table}
111
112 As we can see from Table~\ref{tab:tools}, each stage can be achieved with multiple tools.
113 Although we have successfully combined tools into different pipelines, we
114 need to change or convert the input/output formats to make them work
115 together. Another contribution of this article is the proposal of concise
116 mapping and assembly formats, which will hopefully encourage modular design of
117 assemblers and the associated tools.
118
119 \begin{methods}
120 \section{Methods}
121
122 \subsection{General notations}
123
124 Let $\Sigma=\{\mathrm{A},\mathrm{C},\mathrm{G},\mathrm{T}\}$ be the
125 alphabet of nucleotides. For a symbol $a\in\Sigma$, $\overline{a}$ is the
126 Watson-Crick complement of $a$. A string $s=a_1a_2\cdots a_n$ over
127 $\Sigma$ is also called a \emph{DNA sequence}. Its length is $|s|=n$;
128 its \emph{reverse complement} is $\overline{s}=\overline{a_1a_2\cdots
129 a_n}=\overline{a}_n\overline{a}_{n-1}\cdots\overline{a}_1$.
130 For convenience, we define strand function
131 $\pi:\Sigma^*\times\{0,1\}\to\Sigma^*$ such that $\pi(s,0)=s$ and
132 $\pi(s,1)=\overline{s}$. Here $\Sigma^*$ is the set of all DNA sequences.
133
134 By convention, we call a $k$-long DNA sequence a \emph{$k$-mer}. We use the
135 notation $s^k_i=a_i\cdots a_{i+k-1}$ to denote a $k$-long substring of $s$
136 starting at $i$. $\Sigma^k$ is the set of all $k$-mers.
137
138 \subsection{Minimap}
139
140 \subsubsection{Overview of $k$-mer based sequence similarity search}\label{sec:minimapov}
141
142 BLAST~\citep{Altschul:1997vn} and BLAT~\citep{Kent:2002jk} are among the most
143 popular sequence similarity search tools. They use one $k$-mer hash function
144 $\phi:\Sigma^k\to\mathbb{Z}$ to hash $k$-mers at the positions
145 $1,w+1,2w+1,\ldots$ of a target sequence and keep the hash values in a hash
146 table. Upon query, they use the same hash function on every $k$-mer of the
147 query sequence and look up the hash table for potential matches. If there are
148 one or multiple $k$-mer matches in a small window, these aligners extend the
149 matches with dynamic programming to construct the final alignment.
150
151 DALIGNER~\citep{DBLP:conf/wabi/Myers14} does not use a hash table. It instead
152 identifies $k$-mer matches between two sets of reads by sorting $k$-mers and
153 merging the sorted lists. DALIGNER is fast primarily because sorting and
154 merging are highly cache efficient.
155
156 MHAP~\citep{Berlin:2015xy} differs from others in the use of MinHash
157 sketch~\citep{Broder:1997aa}. Briefly, given a read sequence $s$ and $m$
158 $k$-mer hash functions $\{\phi_j\}_{1\le j\le m}$, MHAP computes
159 $h_j=\min\{\phi_j(s_i^k):1\le i\le |s|-k+1\}$ with each hash function $\phi_j$, and
160 takes list $(h_j)_{1\le j\le m}$, which is called the \emph{sketch} of
161 $s$, as a reduced representation of $s$. Suppose $(h_j)_j$ and $(h'_j)_j$ are
162 the sketches of two reads, respectively. When the two reads are similar to each
163 other or have significant overlaps, there are likely to exist multiple $j$ such
164 that $h_j=h'_j$. Potential matches can thus be identified. A limitation of
165 MinHash sketch is that it always selects a fixed number of hash values
166 regardless of the length of the sequences. This may waste space or hurt
167 sensitivity when input sequences vary greatly in lengths.
168
169 Minimap is heavily influenced by all these works. It adopts the idea of sketch
170 like MHAP but takes minimizers \citep{DBLP:conf/sigmod/SchleimerWA03,Roberts:2004fv} as a reduced
171 representation instead; it stores $k$-mers in a hash table like BLAT and MHAP
172 but also uses sorting extensively like DALIGNER. In addition, minimap is
173 designed not only as a read overlapper but also as a read-to-genome and
174 genome-to-genome mapper. It has more potential applications.
175
176 \subsubsection{Computing minimizers}
177
178 \begin{algorithm}[tb]
179 \DontPrintSemicolon
180 \footnotesize
181 \KwIn{Parameter $w$ and $k$ and sequence $s$ with $|s|\ge w+k-1$}
182 \KwOut{($w$,$k$)-minimizers, their positions and strands}
183 \BlankLine
184 \textbf{Function} {\sc MinimizerSketch}$(s,w,k)$
185 \Begin {
186 $\mathcal{M}\gets\emptyset$\Comment*[r]{NB: $\mathcal{M}$ is a set; no duplicates}
187 \For{$i\gets1$ \KwTo $|s|-w-k+1$} {
188 $m\gets\infty$\;
189 \nl\For (\Comment*[f]{Find the min value}) {$j\gets0$ \KwTo $w-1$} {
190 $(u,v)\gets(\phi(s^k_{i+j}),\phi(\overline{s}^k_{i+j}))$\;
191 \If (\Comment*[f]{Skip if strand ambiguous}) {$u\not=v$} {
192 $m\gets\min(m,\min(u,v))$\;
193 }
194 }
195 \nl\For (\Comment*[f]{Collect minimizers}) {$j\gets0$ \KwTo $w-1$} {
196 $(u,v)\gets(\phi(s^k_{i+j}),\phi(\overline{s}^k_{i+j}))$\;
197 \uIf{$u<v$ {\bf and} $u=m$} {
198 $\mathcal{M}\gets\mathcal{M}\cup\{(m,i+j,0)\}$\;
199 }\ElseIf{$v<u$ {\bf and} $v=m$}{
200 $\mathcal{M}\gets\mathcal{M}\cup\{(m,i+j,1)\}$\;
201 }
202 }
203 }
204 \Return $\mathcal{M}$\;
205 }
206 \caption{Compute minimizers}\label{alg:minimizer}
207 \end{algorithm}
208
209 Loosely speaking, a $(w,k)$-minimizer of a string is the
210 smallest $k$-mer in a surrounding window of $w$ consecutive $k$-mers. Formally,
211 let $\phi:\Sigma^k\to\mathbb{Z}$ be a $k$-mer hash function.
212 A \emph{double-strand $(w,k,\phi)$-minimizer}, or simply a \emph{minimizer}, of a
213 string $s$, $|s|\ge w+k-1$, is a triple $(h,i,r)$ such that there exists
214 $\max(1,i-w+1)\le j\le\min(i,|s|-w-k+1)$ which renders
215 $$
216 h=\phi(\pi(s_i^k,r))=\min\big\{\phi(\pi(s_{j+p}^k,r')):0\le p<w,r'\in\{0,1\}\big\}
217 $$
218 Let $\mathcal{M}(s)$ be the set of minimizers of $s$. Algorithm~\ref{alg:minimizer} gives the
219 pseudocode to compute $\mathcal{M}(s)$ in $O(w\cdot|s|)$ time. Our actual
220 implementation is close to $O(|s|)$ in the average case. It uses a queue to cache
221 the previous minima and avoids the loops at lines~1 and~2 most of the time. In
222 practice, time spent on collecting minimizers is insignificant.
223
224 A natural choice of hash function $\phi$ is to let $\phi(\mathrm{A})=0$,
225 $\phi(\mathrm{C})=1$, $\phi(\mathrm{G})=2$ and $\phi(\mathrm{T})=3$ and for a
226 $k$-mer $s=a_1\cdots a_k$, define
227 $$
228 \phi(s)=\phi(a_1)\times4^{k-1}+\phi(a_2)\times4^{k-2}+\cdots+\phi(a_k)
229 $$
230 This hash function always maps a $k$-mer to a distinct $2k$-bit integer. A
231 problem with this $\phi$ is that poly-A, which is often highly enriched in
232 genomes, always gets zero, the smallest value. We may oversample these
233 non-informative poly-A and hurt practical performance. To alleviate this issue,
234 we use function $\phi'=h\circ\phi$ instead, where $h$ is an invertible integer
235 hash function on $[0,4^k)$ (Algorithm~\ref{alg:invhash}; http://bit.ly/invihgi). The
236 invertibility of $h$ is not essential, but as such $\phi'$ never maps two
237 distinct $k$-mers to the same $2k$-bit integer, it helps to reduce hash
238 collisions.
239
240 \begin{algorithm}[tb]
241 \DontPrintSemicolon
242 \footnotesize
243 \KwIn{$p$-bit integer $x$}
244 \KwOut{hashed $p$-bit integer}
245 \BlankLine
246 \textbf{Function} {\sc InvertibleHash}$(x,p)$
247 \Begin {
248 $m\gets2^p-1$\;
249 $x\gets(\mbox{\tt\char126}x+(x\mbox{\tt\char60\char60}21))\mbox{ \tt\char38}\mbox{ }m$\;
250 $x\gets x\mbox{ \tt\char94}\mbox{ }x\mbox{\tt\char62\char62}24$\;
251 $x\gets(x+(x\mbox{\tt\char60\char60}3)+(x\mbox{\tt\char60\char60}8))\mbox{ \tt\char38}\mbox{ }m$\;
252 $x\gets x\mbox{ \tt\char94}\mbox{ }x\mbox{\tt\char62\char62}14$\;
253 $x\gets(x+(x\mbox{\tt\char60\char60}2)+(x\mbox{\tt\char60\char60}4))\mbox{ \tt\char38}\mbox{ }m$\;
254 $x\gets x\mbox{ \tt\char94}\mbox{ }x\mbox{\tt\char62\char62}28$\;
255 $x\gets(x+(x\mbox{\tt\char60\char60}31))\mbox{ \tt\char38}\mbox{ }m$\;
256 \Return $x$\;
257 }
258 \caption{Invertible integer hash function}\label{alg:invhash}
259 \end{algorithm}
260
261 Note that in a window of $w$ consecutive $k$-mers, there may be more than one
262 minimizer. Algorithm~\ref{alg:minimizer} keeps them all with the loop at line~2. This way, a
263 minimizer of $s$ always corresponds to a minimizer of $\overline{s}$.
264
265 For read overlapping, we use $k=15$ and $w=5$ to find minimizers.
266
267 \subsubsection{Indexing}
268
269 \begin{algorithm}[tb]
270 \DontPrintSemicolon
271 \footnotesize
272 \KwIn{Set of target sequences $\mathcal{T}=\{s_1,\ldots,s_T\}$}
273 \KwOut{Minimizer hash table $\mathcal{H}$}
274 \BlankLine
275 \textbf{Function} {\sc Index}$(\mathcal{T},w,k)$
276 \Begin {
277 $\mathcal{H}\gets$ empty hash table\;
278 \For{$t\gets1$ \KwTo $T$} {
279 $\mathcal{M}\gets${\sc MinimizerSketch}$(s_t,w,k)$\;
280 \ForEach{$(h,i,r)\in \mathcal{M}$} {
281 $\mathcal{H}[h]\gets\mathcal{H}[h]\cup\{(t,i,r)\}$\;
282 }
283 }
284 \Return $\mathcal{H}$\;
285 }
286 \caption{Index target sequences}\label{alg:idx}
287 \end{algorithm}
288
289 Algorithm~\ref{alg:idx} describes indexing target sequences. It keeps minimizers of all target
290 sequences in a hash table where the key is the minimizer hash and the value is
291 a set of (target sequence index, minimizer position, strand) triples
292 (packed into one 64-bit integer).
293
294 In implementation, we do not directly insert minimizers to the hash table.
295 Instead, we append minimizers to an array of two 64-bit integers (one for minimizer sequence and one for position) and sort the array after collecting
296 all minimizers. The hash table keeps the intervals on the sorted array. This
297 procedure dramatically reduces heap allocations and cache misses, and is
298 supposedly faster than direct hash table insertion.
299
300 \subsubsection{Mapping}
301
302 Given two sequences $s$ and $s'$, we say we find a \emph{minimizer hit}
303 $(h,x,i,i')$ if there exist $(h,i,r)\in\mathcal{M}(s)$ and
304 $(h,i',r')\in\mathcal{M}(s')$ with $x=r\oplus r'$ ($\oplus$ is the XOR
305 operator). Here $h$ is the minimizer hash value, $x$ indicates the relative
306 strand and $i$ and $i'$ are the positions on the two sequences, respectively.
307 We say two minimizer hits $(h_1,x,i_1,i'_1)$ and $(h_2,x,i_2,i'_2)$ are
308 \emph{$\epsilon$-away} if 1) $x=0$ and $|(i_1-i'_1)-(i_2-i'_2)|<\epsilon$
309 or 2) $x=1$ and $|(i_1+i'_1)-(i_2+i'_2)|<\epsilon$. Intuitively,
310 $\epsilon$-away hits are approximately colinear within a band of width
311 $\epsilon$ (500bp by default). Given a set of minimizer hits $\{(h,x,i,i')\}$, we can cluster
312 $i-i'$ for $x=0$ or $i+i'$ for $x=1$ to identify long colinear matches.
313 This procedure is inspired by Hough Transformation mentioned
314 by~\citet{sovic:2015aa}.
315
316 \begin{algorithm}[tb]
317 \DontPrintSemicolon
318 \footnotesize
319 \KwIn{Hash table $\mathcal{H}$ and query sequence $q$}
320 \KwOut{Print matching query and target intervals}
321 \BlankLine
322 \textbf{Function} {\sc Map}$(\mathcal{H},q,w,k,\epsilon)$
323 \Begin {
324 $\mathcal{A}\gets$ empty array\;
325 $\mathcal{M}\gets${\sc MinimizerSketch}$(q,w,k)$\;
326 \nl\ForEach (\Comment*[f]{Collect minimizer hits}) {$(h,i,r)\in \mathcal{M}$} {
327 \ForEach{$(t,i',r')\in \mathcal{H}[h]$} {
328 \uIf (\Comment*[f]{Minimizers on the same strand}) {$r=r'$} {
329 Append $(t,0,i-i',i')$ to $\mathcal{A}$\;
330 } \Else (\Comment*[f]{On different strands}) {
331 Append $(t,1,i+i',i')$ to $\mathcal{A}$\;
332 }
333 }
334 }
335 Sort $\mathcal{A}=[(t,r,c,i')]$ in the order of the four values in tuples\;
336 $b\gets1$\;
337 \nl\For (\Comment*[f]{Cluster minimizer hits}) {$e=1$ \KwTo $|\mathcal{A}|$} {
338 \If{$e=|\mathcal{A}|$ {\bf or} $\mathcal{A}[e+1].t\not=\mathcal{A}[e].t$ {\bf or} $\mathcal{A}[e+1].r\not=\mathcal{A}[e].r$ {\bf or} $\mathcal{A}[e+1].c-\mathcal{A}[e].c\ge\epsilon$} {
339 \nl$\mathcal{C}\gets$ the maximal colinear subset of $\mathcal{A}[b..e]$\;
340 Print the left- and right-most query/target positions in $\mathcal{C}$\;
341 $b\gets e+1$\;
342 }
343 }
344 }
345 \caption{Map a query sequence}\label{alg:map}
346 \end{algorithm}
347
348 Algorithm~\ref{alg:map} gives the details of the mapping algorithm. The loop at line~1
349 collects minimizer hits between the query and all the target sequences. The
350 loop at line~2 performs a single-linkage clustering to group approximately
351 colinear hits. Some hits in a cluster may not be colinear because two minimizer
352 hits within distance $\epsilon$ are always $\epsilon$-away. To fix this issue,
353 we find the maximal colinear subset of hits by solving a longest increasing
354 subsequence problem (line~3). This subset is the final mapping result. In
355 practical implementation, we set thresholds on the size of the subset (4 by
356 default) and the number of matching bases in the subset to filter poor mappings
357 (100 for read overlapping).
358
359 \subsection{Assembly graph}
360
361 Two strings $v$ and $w$ may be mapped to each other based on their sequence
362 similarity. If $v$ can be mapped to a substring of $w$, we say $w$
363 \emph{contains} $v$. If a suffix of $v$ and a prefix of $w$ can be mapped to
364 each other, we say $v$ \emph{overlaps} $w$, written as $v\to w$.
365 If we regard strings $v$ and $w$ as vertices, the overlap relationship defines
366 a directed edge between them. The \emph{length} of $v\to w$ equals the length
367 of $v$'s prefix that is not in the prefix-suffix match.
368
369 Let $G=(V,E,\ell)$ be a graph without multi-edges, where $V$ is a
370 set of DNA sequences (vertices), $E$ a set of overlaps between them (edges) and
371 $\ell:E\to\Re_+$ is the edge length function. $G$ is said to be
372 \emph{Watson-Crick complete} if i) $\forall v\in V$, $\overline{v}\in V$ and
373 ii) $\forall v\to w\in E$, $\overline{w}\to\overline{v}\in E$. $G$ is said to
374 be \emph{containment-free} if any sequence $v$ is not contained in other
375 sequences in $V$. If $G$ is both Watson-Crick complete and containment-free, it
376 is an \emph{assembly graph}. By definition, any vertex $v$ has a
377 \emph{complement vertex} $\overline{v}$ in the graph and any edge $v\to w$ has
378 a \emph{complement edge} $\overline{w}\to\overline{v}$. Let
379 $\mathrm{deg}^+(v)$ be the outdegree of $v$ and $\mathrm{deg}^-(v)$ be the
380 indegree. It follows that $\mathrm{deg}^-(v)=\mathrm{deg}^+(\overline{v})$.
381
382 An assembly graph has the same topology as a string graph~\citep{Myers:2005bh},
383 though the interpretation of the vertex set $V$ is different. In a string
384 graph, $V$ is the set of the two ends of sequences, not the set of forward and
385 reverse-complemented sequences. A de Bruijn graph can be regarded as a special
386 case of an overlap graph. It is also an assembly graph.
387
388 In an assembly graph, an edge $v\to w$ is \emph{transitive} if there exist
389 $v\to u$ and $u\to w$. Removing a transitive edge does not affect the
390 connectivity of the graph. A vertex $v$ is a \emph{tip} if ${\rm deg}^+(v)=0$
391 and ${\rm deg}^-(v)>0$. The majority of tips are caused by artifacts or missing
392 overlaps. A \emph{bubble} is a directed acyclic subgraph with a single source
393 $v$ and a single sink $w$ having at least two paths between $v$ and $w$, and without connecting to the rest of the graph. The
394 bubble is tight if ${\rm deg}^+(v)>1$ and ${\rm deg}^-(w)>1$. A bubble may be
395 caused by missing overlaps or by variants between haplotypes in multi-ploidy samples or paralogs.
396 It is preferred to collapse bubbles for high contiguity, though this introduces loss of information.
397
398 \subsection{Miniasm}
399
400 \subsubsection{Trimming reads}
401
402 Raw read sequences may contain artifacts such as untrimmed adapters and
403 chimaera. The first step of assembly is to reduce such artifacts by examining
404 read-to-read mappings. For each read, miniasm computes per-base coverage based
405 on good mappings against other reads (longer than 2000bp with at least
406 100bp non-redundant bases on matching minimizers). It then identifies the
407 longest region having coverage three or more, and trims bases outside this
408 region.
409
410 \subsubsection{Generating assembly graph}
411
412 \begin{figure}[tb]
413 \centering
414 \includegraphics[width=.45\textwidth]{overhang}
415 \caption{Mapping between two reads. $b[1]$ and $e[1]$ are the 0-based starting and
416 ending mapping coordinates of the first read $v$, respectively. $b[2]$ and
417 $e[2]$ are the mapping coordinates of read $w$. Lightgray areas indicate
418 overhang regions that should be mapped together if the overlap is real. If the
419 overhang regions are small enough, the figure implies an edge $v\to w$ with
420 approximate length $\ell(v\to w)=b[1]-b[2]$ and its complement edge
421 $\overline{w}\to\overline{v}$ with
422 $\ell(\overline{w}\to\overline{v})=(l[2]-e[2])-(l[1]-e[1])$.}\label{fig:overhang}
423 \end{figure}
424
425 \begin{algorithm}[bt]
426 \DontPrintSemicolon
427 \footnotesize
428 \KwIn{Read length $l$, mapping begin coordinate $b$ and mapping end $e$ of the
429 two reads; max overhang length $o$ (1000 by default) and max overhang to mapping length ratio
430 $r$ (0.8 by default).}
431 \KwOut{Classification of the mapping}
432 \BlankLine
433 \textbf{Function} {\sc ClassifyMapping}$(l[2], b[2], e[2], o, r)$
434 \Begin {
435 ${\it overhang}\gets\min(b[1], b[2])+\min(l[1]-e[1],l[2]-e[2])$\;
436 ${\it maplen}\gets\max(e[1]-b[1],e[2]-b[2])$\;
437 \uIf{${\it overhang}>\min(o,{\it maplen}\cdot r)$} {
438 \Return {\tt INTERNAL\_MATCH}
439 } \uElseIf {$b[1]\le b[2]$ {\bf and} $l[1]-e[1]\le l[2]-e[2]$} {
440 \Return {\tt FIRST\_CONTAINED}
441 }\uElseIf {$b[1]\ge b[2]$ {\bf and} $l[1]-e[1]\ge l[2]-e[2]$} {
442 \Return {\tt SECOND\_CONTAINED}
443 } \uElseIf {$b[1]>b[2]$} {
444 \Return {\tt FIRST\_TO\_SECOND\_OVERLAP}
445 } \Else {
446 \Return {\tt SECOND\_TO\_FIRST\_OVERLAP}
447 }
448 }
449 \caption{Mapping classification}\label{alg:ovlp}
450 \end{algorithm}
451
452 For each trimmed mapping, miniasm applies Algorithm~\ref{alg:ovlp} to classify the mapping
453 (see also Figure~\ref{fig:overhang} for the explanation of input variables).
454 It ignores internal matches, drops contained reads and adds overlaps to the
455 assembly graph. For a pair of reads, miniasm uses the longest overlap only to
456 avoid multi-edges.
457
458
459 \subsubsection{Graph cleaning}
460
461 \begin{algorithm}[tb]
462 \DontPrintSemicolon
463 \footnotesize
464 \KwIn{$G=(V,E)$, starting vertex $v_0$ and maximum probe distance $d$}
465 \KwOut{the sink vertex of a bubble within $d$; or {\bf nil} if not found}
466 \BlankLine
467 \textbf{Function} {\sc DetectBubble}$(V,E,v_0,d)$
468 \Begin {
469 \lIf{$\mathrm{deg}^+(v_0)<2$} { \Return {\bf nil} } \Comment*[r]{Not a source of bubble}
470 \lFor{$v\in V$} { $\delta[v]\gets\infty$ } \Comment*[r]{the min distance from $v_0$ to $v$}
471 $\delta[v_0]\gets0$\;
472 $S\gets$ empty stack \Comment*[r]{Vertices with all incoming edges visited}
473 {\sc Push}$(S,v_0)$\;
474 $p\gets0$ \Comment*[r]{Number of visited vertices never added to $S$}
475 \While{$S$ is not empty} {
476 $v\gets$ {\sc Pop}$(S)$\;
477 \ForEach{$v\to w\in E$} {
478 \If (\Comment*[f]{A circle involving the starting vertex}) {$w=v_0$} {
479 \Return {\bf nil}\;
480 }
481 \If (\Comment*[f]{Moving too far}) {$\delta[v]+\ell(v\to w)>d$} {
482 \Return {\bf nil}\;
483 }
484 \If (\Comment*[f]{Not visited before}) {$\delta[w]=\infty$} {
485 $\gamma[w]\gets \mathrm{deg}^-(w)$ \Comment*[r]{No. unvisited incoming edges}
486 $p\gets p+1$\;
487 }
488 \If{$\delta[v]+\ell(v\to w)<\delta[w]$} {
489 \nl$\delta[w]\gets \delta[v]+\ell(v\to w)$\;
490 }
491 $\gamma[w]\gets\gamma[w]-1$\;
492 \If (\Comment*[f]{All incoming edges visited}) {$\gamma[w]=0$} {
493 \If (\Comment*[f]{Not a tip}) {$\mathrm{deg}^+(w)\not=0$} {
494 {\sc Push}$(S,w)$\;
495 }
496 $p\gets p-1$\;
497 }
498 }
499 \If (\Comment*[f]{Found the sink}) {$|S|=1$ {\bf and} $p=0$} {
500 \Return {\sc Pop}$(S)$\;
501 }
502 }
503 \Return {\bf nil}\;
504 }
505 \caption{Bubble detection}\label{alg:popbub}
506 \end{algorithm}
507
508 After constructing the assembly graph, miniasm removes transitive
509 edges~\citep{Myers:2005bh}, trims tipping unitigs composed of few reads (4 by default) and pops small
510 bubbles~\citep{Zerbino:2008uq}. Algorithm~\ref{alg:popbub} detects bubbles where the longest path is shorter than $d$ (50kb by default). It is
511 adapted from Kahn's topological sorting algorithm~\citep{Kahn62aa}. It starts
512 from the potential source and visits a vertex when all its incoming edges have
513 been visited. Algorithm~\ref{alg:popbub} only detects bubbles. We can keep track of the
514 optimal parent vertex at line~1 and then backtrack to collapse bubbles to a
515 single path. Fermi~\citep{Li:2012fk} uses a similar algorithm except that it
516 keeps two optimal paths through the bubble. \citet{DBLP:conf/wabi/OnoderaSS13}
517 and \citet{TCS15} have also independently found similar algorithms.
518
519 In addition, if $v\to w_1$ and $v\to w_2$ exist and $\ell(v\to w_1)<\ell(v\to
520 w_2)$, miniasm removes $v\to w_2$ if $[|v|-\ell(v\to w_2)]/[|v|-\ell(v\to
521 w_1)]$ is small enough (70\% by default). When there are longer overlaps,
522 shorter overlaps after transitive reduction may be due to repeats.
523 However, non-repetitive overlaps may also be removed at a small chance, which
524 leads to missing overlaps and misassemblies.
525
526 \subsubsection{Generating unitig sequences}
527
528 If there are no multi-edges in the assembly graph, we can use $v_1\to
529 v_2\to\cdots\to v_k$ to represent a path consisting of $k$ vertices. The
530 sequence spelled from this path is the concatenation of vertex substrings:
531 $v_1[1,\ell(v_1\to v_2)]\circ v_2[1,\ell(v_2\to v_3)]\circ\cdots\circ
532 v_{k-1}[1,\ell(v_{k-1}\to v_k)]\circ v_k$, where $v[i,j]$ is the substring between
533 $i$ and $j$ inclusive, and $\circ$ is the string concatenation operator.
534
535 In a transitively reduced graph, a \emph{unitig}~\citep{Myers:2000kl} is a path $v_1\to
536 v_2\to\cdots\to v_k$ such that ${\rm deg}^+(v_i)={\rm deg}^-(v_{i+1})=1$ and i)
537 $v_1=v_k$ or ii) ${\rm deg}^-(v_1)\not=1$ and ${\rm deg}^+(v_k)\not=1$.
538 Its sequence is the sequence spelled from the path. Intuitively, a unitig is a
539 maximal path on which adjacent vertices can be ``unambiguously merged'' without
540 affecting the connectivity of the original assembly graph.
541
542 As miniasm does not correct sequencing errors, the error rate of unitig
543 sequence is the same as the error rate of the raw input reads. It is in theory
544 possible to derive a better unitig sequence by taking the advantage of read
545 overlaps. We have not implemented such a consensus tool yet.
546
547 \subsection{Formats: PAF and GFA}
548
549 \subsubsection{Pairwise mapping format (PAF)}
550
551 \begin{table}[tb]
552 \processtable{Pairwise mapping format (PAF)}
553 {\footnotesize\label{tab:paf}
554 \begin{tabular}{rcl}
555 \toprule
556 Col & Type & Description \\
557 \midrule
558 1 & string & Query sequence name \\
559 2 & int & Query sequence length \\
560 3 & int & Query start coordinate (BED-like) \\
561 4 & int & Query end coordinate (BED-like) \\
562 5 & char & `+' if query and target on the same strand; `-' if opposite \\
563 6 & string & Target sequence name \\
564 7 & int & Target sequence length \\
565 8 & int & Target start coordinate on the original strand \\
566 9 & int & Target end coordinate on the original strand \\
567 10& int & Number of matching bases in the mapping \\
568 11& int & Number of bases, including gaps, in the mapping \\
569 12& int & Mapping quality (0--255 with 255 for missing) \\
570 \botrule
571 \end{tabular}
572 }{PAF is a TAB-delimited text format with each line consisting of the above fixed
573 fields. When the alignment is available, column 11 equals the total number of
574 sequence matches, mismatches and gaps in the alignment. Column 10 divided by
575 column 11 gives the alignment identity. If the detailed alignment is not
576 available, column 10 and 11 can be approximate. PAF may optionally have
577 additional fields in the SAM-like typed key-value format~\citep{Li:2009ys}.}
578 \end{table}
579
580 PAF is a lightweight format keeping the key mapping information (Table~\ref{tab:paf}).
581 Minimap outputs mappings in PAF, which are taken by miniasm as input for
582 assembly. We also provide scripts to convert DALIGNER, MHAP and SAM formats to
583 PAF.
584
585 \subsubsection{Graphical fragment assembly format (GFA)}
586
587 \begin{table}[tb]
588 \processtable{Graphical fragment assembly format (GFA)}
589 {\footnotesize\label{tab:gfa}
590 \begin{tabular}{clp{5.8cm}}
591 \toprule
592 Line & Comment & Fixed fields \\
593 \midrule
594 H & Header & N/A \\
595 S & Segment & segName,segSeq \\
596 L & Overlap & segName1,segOri1,segName2,segOri2,CIGAR \\
597 \botrule
598 \end{tabular}
599 }{GFA is a line-based TAB-delimited format. Each line starts with a single
600 letter determining the interpretation of the following TAB-delimited fields. In
601 GFA, segment refers to a read or a unitig. A line starting with `S' gives the name
602 and sequence of a segment. When the sequence is not available, it can be a star
603 `*'. Overlaps between segments are represented in lines starting with `L',
604 giving the names and orientations of the two segments in an overlap. The last
605 field `CIGAR' on an `L'-line describes the detailed alignment of the overlap if
606 available. In addition to the types of lines in the table, GFA may contain
607 other line types starting with different letters. Each line may optionally have
608 additional SAM-like typed key-value pairs.}
609 \end{table}
610
611 GFA is a concise assembly format (Table~\ref{tab:gfa}; http://bit.ly/gfaspec) initially proposed by
612 us prior to miniasm and later improved by the community (P. Melsted, S. Jackman,
613 J. Simpson and E. Garrison, personal communication). GFA has an explicit
614 relationship to an assembly graph -- an `S' line in the GFA corresponds to a
615 vertex and its complement in the graph; an `L' line corresponds to an edge and
616 its complement. GFA is able to represent graphs produced at all the stages of
617 an assembly pipeline, from initial read overlaps to the unitig relationship in
618 the final assembly.
619
620 FASTG (http://bit.ly/fastgfmt) is another assembly format prior to GFA.
621 It uses different terminologies. A vertex in an assembly graph is called an
622 edge in FASTG, and an edge is called an adjacency. In FASTG, subgraphs can be
623 nested, though no tools work with nested graphs due to technical complications.
624 In addition, with nesting, one assembly graph can be represented in distinct
625 ways, which we regard as a limitation of FASTG.
626
627 \subsection{Evaluating the layout accuracy}\label{sec:eval}
628
629 Miniasm outputs the approximate positions of trimmed reads on the resulting
630 unitigs. We extract these reads, map them to the true assembly with minimap (option:
631 `-L100 -m0 -w5') and select the best mapping for each read. For a read $i$, let
632 ${\rm utg}_i$ be the unitig name and ${\rm rank}_i$ be its index on ${\rm
633 utg}_i$ (i.e. read $i$ is the ${\rm rank}_i$-th read on the unitig). If two
634 reads $i$ and $j$ are mapped adjacently on the true assembly, we say the
635 adjacency is \emph{$w$-consistent}, if (i) ${\rm utg}_i={\rm utg}_j$ and $|{\rm
636 rank}_i-{\rm rank}_j|<w$, or (ii) both read $i$ and $j$ are the first or the
637 last $w$ reads of some unitigs. We use $w=5$ to detect large structural
638 misassemblies.
639
640 \end{methods}
641
642 \begin{table}[tb]
643 \processtable{Evaluation data sets}
644 {\footnotesize\label{tab:data}
645 \begin{tabular}{llrrr}
646 \toprule
647 Name & Species & Size & Cov. & N50 \\
648 \midrule
649 PB-ce-40X & {\it Caenorhabditis elegans} & 104M & 45 & 16572 \\
650 ERS473430 & {\it Citrobacter koseri} & 4.9M & 106 & 7543 \\
651 ERS544009 & {\it Yersinia pseudotuberculosis} & 4.7M & 147 & 9002 \\
652 ERS554120 & {\it Pseudomonas aeruginosa} & 6.4M & 90 & 7106 \\
653 ERS605484 & {\it Vibrio vulnificus} & 5.0M & 155 & 5091 \\
654 ERS617393 & {\it Acinetobacter baumannii} & 4.0M & 237 & 7911 \\
655 ERS646601 & {\it Haemophilus influenzae} & 1.9M & 258 & 4081 \\
656 ERS659581 & {\it Klebsiella sp.} & 5.1M & 129 & 8031 \\
657 ERS670327 & {\it Shimwellia blattae} & 4.2M & 155 & 6765 \\
658 ERS685285 & {\it Streptococcus sanguinis} & 2.4M & 224 & 5791 \\
659 ERS743109 & {\it Salmonella enterica} & 4.8M & 188 & 6051 \\
660 PB-ecoli & {\it Escherichia coli} & 4.6M & 160 & 13976 \\
661 PBcR-PB-ec & {\it Escherichia coli} & 4.6M & 30 & 11757 \\
662 PBcR-ONT-ec & {\it Escherichia coli} & 4.6M & 29 & 9356 \\
663 MAP-006-1 & {\it Escherichia coli} & 4.6M & 54 & 10892 \\
664 MAP-006-2 & {\it Escherichia coli} & 4.6M & 30 & 10794 \\
665 MAP-006-pcr-1 & {\it Escherichia coli} & 4.6M & 30 & 8080 \\
666 MAP-006-pcr-2 & {\it Escherichia coli} & 4.6M & 60 & 8064 \\
667 \botrule
668 \end{tabular}
669 }{Evaluation data set name, species, reference genome size, theoretical
670 sequencing coverage and the N50 read length. Names starting with ``MAP'' are
671 unpublished recent ONT data provided by the Loman lab (http://bit.ly/loman006).
672 Names starting with ``ERS'' are accession numbers of unpublished PacBio data
673 from the NCTC project (http://bit.ly/nctc3k). PB-ecoli and PB-ce-40X are PacBio
674 public data sets sequenced with the P6/C4 chemistry (http://bit.ly/pbpubdat;
675 retrieved on 11/03/2015). PBcR-PB-ec is the PacBio sample data (P5/C3
676 chemistry) used in the tutorial of the PBcR pipeline; PBcR-ONT-ec is the ONT
677 example originally used by \citet{Loman:2015xu}. `pls2fasta --trimByRegion' was
678 applied to ERS* and PB-ecoli data sets as they do not provide read sequences in
679 the FASTQ format.}
680 \end{table}
681
682 \section{Results}
683
684 \subsection{The accuracy of minimap}
685
686 We mapped a human PacBio run ``m130928\_232712\_42213\_*.1.*''
687 (http://bit.ly/chm1p5c3) with minimap and BWA-MEM~\citep{Li:2013aa}
688 against GRCh37 plus decoy sequences (http://bit.ly/GRCh37d5).
689 We started from 23,235 reads (131Mbp), filtered out 7,593 reads (10Mbp) without
690 $\ge$2kbp BWA-MEM alignments, and further dropped 815 reads (11Mbp) with two or more
691 $\ge$2kbp chimeric alignments and 598 reads (4Mbp) with mapping quality below 10.
692 Of the remaining reads, we found only 2.0\% not overlapping the best minimap
693 mapping of the same read. The majority of them hit the decoy sequences, where
694 defining the true alignment is challenging as the decoy is enriched with incomplete
695 segments of centromeric repeats. If we exclude hits to the decoy, the
696 percentage drops to 0.7\%. On this input, minimap is 50 times faster than
697 BWA-MEM, while finding similar best mapping positions. This experiment
698 evaluates both the sensitivity and the specificity of minimap: if minimap had
699 low sensitivity, it would miss the BWA-MEM mapping completely; if minimap had
700 low specificity, its best mapping would often be a wrong mapping.
701
702 To test the sensitivity for read overlapping, we aligned all reads from
703 PBcR-PB-ec (Table~\ref{tab:data}) against the reference genome with BWA-MEM,
704 extracted reads with mapping quality $\ge$10, and identified $\ge$2kb overlaps
705 between the extracted reads based on their positions on the reference genome.
706 Minimap finds 93\% of these overlaps. It is more sensitive than MHAP in its
707 sensitive mode (78\%) but less than DALIGNER (98\%).
708
709 \subsection{Assembling bacterial genomes}
710
711 We evaluated the performance of miniasm on 17 bacterial data sets
712 (Table~\ref{tab:data}) with command line `minimap -Sw5 -L100 -m0 reads.fa reads.fa $|$
713 miniasm -f reads.fa -'. Miniasm is able to derive a single contig per
714 chromosome/plasmid for all but four data sets: 3 extra $>$50kb contigs for
715 ERS554120, and 1 extra contig for ERS605484, PBcR-ONT-ec and MAP-006-pcr-1
716 each. In the dotter plot between the assembly and the reference genome (similar
717 to Figure~\ref{fig:ce}), no large-scale misassemblies are observed. We also
718 applied the method in Section~\ref{sec:eval}. Except for ERS473430, the miniasm layouts are
719 5-consistent with the reference assemblies. For ERS473430, the NCTC project page
720 claimed the sample has a plasmid. Miniasm gives two contigs, but the NCTC
721 assembly has one contig only. The difference in layout may be an error in the
722 NCTC assembly.
723
724 %Encouraged by the single-contig assembly for PBcR-PB-ec at only 30-fold
725 %coverage, we randomly down-sampled PacBio data sets and tried to assemble the
726 %subset. For PB-ecoli, miniasm still produced a single contig at 24-fold
727 %coverage, or two contigs at 20-fold. For the other data sets, however, miniasm
728 %generated fragmented assemblies when we sampled a third of reads. We speculate
729 %the shorter read lengths of the ERS* data sets made it more difficult to
730 %produce good assemblies at relatively low coverage.
731
732 We have also run the PBcR pipeline~\citep{Berlin:2015xy}. PBcR requires a spec
733 file. We took `pacbio.spec' from the PBcR-PB-ec example and `oxford.spec' from
734 PBcR-ONT-ec, and applied them to all data sets based on their data types. MAP*
735 data sets only provide FASTA sequences for download. We assigned quality 9 to
736 all bases as PBcR requires base quality. PBcR assembled all PacBio data sets
737 without extra contigs longer than 50kb -- better than miniasm. However, on the
738 ONT data sets, PBcR produced more fragmented assemblies for MAP-006-2,
739 MAP-006-pcr-1 and MAP-006-pcr-2; the PBcR-ONT-ec assembly is 300kb shorter.
740
741 With four CPU cores, it took miniasm 14 seconds to assemble the 30-fold
742 PBcR-PB-ec data set and 2 minutes to assemble the 160-fold PB-ecoli data set.
743 PBcR, also with four CPU cores, is about 700 times slower on PBcR-PB-ec and
744 60 times slower on PB-ecoli. It is slower on low-coverage data
745 because PBcR automatically switches to the slower sensitive mode. Here we
746 should remind readers that without an error correction stage, the contig
747 sequences generated by miniasm are of much lower accuracy in comparison to
748 PBcR. Nonetheless, miniasm is still tens of times faster than PBcR excluding
749 the time spent on error correction.
750
751 \subsection{Assembling a C. elegans genome}
752
753 \begin{figure}[tb]
754 \includegraphics[width=.48\textwidth]{ce}
755 \caption{Dotter plot comparing the miniasm assembly and the {\it C. elegans}
756 reference genome. Thin gray lines mark the contig or chromosome boundaries. The
757 three arrows indicate large-scale misassemblies visible from the
758 plot. The mapping is done with `minimap -L500'.}\label{fig:ce}
759 \end{figure}
760
761 We assembled a 45-fold {\it C. elegans} data set (Table~\ref{tab:data}). With 16 CPU cores,
762 miniasm assembled the data in 9 minutes, achieving an N50 size of 2.8Mb. From the
763 dotter plot (Figure~\ref{fig:ce}), we observed three structural misassemblies
764 (readers are advised to zoom into the vector graph to see the details).
765 PacBio has assembled the same data set with HGAP3~\citep{Chin:2013qr}. HGAP3
766 produces shorter contigs (N50=1.6Mb), but does not incur large-scale
767 misassemblies visible from the dotter plot between the {\it C. elegans}
768 reference genome and the contigs.
769
770 When we take the {\it C. elegans} reference genome as the truth, the method in
771 Section~\ref{sec:eval} also identifies the three structural misassemblies. The
772 method additionally finds eight intra-unitig and one inter-unitig
773 inconsistencies. In all cases, miniasm agrees with HGAP3, suggesting these
774 inconsistencies may be true structural variations between the reference strain
775 and the sequenced strain.
776
777 We have also tried PBcR on this data set. Based on the intermediate progress
778 report, we estimated that with 16 CPU cores, it would take a week or so to
779 finish the assembly in the automatically chosen `sensitive' mode.
780
781 For this data set, minimap takes 27GB RAM at the peak. As minimap loads 4Gbp
782 bases to index, the peak RAM will be capped around 27GB. The memory used by
783 miniasm is proportional to the number of overlaps. Although it only takes
784 1.3GB RAM here, it will become the limiting factor for larger data sets.
785
786 \subsection{Switching read overlappers}
787
788 Miniasm also works with other overlappers when we convert their output format
789 to PAF. On the 30-fold PBcR-PB-ec data set, we are able to produce a single
790 contig with DALIGNER (option -k15 -h50), MHAP (option
791 \mbox{--pacbio-sensitive}) and GraphMap (option -w owler). Of these three, DALIGNER is the
792 fastest, taking 65 seconds with four CPUs. Minimap is five times as fast on
793 this data set and is 18 times as fast on PB-ecoli at 160-fold. Minimap is
794 faster on larger data sets possibly because without staging all possible hits
795 in RAM, minimap is able to process more reads in a batch while a large batch
796 usually helps performance. We should note that DALIGNER generates alignments
797 while minimap does not. Minimap would probably have a similar performance if it
798 included an alignment step.
799
800 \section{Discussions}
801
802 Miniasm implements the `O' and `L' steps in the Overlap-Layout-Consensus (OLC)
803 assembly paradigm. It confirms long noisy reads can be assembled without an
804 error correction stage, and without this stage, the assembly process can be
805 greatly accelerated and simplified, while achieving comparable contiguity and
806 large-scale accuracy to existing pipelines, at least for genomes without
807 excessive repetitive sequences. Although without the `C' step, miniasm
808 cannot produce high-quality consensus for many analyses, it opens the door
809 to ultrafast assembly if we can develop a fast consensus tool matching the
810 speed of minimap and miniasm. In addition, MinION has a `read-until' mode,
811 allowing users to pause sequencing and reload samples. Fast layout by miniasm
812 could already help to decide if enough data have been collected.
813
814 Our main concern with miniasm is that when we look at a low-identity match
815 between two noisy reads, it is difficult to tell whether the low identity is
816 caused by the stochastically higher base error rate on reads, or because
817 reads come from two recent segmental duplications.
818 In comparison, error correction takes advantage of multiple reads and in
819 theory has more power to distinguish high error rate from duplications/repeats.
820 Bacteria and {\it C. elegans} evaluated in this article are repeat sparse.
821 We have yet to evaluate the performance of miniasm on repeat-rich genomes. In addition, miniasm has
822 not been optimized for large repeat-rich genomes. It reads all hits into RAM,
823 which may not be practical when there are too many. We need to filter
824 repetitive hits, introduce disk-based algorithms (e.g. for sorting) or stream
825 hits before removing contained reads. Working with large complex genomes will
826 be an important future direction.
827
828 %Minimap is primarily used as a read overlapper in this article. It in fact has a
829 %wider range of applications in addition to overlapping. For example, with four
830 %CPU cores, it is able to map 1.6Gb PacBio reads to the human genome in 2.5
831 %minutes, map 1Gb {\it E. coli} reads to pre-indexed 9.6Gb bacterial genome in 3
832 %minutes and to pre-indexed 100Gb nt database in an hour with a third of time
833 %spent on loading the index from the network file system. It can also map 2800
834 %bacterial genomes to themselves in an hour. Minimap is fast, but is not as
835 %sensitive as proper whole-genome aligners and recent long-read aligners such as
836 %LASTZ~\citep{harris:2007aa}, LAST~\citep{Kiebasa:2011aa} and GraphMap. They use
837 %shorter spaced seeds which greatly help sensitivity at the cost of performance.
838
839 Oxford Nanopore is working on PromethION and PacBio will ship PacBio Sequel
840 later this year. Both sequencers promise significantly reduced sequencing cost and
841 increased throughput, which may stimulate the adoption of long-read sequencing
842 and subsequently the development of long-read mappers and assemblers. We hope
843 in this process, the community could standardize the input and output formats
844 of various tools, so that a developer could focus on a component he or she
845 understands best. Such a modular approach has proved to be fruitful in the
846 development of short-read tools -- in fact, the best short-read pipelines all
847 consist of components developed by different groups -- and will be equally
848 beneficial to the future development of long-read mappers and assemblers.
849
850 \section*{Acknowledgement}
851
852 We thank P\'all Melsted for maintaining the GFA spec and are grateful to Gene
853 Myers, Jason Chin, Adam Phillippy, Jared Simpson, Zamin Iqbal, Nick Loman and
854 Ivan Sovic for their presentations, talks, comments on social media and
855 unpublished works which have greatly influenced and helped the development of
856 minimap and miniasm.
857
858 \paragraph{Funding\textcolon} NHGRI U54HG003037; NIH GM100233
859
860 \bibliography{miniasm}
861 \end{document}
0 %%
1 %% This is file `natbib.bst', generated
2 %% on <1994/9/16> with the docstrip utility (2.2h).
3 %%
4 %% The original source files were:
5 %%
6 %% genbst.mbs (with options: `ay,nat,seq-lab,nm-rev,dt-beg,yr-par,vol-bf,
7 %% volp-com,etal-it')
8 %% ----------------------------------------
9 %% *** Personal bib style, PWD ***
10 %%
11 %% (Here are the specifications of the source file)
12 %% \ProvidesFile{genbst.mbs}[1994/09/16 1.5 (PWD)]
13 %% For use with BibTeX version 0.99a or later
14 %% and with LaTeX 2.09 or 2e
15 %%-------------------------------------------------------------------
16 %% NOTICE:
17 %% This file may be used for non-profit purposes.
18 %% It may not be distributed in exchange for money,
19 %% other than distribution costs.
20 %%
21 %% The author provides it `as is' and does not guarantee it in any way.
22 %%
23 %% Copyright (C) 1994 Patrick W. Daly
24 %% Max-Planck-Institut f\"ur Aeronomie
25 %% Postfach 20
26 %% D-37189 Katlenburg-Lindau
27 %% Germany
28 %%
29 %% E-mail:
30 %% SPAN-- nsp::linmpi::daly (note nsp also known as ecd1)
31 %% Internet-- daly@linmpi.dnet.gwdg.de
32 %%-----------------------------------------------------------
33 %% \CharacterTable
34 %% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z
35 %% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z
36 %% Digits \0\1\2\3\4\5\6\7\8\9
37 %% Exclamation \! Double quote \" Hash (number) \#
38 %% Dollar \$ Percent \% Ampersand \&
39 %% Acute accent \' Left paren \( Right paren \)
40 %% Asterisk \* Plus \+ Comma \,
41 %% Minus \- Point \. Solidus \/
42 %% Colon \: Semicolon \; Less than \<
43 %% Equals \= Greater than \> Question mark \?
44 %% Commercial at \@ Left bracket \[ Backslash \\
45 %% Right bracket \] Circumflex \^ Underscore \_
46 %% Grave accent \` Left brace \{ Vertical bar \|
47 %% Right brace \} Tilde \~}
48 %%---------------------------------------------------------------------
49 % This is an author-year citation style bibliography. As such, it is
50 % non-standard LaTeX, and requires a special package file to function properly.
51 % Such a package is natbib.sty by Patrick W. Daly
52 % The form of the \bibitem entries is
53 % \bibitem[Jones et al.(1990)]{key}...
54 % \bibitem[Jones et al.(1990)Jones, Baker, and Smith]{key}...
55 % The essential feature is that the label (the part in brackets) consists
56 % of the author names, as they should appear in the citation, with the year
57 % in parentheses following. There must be no space before the opening
58 % parenthesis!
59 % With natbib v5.3, a full list of authors may also follow the year.
60 % In natbib.sty, it is possible to define the type of enclosures that is
61 % really wanted (brackets or parentheses), but in either case, there must
62 % be parentheses in the label.
63 % The \cite command functions as follows:
64 % \cite{key} ==>> Jones et al. (1990)
65 % \cite[]{key} ==>> (Jones et al., 1990)
66 % \cite[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2)
67 % \cite[e.g.][]{key} ==>> (e.g. Jones et al., 1990)
68 % \cite[e.g.][p. 32]{key} ==>> (e.g. Jones et al., 1990, p. 32)
69 % \citeauthor{key} Jones et al.
70 % \citefullauthor{key} Jones, Baker, and Smith
71 % \citeyear{key} 1990
72 %%---------------------------------------------------------------------
73
74 ENTRY % declares the fields and per-entry variables available to every entry
75 { address % first group: bibliography fields read from the database
76 author
77 booktitle
78 chapter
79 edition
80 editor
81 howpublished
82 institution
83 journal
84 key
85 month
86 note
87 number
88 organization
89 pages
90 publisher
91 school
92 series
93 title
94 type
95 volume
96 year
97 }
98 {} % second group: no per-entry integer variables
99 { label extra.label sort.label } % third group: per-entry string variables
100
101 INTEGERS { output.state before.all mid.sentence after.sentence after.block } % output.state is the current state; the other four are the constants it is compared against
102
103 FUNCTION {init.state.consts} % assigns four distinct integer codes to the output-state constants
104 { #0 'before.all :=
105 #1 'mid.sentence :=
106 #2 'after.sentence :=
107 #3 'after.block :=
108 }
109
110 STRINGS { s t } % global scratch string variables shared by the helper functions
111
112 FUNCTION {output.nonnull} % writes the previously pending string with the separator implied by output.state, then leaves the new string pending on the stack
113 { 's := % stash the new item; the pending item is now on top of the stack
114 output.state mid.sentence =
115 { ", " * write$ } % mid-sentence: pending item is followed by a comma
116 { output.state after.block =
117 { add.period$ write$ % block boundary: close the pending item with a period
118 newline$
119 "\newblock " write$ % and open the next block on a fresh line
120 }
121 { output.state before.all =
122 'write$ % nothing output yet: write the pending item unchanged
123 { add.period$ " " * write$ } % remaining state (after.sentence): period plus a space
124 if$
125 }
126 if$
127 mid.sentence 'output.state := % every non-mid-sentence state falls back to mid.sentence
128 }
129 if$
130 s % the new item becomes the pending string
131 }
132
133 FUNCTION {output} % outputs the string on top of the stack, silently discarding it when empty
134 { duplicate$ empty$
135 'pop$
136 'output.nonnull
137 if$
138 }
139
140 FUNCTION {output.check} % takes a value and a field description; outputs the value, or warns when it is empty
141 { 't := % t = description used in the warning text
142 duplicate$ empty$
143 { pop$ "empty " t * " in " * cite$ * warning$ } % drop the empty value and report "empty <t> in <cite key>"
144 'output.nonnull
145 if$
146 }
147
148 FUNCTION {fin.entry} % finishes an entry: writes the pending string with a closing period and ends the line
149 { add.period$
150 write$
151 newline$
152 }
153
154 FUNCTION {new.block} % marks a block boundary for the next output, unless nothing has been output yet
155 { output.state before.all =
156 'skip$
157 { after.block 'output.state := }
158 if$
159 }
160
161 FUNCTION {new.sentence} % marks a sentence boundary, without overriding a pending block boundary or the before.all state
162 { output.state after.block =
163 'skip$
164 { output.state before.all =
165 'skip$
166 { after.sentence 'output.state := }
167 if$
168 }
169 if$
170 }
171
172 FUNCTION {not} % logical negation of the integer on top of the stack
173 { { #0 } % condition true -> 0
174 { #1 } % condition false -> 1
175 if$
176 }
177
178 FUNCTION {and} % logical AND of the two integers on top of the stack
179 { 'skip$ % top is true: the result is the remaining operand
180 { pop$ #0 } % top is false: replace the remaining operand with 0
181 if$
182 }
183
184 FUNCTION {or} % logical OR of the two integers on top of the stack
185 { { pop$ #1 } % top is true: replace the remaining operand with 1
186 'skip$ % top is false: the result is the remaining operand
187 if$
188 }
189
190 FUNCTION {non.stop} % keeps the string and pushes a flag telling whether add.period$ would append a period to it
191 { duplicate$
192 "}" * add.period$ % the appended brace means an existing final period is no longer the last character
193 #-1 #1 substring$ "." = % last character is "." only if add.period$ just added it
194 }
195
196 FUNCTION {new.block.checkb} % consumes two strings and starts a new block unless both are empty
197 { empty$
198 swap$ empty$
199 and
200 'skip$
201 'new.block
202 if$
203 }
204
205 FUNCTION {field.or.null} % replaces an empty value on the stack by the empty string ""
206 { duplicate$ empty$
207 { pop$ "" }
208 'skip$
209 if$
210 }
211
212 FUNCTION {emphasize} % wraps a non-empty string in {\em ...}; an empty one becomes ""
213 { duplicate$ empty$
214 { pop$ "" }
215 { "{\em " swap$ * non.stop
216 { "\/}" * } % non.stop true: close the group with an italic correction
217 { "}" * } % non.stop false: close the group without one
218 if$
219 }
220 if$
221 }
222
223 FUNCTION {bolden} % wraps a non-empty string in {\bf ...}; an empty one becomes ""
224 { duplicate$ empty$
225 { pop$ "" }
226 { "{\bf " swap$ * "}" * }
227 if$
228 }
229
230 INTEGERS { nameptr namesleft numnames } % loop state for the name-list formatters: current index, names remaining, total count
231
232 FUNCTION {format.names} % formats a name list with the pattern "von Last, Jr, F." for each name
233 { 's :=
234 #1 'nameptr := % start at the first name
235 s num.names$ 'numnames :=
236 numnames 'namesleft :=
237 { namesleft #0 > } % loop until every name has been consumed
238 { s nameptr
239 "{vv~}{ll}{, jj}{, f.}" format.name$ 't := % t = current name, formatted
240 nameptr #1 > % anything but the first name needs a separator
241 {
242 namesleft #1 >
243 { ", " * t * } % not the last name: comma-separated
244 {
245 numnames #2 >
246 { "," * } % three or more names: comma before the final conjunction
247 'skip$
248 if$
249 t "others" =
250 { " " * "et~al." emphasize * } % a final name of "others" is rendered as emphasized "et al."
251 { " and " * t * }
252 if$
253 }
254 if$
255 }
256 't % first name: push it as the start of the result
257 if$
258 nameptr #1 + 'nameptr :=
259 namesleft #1 - 'namesleft :=
260 }
261 while$
262 }
263
264 FUNCTION {format.names.ed} % same list logic as format.names, but each name uses the pattern "F. von Last, Jr"
265 { 's :=
266 #1 'nameptr := % start at the first name
267 s num.names$ 'numnames :=
268 numnames 'namesleft :=
269 { namesleft #0 > } % loop until every name has been consumed
270 { s nameptr
271 "{f.~}{vv~}{ll}{, jj}" % initials first, then von part, last name and junior part
272 format.name$ 't :=
273 nameptr #1 > % anything but the first name needs a separator
274 {
275 namesleft #1 >
276 { ", " * t * } % not the last name: comma-separated
277 {
278 numnames #2 >
279 { "," * } % three or more names: comma before the final conjunction
280 'skip$
281 if$
282 t "others" =
283 { " " * "et~al." emphasize * } % a final name of "others" is rendered as emphasized "et al."
284 { " and " * t * }
285 if$
286 }
287 if$
288 }
289 't % first name: push it as the start of the result
290 if$
291 nameptr #1 + 'nameptr :=
292 namesleft #1 - 'namesleft :=
293 }
294 while$
295 }
296
297 FUNCTION {format.key} % given a field value: when it is empty, yields the key field (or ""); otherwise yields ""
298 { empty$
299 { key field.or.null }
300 { "" }
301 if$
302 }
303
304 FUNCTION {format.authors} % formatted author list, or "" when there is no author
305 { author empty$
306 { "" }
307 { author format.names }
308 if$
309 }
310
311 FUNCTION {format.editors} % formatted editor list followed by ", editor" or ", editors"; "" when there is no editor
312 { editor empty$
313 { "" }
314 { editor format.names
315 editor num.names$ #1 > % plural suffix when more than one editor
316 { ", editors" * }
317 { ", editor" * }
318 if$
319 }
320 if$
321 }
322
323 FUNCTION {format.in.editors} % like format.editors, but names are formatted by format.names.ed (initials first)
324 { editor empty$
325 { "" }
326 { editor format.names.ed
327 editor num.names$ #1 > % plural suffix when more than one editor
328 { ", editors" * }
329 { ", editor" * }
330 if$
331 }
332 if$
333 }
334
335 FUNCTION {format.title} % title converted with change.case$ mode "t", or "" when the title is empty
336 { title empty$
337 { "" }
338 { title "t" change.case$
339 }
340 if$
341 }
342
343 FUNCTION {format.full.names} % formats a name list using only the von and last parts of each name
344 {'s :=
345 #1 'nameptr := % start at the first name
346 s num.names$ 'numnames :=
347 numnames 'namesleft :=
348 { namesleft #0 > } % loop until every name has been consumed
349 { s nameptr
350 "{vv~}{ll}" format.name$ 't := % t = "von Last" for the current name
351 nameptr #1 > % anything but the first name needs a separator
352 {
353 namesleft #1 >
354 { ", " * t * } % not the last name: comma-separated
355 {
356 numnames #2 >
357 { "," * } % three or more names: comma before the final conjunction
358 'skip$
359 if$
360 t "others" =
361 { " " * "et~al." emphasize * } % a final name of "others" is rendered as emphasized "et al."
362 { " and " * t * }
363 if$
364 }
365 if$
366 }
367 't % first name: push it as the start of the result
368 if$
369 nameptr #1 + 'nameptr :=
370 namesleft #1 - 'namesleft :=
371 }
372 while$
373 }
374
375 FUNCTION {author.editor.key.full} % full name list from author, else editor, else the key field, else a cite-key prefix
376 { author empty$
377 { editor empty$
378 { key empty$
379 { cite$ #1 #3 substring$ } % last resort: first three characters of the cite key
380 'key
381 if$
382 }
383 { editor format.full.names }
384 if$
385 }
386 { author format.full.names }
387 if$
388 }
389
390 FUNCTION {author.key.full} % full name list from author, else the key field, else a cite-key prefix
391 { author empty$
392 { key empty$
393 { cite$ #1 #3 substring$ } % last resort: first three characters of the cite key
394 'key
395 if$
396 }
397 { author format.full.names }
398 if$
399 }
400
401 FUNCTION {editor.key.full} % full name list from editor, else the key field, else a cite-key prefix
402 { editor empty$
403 { key empty$
404 { cite$ #1 #3 substring$ } % last resort: first three characters of the cite key
405 'key
406 if$
407 }
408 { editor format.full.names }
409 if$
410 }
411
412 FUNCTION {make.full.names} % picks the full-name source according to the entry type
413 { type$ "book" =
414 type$ "inbook" =
415 or
416 'author.editor.key.full % book / inbook: author, falling back to editor
417 { type$ "proceedings" =
418 'editor.key.full % proceedings: editor only
419 'author.key.full % every other type: author only
420 if$
421 }
422 if$
423 }
424
425 FUNCTION {output.bibitem} % writes the \bibitem[...]{key} header line and resets the output state for a new entry
426 { newline$
427 "\bibitem[" write$
428 label write$
429 ")" make.full.names * "]{" * write$ % NOTE(review): the ")" is added here, so label presumably ends with an unclosed "(year" -- confirm where label is built
430 cite$ write$
431 "}" write$
432 newline$
433 "" % empty pending string for the first output call
434 before.all 'output.state :=
435 }
436
437 FUNCTION {n.dashify} % returns the string with every isolated "-" doubled to "--"; longer hyphen runs are copied unchanged
438 { 't :=
439 "" % result accumulator
440 { t empty$ not } % consume t from the left until nothing remains
441 { t #1 #1 substring$ "-" =
442 { t #1 #2 substring$ "--" = not
443 { "--" * % single hyphen: emit an en-dash and drop one character
444 t #2 global.max$ substring$ 't :=
445 }
446 { { t #1 #1 substring$ "-" = } % already two or more hyphens: copy the whole run verbatim
447 { "-" *
448 t #2 global.max$ substring$ 't :=
449 }
450 while$
451 }
452 if$
453 }
454 { t #1 #1 substring$ * % ordinary character: copy it
455 t #2 global.max$ substring$ 't :=
456 }
457 if$
458 }
459 while$
460 }
461
462 FUNCTION {word.in} % pushes the literal "In " used to introduce a containing work
463 { "In " }
464
465 FUNCTION {format.date} % builds " (<year><extra.label>)", substituting "????" (with a warning) for an empty year
466 { year duplicate$ empty$
467 { "empty year in " cite$ * "; set to ????" * warning$
468 pop$ "????" }
469 'skip$
470 if$
471 before.all 'output.state := % with this state the next output call writes the pending string with no separator added
472 " (" swap$ * extra.label * ")" *
473 }
474
475 FUNCTION {format.btitle} % title passed through emphasize
476 { title emphasize
477 }
478
479 FUNCTION {tie.or.space.connect} % joins the two top strings with "~" when the second is shorter than three characters, else with a space
480 { duplicate$ text.length$ #3 <
481 { "~" }
482 { " " }
483 if$
484 swap$ * * % reorder to first, separator, second and concatenate
485 }
486
487 FUNCTION {either.or.check} % takes a description and a field value; warns when the field is non-empty
488 { empty$
489 'pop$ % field empty: discard the description, no warning
490 { "can't use both " swap$ * " fields in " * cite$ * warning$ }
491 if$
492 }
493
494 FUNCTION {format.bvolume} % "volume <volume>", plus " of <series>" when a series is given; "" when volume is empty
495 { volume empty$
496 { "" }
497 { "volume" volume tie.or.space.connect
498 series empty$
499 'skip$
500 { " of " * series emphasize * }
501 if$
502 "volume and number" number either.or.check % warn when number is set together with volume
503 }
504 if$
505 }
506
507 FUNCTION {format.number.series} % used only when volume is empty: "number <number> in <series>", or just the series
508 { volume empty$
509 { number empty$
510 { series field.or.null } % no number: the series alone (or "")
511 { output.state mid.sentence = % lower-case "number" only in mid-sentence
512 { "number" }
513 { "Number" }
514 if$
515 number tie.or.space.connect
516 series empty$
517 { "there's a number but no series in " cite$ * warning$ }
518 { " in " * series * }
519 if$
520 }
521 if$
522 }
523 { "" } % a volume exists: this function contributes nothing
524 if$
525 }
526
527 FUNCTION {format.edition} % "<edition> edition", case-converted according to the output state; "" when edition is empty
528 { edition empty$
529 { "" }
530 { output.state mid.sentence =
531 { edition "l" change.case$ " edition" * } % mid-sentence: mode "l" (all lower case)
532 { edition "t" change.case$ " edition" * } % otherwise: mode "t"
533 if$
534 }
535 if$
536 }
537
538 INTEGERS { multiresult } % result flag computed by multi.page.check
539
540 FUNCTION {multi.page.check} % pushes 1 when the string contains "-", "," or "+", else 0
541 { 't :=
542 #0 'multiresult :=
543 { multiresult not % keep scanning while no marker has been found
544 t empty$ not % and characters remain
545 and
546 }
547 { t #1 #1 substring$
548 duplicate$ "-" =
549 swap$ duplicate$ "," =
550 swap$ "+" =
551 or or % first character is one of the three markers
552 { #1 'multiresult := }
553 { t #2 global.max$ substring$ 't := } % otherwise drop it and continue
554 if$
555 }
556 while$
557 multiresult
558 }
559
560 FUNCTION {format.pages} % "pages <range>" or "page <n>" depending on multi.page.check; "" when pages is empty
561 { pages empty$
562 { "" }
563 { pages multi.page.check
564 { "pages" pages n.dashify tie.or.space.connect } % ranges get their hyphens normalised by n.dashify
565 { "page" pages tie.or.space.connect }
566 if$
567 }
568 if$
569 }
570
571 FUNCTION {format.vol.num.pages} % builds "<bold volume>(<number>), <pages>" from the volume, number and pages fields
572 { volume field.or.null
573 bolden
574 number empty$
575 'skip$
576 { "(" number * ")" * * % append "(number)" to the volume part
577 volume empty$
578 { "there's a number but no volume in " cite$ * warning$ }
579 'skip$
580 if$
581 }
582 if$
583 pages empty$
584 'skip$
585 { duplicate$ empty$
586 { pop$ format.pages } % nothing built so far: fall back to the "page(s) ..." form
587 { ", " * pages n.dashify * }
588 if$
589 }
590 if$
591 }
592
593 FUNCTION {format.chapter.pages} % "chapter <chapter>, <pages>"; without a chapter it is just format.pages
594 { chapter empty$
595 'format.pages
596 { type empty$
597 { "chapter" }
598 { type "l" change.case$ } % a type field, lower-cased, replaces the word "chapter"
599 if$
600 chapter tie.or.space.connect
601 pages empty$
602 'skip$
603 { ", " * format.pages * }
604 if$
605 }
606 if$
607 }
608
609 FUNCTION {format.in.ed.booktitle} % "In <editors>, <booktitle>" or "In <booktitle>"; "" when booktitle is empty
610 { booktitle empty$
611 { "" }
612 { editor empty$
613 { word.in booktitle emphasize * }
614 { word.in format.in.editors * ", " * booktitle emphasize * }
615 if$
616 }
617 if$
618 }
619
620 FUNCTION {format.thesis.type} % replaces the default string on the stack by the type field (mode "t") when type is given
621 { type empty$
622 'skip$
623 { pop$
624 type "t" change.case$
625 }
626 if$
627 }
628
629 FUNCTION {format.tr.number} % report designation: the type field (default "Technical Report") optionally followed by the number
630 { type empty$
631 { "Technical Report" }
632 'type
633 if$
634 number empty$
635 { "t" change.case$ } % no number: only case-convert the designation
636 { number tie.or.space.connect }
637 if$
638 }
639
640 FUNCTION {format.article.crossref} % builds "In \cite{<crossref>}"
641 {
642 word.in
643 "\cite{" * crossref * "}" *
644 }
645
646 FUNCTION {format.book.crossref} % builds "Volume <volume> of \cite{<crossref>}", or "In \cite{...}" with a warning when volume is empty
647 { volume empty$
648 { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
649 word.in
650 }
651 { "Volume" volume tie.or.space.connect
652 " of " *
653 }
654 if$
655 "\cite{" * crossref * "}" *
656 }
657
658 FUNCTION {format.incoll.inproc.crossref} % builds "In \cite{<crossref>}"
659 {
660 word.in
661 "\cite{" * crossref * "}" *
662 }
663
664 FUNCTION {article} % entry-type function: formats an article entry
665 { output.bibitem
666 format.authors "author" output.check
667 author format.key output % the key field stands in when there is no author
668 format.date "year" output.check
669 new.block
670 format.title "title" output.check
671 new.block
672 crossref missing$
673 { journal emphasize "journal" output.check % stand-alone entry: journal, volume, number, pages
674 format.vol.num.pages output
675 }
676 { format.article.crossref output.nonnull % cross-referenced entry: cite the parent instead
677 format.pages output
678 }
679 if$
680 new.block
681 note output
682 fin.entry
683 }
684
685 FUNCTION {book} % entry-type function: formats a book entry
686 { output.bibitem
687 author empty$
688 { format.editors "author and editor" output.check % no author: editors take the author position
689 editor format.key output
690 }
691 { format.authors output.nonnull
692 crossref missing$
693 { "author and editor" editor either.or.check } % warn when both author and editor are given
694 'skip$
695 if$
696 }
697 if$
698 format.date "year" output.check
699 new.block
700 format.btitle "title" output.check
701 crossref missing$
702 { format.bvolume output % stand-alone entry: volume/series, publisher, address
703 new.block
704 format.number.series output
705 new.sentence
706 publisher "publisher" output.check
707 address output
708 }
709 {
710 new.block
711 format.book.crossref output.nonnull % cross-referenced entry: cite the parent instead
712 }
713 if$
714 format.edition output
715 new.block
716 note output
717 fin.entry
718 }
719
720 FUNCTION {booklet} % entry-type function: formats a booklet entry; only year and title are checked for emptiness
721 { output.bibitem
722 format.authors output
723 author format.key output % the key field stands in when there is no author
724 format.date "year" output.check
725 new.block
726 format.title "title" output.check
727 new.block
728 howpublished output
729 address output
730 new.block
731 note output
732 fin.entry
733 }
734
735 FUNCTION {inbook} % entry-type function: formats an inbook entry (a book entry plus chapter and/or pages)
736 { output.bibitem
737 author empty$
738 { format.editors "author and editor" output.check % no author: editors take the author position
739 editor format.key output
740 }
741 { format.authors output.nonnull
742 crossref missing$
743 { "author and editor" editor either.or.check } % warn when both author and editor are given
744 'skip$
745 if$
746 }
747 if$
748 format.date "year" output.check
749 new.block
750 format.btitle "title" output.check
751 crossref missing$
752 { format.bvolume output % stand-alone entry
753 format.chapter.pages "chapter and pages" output.check
754 new.block
755 format.number.series output
756 new.sentence
757 publisher "publisher" output.check
758 address output
759 }
760 { format.chapter.pages "chapter and pages" output.check % cross-referenced entry: cite the parent instead
761 new.block
762 format.book.crossref output.nonnull
763 }
764 if$
765 format.edition output
766 new.block
767 note output
768 fin.entry
769 }
770
771 FUNCTION {incollection} % entry-type function: formats an incollection entry
772 { output.bibitem
773 format.authors "author" output.check
774 author format.key output % the key field stands in when there is no author
775 format.date "year" output.check
776 new.block
777 format.title "title" output.check
778 new.block
779 crossref missing$
780 { format.in.ed.booktitle "booktitle" output.check % stand-alone entry: full description of the collection
781 format.bvolume output
782 format.number.series output
783 format.chapter.pages output
784 new.sentence
785 publisher "publisher" output.check
786 address output
787 format.edition output
788 }
789 { format.incoll.inproc.crossref output.nonnull % cross-referenced entry: cite the parent instead
790 format.chapter.pages output
791 }
792 if$
793 new.block
794 note output
795 fin.entry
796 }
797
798 FUNCTION {inproceedings} % entry-type function: formats an inproceedings entry
799 { output.bibitem
800 format.authors "author" output.check
801 author format.key output % the key field stands in when there is no author
802 format.date "year" output.check
803 new.block
804 format.title "title" output.check
805 new.block
806 crossref missing$
807 { format.in.ed.booktitle "booktitle" output.check % stand-alone entry: full description of the proceedings
808 format.bvolume output
809 format.number.series output
810 format.pages output
811 address output
812 new.sentence
813 organization output
814 publisher output
815 }
816 { format.incoll.inproc.crossref output.nonnull % cross-referenced entry: cite the parent instead
817 format.pages output
818 }
819 if$
820 new.block
821 note output
822 fin.entry
823 }
824
825 FUNCTION {conference} { inproceedings } % conference entries are formatted exactly like inproceedings
826
827 FUNCTION {manual} % entry-type function: formats a manual entry; only year and title are checked for emptiness
828 { output.bibitem
829 format.authors output
830 author format.key output % the key field stands in when there is no author
831 format.date "year" output.check
832 new.block
833 format.btitle "title" output.check
834 organization address new.block.checkb % start a new block only when organization or address is present
835 organization output
836 address output
837 format.edition output
838 new.block
839 note output
840 fin.entry
841 }
842
843 FUNCTION {mastersthesis} % entry-type function: formats a mastersthesis entry
844 { output.bibitem
845 format.authors "author" output.check
846 author format.key output % the key field stands in when there is no author
847 format.date "year" output.check
848 new.block
849 format.btitle "title" output.check
850 new.block
851 "Master's thesis" format.thesis.type output.nonnull % default wording, replaced by the type field when given
852 school "school" output.check
853 address output
854 new.block
855 note output
856 fin.entry
857 }
858
859 FUNCTION {misc} % entry-type function: formats a misc entry; only the year is checked for emptiness
860 { output.bibitem
861 format.authors output
862 author format.key output % the key field stands in when there is no author
863 format.date "year" output.check
864 new.block
865 format.title output
866 new.block
867 howpublished output
868 new.block
869 note output
870 fin.entry
871 }
872
873 FUNCTION {phdthesis} % entry-type function: formats a phdthesis entry
874 { output.bibitem
875 format.authors "author" output.check
876 author format.key output % the key field stands in when there is no author
877 format.date "year" output.check
878 new.block
879 format.btitle "title" output.check
880 new.block
881 "Ph.D. thesis" format.thesis.type output.nonnull % default wording, replaced by the type field when given
882 school "school" output.check
883 address output
884 new.block
885 note output
886 fin.entry
887 }
888
889 FUNCTION {proceedings} % entry-type function: formats a proceedings entry, with editors in the author position
890 { output.bibitem
891 format.editors output
892 editor format.key output % the key field stands in when there is no editor
893 format.date "year" output.check
894 new.block
895 format.btitle "title" output.check
896 format.bvolume output
897 format.number.series output
898 address output
899 new.sentence
900 organization output
901 publisher output
902 new.block
903 note output
904 fin.entry
905 }
906
907 FUNCTION {techreport} % entry-type function: formats a techreport entry
908 { output.bibitem
909 format.authors "author" output.check
910 author format.key output % the key field stands in when there is no author
911 format.date "year" output.check
912 new.block
913 format.title "title" output.check
914 new.block
915 format.tr.number output.nonnull % report designation and number
916 institution "institution" output.check
917 address output
918 new.block
919 note output
920 fin.entry
921 }
922
923 FUNCTION {unpublished} % entry-type function: formats an unpublished entry; the note is checked for emptiness here
924 { output.bibitem
925 format.authors "author" output.check
926 author format.key output % the key field stands in when there is no author
927 format.date "year" output.check
928 new.block
929 format.title "title" output.check
930 new.block
931 note "note" output.check
932 fin.entry
933 }
934
935 FUNCTION {default.type} { misc } % the default entry type is formatted exactly like misc
936
937 MACRO {jan} {"January"} % three-letter month abbreviations expand to full English month names
938
939 MACRO {feb} {"February"}
940
941 MACRO {mar} {"March"}
942
943 MACRO {apr} {"April"}
944
945 MACRO {may} {"May"}
946
947 MACRO {jun} {"June"}
948
949 MACRO {jul} {"July"}
950
951 MACRO {aug} {"August"}
952
953 MACRO {sep} {"September"}
954
955 MACRO {oct} {"October"}
956
957 MACRO {nov} {"November"}
958
959 MACRO {dec} {"December"}
960
961 MACRO {acmcs} {"ACM Computing Surveys"} % journal-name abbreviations expand to the full journal titles
962
963 MACRO {acta} {"Acta Informatica"}
964
965 MACRO {cacm} {"Communications of the ACM"}
966
967 MACRO {ibmjrd} {"IBM Journal of Research and Development"}
968
969 MACRO {ibmsj} {"IBM Systems Journal"}
970
971 MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
972
973 MACRO {ieeetc} {"IEEE Transactions on Computers"}
974
975 MACRO {ieeetcad}
976 {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
977
978 MACRO {ipl} {"Information Processing Letters"}
979
980 MACRO {jacm} {"Journal of the ACM"}
981
982 MACRO {jcss} {"Journal of Computer and System Sciences"}
983
984 MACRO {scp} {"Science of Computer Programming"}
985
986 MACRO {sicomp} {"SIAM Journal on Computing"}
987
988 MACRO {tocs} {"ACM Transactions on Computer Systems"}
989
990 MACRO {tods} {"ACM Transactions on Database Systems"}
991
992 MACRO {tog} {"ACM Transactions on Graphics"}
993
994 MACRO {toms} {"ACM Transactions on Mathematical Software"}
995
996 MACRO {toois} {"ACM Transactions on Office Information Systems"}
997
998 MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
999
1000 MACRO {tcs} {"Theoretical Computer Science"}
1001
1002 READ
1003
% Normalise the string on top of the stack for use in a sort key:
% apply purify$ and then convert to lower case.
1004 FUNCTION {sortify}
1005 { purify$
1006 "l" change.case$
1007 }
1008
1009 INTEGERS { len }
1010
% Strip a leading word from a string.
% Stack on entry (top last): <word> <len> <string>.
% If the first `len` characters of <string> equal <word>, leaves <string>
% without those characters; otherwise leaves <string> unchanged.
% Uses the global variables s and len as scratch storage.
1011 FUNCTION {chop.word}
1012 { 's :=
1013 'len :=
1014 s #1 len substring$ =
1015 { s len #1 + global.max$ substring$ }
1016 's
1017 if$
1018 }
1019
% Build the short author part of a citation label from a name list.
%   more than two names : "<first> et~al."
%   exactly two names   : "<first> and <second>", or "<first> et~al." when
%                         the second name is the literal "others"
%   one name            : "<first>"
% Names are rendered with the "{vv~}{ll}" pattern (von part + last name).
% "et~al." is passed through emphasize, which is defined outside this chunk.
1020 FUNCTION {format.lab.names}
1021 { 's :=
1022 s #1 "{vv~}{ll}" format.name$
1023 s num.names$ duplicate$
1024 #2 >
1025 { pop$ " " * "et~al." emphasize * }
1026 { #2 <
1027 'skip$
% exactly two names: check whether the second one is "others"
1028 { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
1029 { " " * "et~al." emphasize * }
1030 { " and " * s #2 "{vv~}{ll}" format.name$ * }
1031 if$
1032 }
1033 if$
1034 }
1035 if$
1036 }
1037
% Label text for entries identified by author.
% Uses the formatted author names; if author is empty falls back to the key
% field, and if that is empty too, to the first three characters of the
% citation key (cite$).
1038 FUNCTION {author.key.label}
1039 { author empty$
1040 { key empty$
1041 { cite$ #1 #3 substring$ }
1042 'key
1043 if$
1044 }
1045 { author format.lab.names }
1046 if$
1047 }
1048
% Label text for entries identified by author or, failing that, editor.
% Fallback order: author -> editor -> key -> first three characters of cite$.
1049 FUNCTION {author.editor.key.label}
1050 { author empty$
1051 { editor empty$
1052 { key empty$
1053 { cite$ #1 #3 substring$ }
1054 'key
1055 if$
1056 }
1057 { editor format.lab.names }
1058 if$
1059 }
1060 { author format.lab.names }
1061 if$
1062 }
1063
% Label text for entries identified by editor.
% Fallback order: editor -> key -> first three characters of cite$.
1064 FUNCTION {editor.key.label}
1065 { editor empty$
1066 { key empty$
1067 { cite$ #1 #3 substring$ }
1068 'key
1069 if$
1070 }
1071 { editor format.lab.names }
1072 if$
1073 }
1074
% Compute the citation label and store it in the entry variable `label`.
% The name part is chosen by entry type: book/inbook use author-or-editor,
% proceedings use editor, everything else uses author.  "(" and the year are
% then appended; the year is "????" when the year field is empty.
% Note that no closing ")" is added here.
% NOTE(review): presumably the ")" is supplied where the label is written
% out, outside this chunk -- confirm.
1075 FUNCTION {calc.label}
1076 { type$ "book" =
1077 type$ "inbook" =
1078 or
1079 'author.editor.key.label
1080 { type$ "proceedings" =
1081 'editor.key.label
1082 'author.key.label
1083 if$
1084 }
1085 if$
1086 "("
1087 *
1088 year duplicate$ empty$
1089 { pop$ "????" }
% purify the year and keep only its last four characters
1090 { purify$ #-1 #4 substring$ }
1091 if$
1092 *
1093 'label :=
1094 }
1095
% Turn a name list (top of stack) into a string suitable for sorting.
% Each name is formatted as von/last/first-initial/jr and passed through
% sortify; names are separated by a space.  A final "others" becomes "et al".
% Special case: when the list has more than two names, the second name is
% replaced by "zzzzzz" and the loop is ended, so only the first name takes
% part in the key.
% NOTE(review): presumably this makes multi-author ("et al.") entries sort
% after one- and two-author entries with the same first author -- confirm.
1096 FUNCTION {sort.format.names}
1097 { 's :=
1098 #1 'nameptr :=
1099 ""
1100 s num.names$ 'numnames :=
1101 numnames 'namesleft :=
1102 { namesleft #0 > }
1103 { nameptr #1 >
1104 { " " * }
1105 'skip$
1106 if$
1107 s nameptr
1108 "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}"
1109 format.name$ 't :=
1110 nameptr numnames = t "others" = and
1111 { "et al" * }
1112 { numnames #2 > nameptr #2 = and
% namesleft is set to 1 so that the decrement below terminates the loop
1113 { "zzzzzz" * #1 'namesleft := }
1114 { t sortify * }
1115 if$
1116 }
1117 if$
1118 nameptr #1 + 'nameptr :=
1119 namesleft #1 - 'namesleft :=
1120 }
1121 while$
1122 }
1123
% Turn a title (top of stack) into a sort string.
% The three chop.word calls consume the pushed (word, length) pairs from the
% top down, so the title is tested against "The ", then "An ", then "A ";
% each matching prefix is removed.  The result is sortified and limited to
% global.max$ characters.
1124 FUNCTION {sort.format.title}
1125 { 't :=
1126 "A " #2
1127 "An " #3
1128 "The " #4 t chop.word
1129 chop.word
1130 chop.word
1131 sortify
1132 #1 global.max$ substring$
1133 }
1134
% Name part of the sort key for author-based entries.
% Uses the author list; falls back to the sortified key field; if both are
% empty issues a warning and leaves the empty string.
1135 FUNCTION {author.sort}
1136 { author empty$
1137 { key empty$
1138 { "to sort, need author or key in " cite$ * warning$
1139 ""
1140 }
1141 { key sortify }
1142 if$
1143 }
1144 { author sort.format.names }
1145 if$
1146 }
1147
% Name part of the sort key for entries that may have author or editor.
% Fallback order: author -> editor -> sortified key; if all three are empty
% issues a warning and leaves the empty string.
1148 FUNCTION {author.editor.sort}
1149 { author empty$
1150 { editor empty$
1151 { key empty$
1152 { "to sort, need author, editor, or key in " cite$ * warning$
1153 ""
1154 }
1155 { key sortify }
1156 if$
1157 }
1158 { editor sort.format.names }
1159 if$
1160 }
1161 { author sort.format.names }
1162 if$
1163 }
1164
% Name part of the sort key for editor-based entries.
% Uses the editor list; falls back to the sortified key field; if both are
% empty issues a warning and leaves the empty string.
1165 FUNCTION {editor.sort}
1166 { editor empty$
1167 { key empty$
1168 { "to sort, need editor or key in " cite$ * warning$
1169 ""
1170 }
1171 { key sortify }
1172 if$
1173 }
1174 { editor sort.format.names }
1175 if$
1176 }
1177
% First sorting pass, run once per entry before the first SORT.
% Computes the citation label, then builds sort.key$ as
%   sortify(label) + separator + name sort string + separator + title sort string
% truncated to entry.max$ characters.  The name sort string (chosen by entry
% type exactly as in calc.label) is also saved in sort.label for later reuse
% by bib.sort.order.
1178 FUNCTION {presort}
1179 { calc.label
1180 label sortify
1181 " "
1182 *
1183 type$ "book" =
1184 type$ "inbook" =
1185 or
1186 'author.editor.sort
1187 { type$ "proceedings" =
1188 'editor.sort
1189 'author.sort
1190 if$
1191 }
1192 if$
% remember the (truncated) name sort string, then push it back for the key
1193 #1 entry.max$ substring$
1194 'sort.label :=
1195 sort.label
1196 *
1197 " "
1198 *
1199 title field.or.null
1200 sort.format.title
1201 *
1202 #1 entry.max$ substring$
1203 'sort.key$ :=
1204 }
1205
1206 ITERATE {presort}
1207
1208 SORT
1209
1210 STRINGS { last.label next.extra }
1211
1212 INTEGERS { last.extra.num }
1213
% Reset the state shared by forward.pass and reverse.pass:
% last.label becomes the character with code 0, next.extra the empty string
% and last.extra.num zero.
1214 FUNCTION {initialize.extra.label.stuff}
1215 { #0 int.to.chr$ 'last.label :=
1216 "" 'next.extra :=
1217 #0 'last.extra.num :=
1218 }
1219
% Forward pass over the sorted entries that assigns extra-label letters.
% When an entry has the same label as the previous one, the letter counter is
% advanced and stored in extra.label (so the first repeat gets "b").
% Otherwise the counter is reset to the code of "a", extra.label is left
% empty and the label is remembered in last.label.
1220 FUNCTION {forward.pass}
1221 { last.label label =
1222 { last.extra.num #1 + 'last.extra.num :=
1223 last.extra.num int.to.chr$ 'extra.label :=
1224 }
1225 { "a" chr.to.int$ 'last.extra.num :=
1226 "" 'extra.label :=
1227 label 'last.label :=
1228 }
1229 if$
1230 }
1231
% Backward pass (run via REVERSE) that completes the extra labels.
% If the entry visited just before this one (i.e. the following entry in
% sorted order) received "b", this entry is the first of that group and gets
% "a".  The extra label is then recorded in next.extra and appended to label.
1232 FUNCTION {reverse.pass}
1233 { next.extra "b" =
1234 { "a" 'extra.label := }
1235 'skip$
1236 if$
1237 extra.label 'next.extra :=
1238 label extra.label * 'label :=
1239 }
1240
1241 EXECUTE {initialize.extra.label.stuff}
1242
1243 ITERATE {forward.pass}
1244
1245 REVERSE {reverse.pass}
1246
% Second sorting pass: rebuild sort.key$ for the final bibliography order as
%   sort.label + separator + sortify(year) + separator + title sort string
% truncated to entry.max$ characters.  sort.label is the name sort string
% saved by presort.
1247 FUNCTION {bib.sort.order}
1248 { sort.label
1249 " "
1250 *
1251 year field.or.null sortify
1252 *
1253 " "
1254 *
1255 title field.or.null
1256 sort.format.title
1257 *
1258 #1 entry.max$ substring$
1259 'sort.key$ :=
1260 }
1261
1262 ITERATE {bib.sort.order}
1263
1264 SORT
1265
% Write the start of the .bbl output: the database preamble (only when it is
% non-empty) followed by the \begin{thebibliography}{} line.
1266 FUNCTION {begin.bib}
1267 { preamble$ empty$
1268 'skip$
1269 { preamble$ write$ newline$ }
1270 if$
1271 "\begin{thebibliography}{}" write$ newline$
1272 }
1273
1274 EXECUTE {begin.bib}
1275
1276 EXECUTE {init.state.consts}
1277
1278 ITERATE {call.type$}
1279
% Write the end of the .bbl output: a newline followed by the
% \end{thebibliography} line.
1280 FUNCTION {end.bib}
1281 { newline$
1282 "\end{thebibliography}" write$ newline$
1283 }
1284
1285 EXECUTE {end.bib}
1286 %% End of customized bst file
1287
0 %%
1 %% This is file `natbib.sty',
2 %% generated with the docstrip utility.
3 %%
4 %% The original source files were:
5 %%
6 %% natbib.dtx (with options: `package,all')
7 %% =============================================
8 %% IMPORTANT NOTICE:
9 %%
10 %% This program can be redistributed and/or modified under the terms
11 %% of the LaTeX Project Public License Distributed from CTAN
12 %% archives in directory macros/latex/base/lppl.txt; either
13 %% version 1 of the License, or any later version.
14 %%
15 %% This is a generated file.
16 %% It may not be distributed without the original source file natbib.dtx.
17 %%
18 %% Full documentation can be obtained by LaTeXing that original file.
19 %% Only a few abbreviated comments remain here to describe the usage.
20 %% =============================================
21 %% Copyright 1993-2000 Patrick W Daly
22 %% Max-Planck-Institut f\"ur Aeronomie
23 %% Max-Planck-Str. 2
24 %% D-37191 Katlenburg-Lindau
25 %% Germany
26 %% E-mail: daly@linmpi.mpg.de
27 \NeedsTeXFormat{LaTeX2e}[1995/06/01]
28 \ProvidesPackage{natbib}
29 [2000/07/24 7.0a (PWD)]
30 % This package reimplements the LaTeX \cite command to be used for various
31 % citation styles, both author-year and numerical. It accepts BibTeX
32 % output intended for many other packages, and therefore acts as a
33 % general, all-purpose citation-style interface.
34 %
35 % With standard numerical .bst files, only numerical citations are
36 % possible. With an author-year .bst file, both numerical and
37 % author-year citations are possible.
38 %
39 % If author-year citations are selected, \bibitem must have one of the
40 % following forms:
41 % \bibitem[Jones et al.(1990)]{key}...
42 % \bibitem[Jones et al.(1990)Jones, Baker, and Williams]{key}...
43 % \bibitem[Jones et al., 1990]{key}...
44 % \bibitem[\protect\citeauthoryear{Jones, Baker, and Williams}{Jones
45 % et al.}{1990}]{key}...
46 % \bibitem[\protect\citeauthoryear{Jones et al.}{1990}]{key}...
47 % \bibitem[\protect\astroncite{Jones et al.}{1990}]{key}...
48 % \bibitem[\protect\citename{Jones et al., }1990]{key}...
49 % \harvarditem[Jones et al.]{Jones, Baker, and Williams}{1990}{key}...
50 %
51 % This is either to be made up manually, or to be generated by an
52 % appropriate .bst file with BibTeX.
53 % Author-year mode || Numerical mode
54 % Then, \citet{key} ==>> Jones et al. (1990) || Jones et al. [21]
55 % \citep{key} ==>> (Jones et al., 1990) || [21]
56 % Multiple citations as normal:
57 % \citep{key1,key2} ==>> (Jones et al., 1990; Smith, 1989) || [21,24]
58 % or (Jones et al., 1990, 1991) || [21,24]
59 % or (Jones et al., 1990a,b) || [21,24]
60 % \cite{key} is the equivalent of \citet{key} in author-year mode
61 % and of \citep{key} in numerical mode
62 % Full author lists may be forced with \citet* or \citep*, e.g.
63 % \citep*{key} ==>> (Jones, Baker, and Williams, 1990)
64 % Optional notes as:
65 % \citep[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2)
66 % \citep[e.g.,][]{key} ==>> (e.g., Jones et al., 1990)
67 % \citep[see][pg. 34]{key}==>> (see Jones et al., 1990, pg. 34)
68 % (Note: in standard LaTeX, only one note is allowed, after the ref.
69 % Here, one note is like the standard, two make pre- and post-notes.)
70 % \citealt{key} ==>> Jones et al. 1990
71 % \citealt*{key} ==>> Jones, Baker, and Williams 1990
72 % \citealp{key} ==>> Jones et al., 1990
73 % \citealp*{key} ==>> Jones, Baker, and Williams, 1990
74 % Additional citation possibilities (both author-year and numerical modes)
75 % \citeauthor{key} ==>> Jones et al.
76 % \citeauthor*{key} ==>> Jones, Baker, and Williams
77 % \citeyear{key} ==>> 1990
78 % \citeyearpar{key} ==>> (1990)
79 % \citetext{priv. comm.} ==>> (priv. comm.)
80 % Note: full author lists depend on whether the bib style supports them;
81 % if not, the abbreviated list is printed even when full requested.
82 %
83 % For names like della Robbia at the start of a sentence, use
84 % \Citet{dRob98} ==>> Della Robbia (1998)
85 % \Citep{dRob98} ==>> (Della Robbia, 1998)
86 % \Citeauthor{dRob98} ==>> Della Robbia
87 %
88 %
89 % Citation aliasing is achieved with
90 % \defcitealias{key}{text}
91 % \citetalias{key} ==>> text
92 % \citepalias{key} ==>> (text)
93 %
94 % Defining the citation style of a given bib style:
95 % Use \bibpunct (in the preamble only) with 6 mandatory arguments:
96 % 1. opening bracket for citation
97 % 2. closing bracket
98 % 3. citation separator (for multiple citations in one \cite)
99 % 4. the letter n for numerical styles, s for superscripts
100 % else anything for author-year
101 % 5. punctuation between authors and date
102 % 6. punctuation between years (or numbers) when common authors missing
103 % One optional argument is the character coming before post-notes. It
104 % appears in square brackets before all other arguments. May be left off.
105 % Example (and default) \bibpunct[, ]{(}{)}{;}{a}{,}{,}
106 %
107 % To make this automatic for a given bib style, named newbib, say, make
108 % a local configuration file, natbib.cfg, with the definition
109 % \newcommand{\bibstyle@newbib}{\bibpunct...}
110 % Then the \bibliographystyle{newbib} will cause \bibstyle@newbib to
111 % be called on THE NEXT LATEX RUN (via the aux file).
112 %
113 % Such preprogrammed definitions may be invoked in the text (preamble only)
114 % by calling \citestyle{newbib}. This is only useful if the style specified
115 % differs from that in \bibliographystyle.
116 %
117 % With \citeindextrue and \citeindexfalse, one can control whether the
118 % \cite commands make an automatic entry of the citation in the .idx
119 % indexing file. For this, \makeindex must also be given in the preamble.
120 %
121 % LaTeX2e Options: (for selecting punctuation)
122 % round - round parentheses are used (default)
123 % square - square brackets are used [option]
124 % curly - curly braces are used {option}
125 % angle - angle brackets are used <option>
126 % colon - multiple citations separated by semicolon (default)
127 % comma - separated by comma
128 % authoryear - selects author-year citations (default)
129 % numbers- selects numerical citations
130 % super - numerical citations as superscripts
131 % sort - sorts multiple citations according to order in ref. list
132 % sort&compress - like sort, but also compresses numerical citations
133 % longnamesfirst - makes first citation full author list
134 % sectionbib - puts bibliography in a \section* instead of \chapter*
135 % Punctuation so selected dominates over any predefined ones.
136 % LaTeX2e options are called as, e.g.
137 % \usepackage[square,comma]{natbib}
138 % LaTeX the source file natbib.dtx to obtain more details
139 % or the file natnotes.tex for a brief reference sheet.
140 %-----------------------------------------------------------
141 \@ifclassloaded{aguplus}{\PackageError{natbib}
142 {The aguplus class already includes natbib coding,\MessageBreak
143 so you should not add it explicitly}
144 {Type <Return> for now, but then later remove\MessageBreak
145 the command \protect\usepackage{natbib} from the document}
146 \endinput}{}
147 \@ifclassloaded{nlinproc}{\PackageError{natbib}
148 {The nlinproc class already includes natbib coding,\MessageBreak
149 so you should not add it explicitly}
150 {Type <Return> for now, but then later remove\MessageBreak
151 the command \protect\usepackage{natbib} from the document}
152 \endinput}{}
153 \@ifclassloaded{egs}{\PackageError{natbib}
154 {The egs class already includes natbib coding,\MessageBreak
155 so you should not add it explicitly}
156 {Type <Return> for now, but then later remove\MessageBreak
157 the command \protect\usepackage{natbib} from the document}
158 \endinput}{}
159 % Define citation punctuation for some author-year styles
160 % One may add and delete at this point
161 % Or put additions into local configuration file natbib.cfg
162 \newcommand\bibstyle@chicago{\bibpunct{(}{)}{;}{a}{,}{,}}
163 \newcommand\bibstyle@named{\bibpunct{[}{]}{;}{a}{,}{,}}
164 \newcommand\bibstyle@agu{\bibpunct{[}{]}{;}{a}{,}{,~}}%Amer. Geophys. Union
165 \newcommand\bibstyle@egs{\bibpunct{(}{)}{;}{a}{,}{,}}%Eur. Geophys. Soc.
166 \newcommand\bibstyle@agsm{\bibpunct{(}{)}{,}{a}{}{,}\gdef\harvardand{\&}}
167 \newcommand\bibstyle@kluwer{\bibpunct{(}{)}{,}{a}{}{,}\gdef\harvardand{\&}}
168 \newcommand\bibstyle@dcu{\bibpunct{(}{)}{;}{a}{;}{,}\gdef\harvardand{and}}
169 \newcommand\bibstyle@aa{\bibpunct{(}{)}{;}{a}{}{,}} %Astronomy & Astrophysics
170 \newcommand\bibstyle@pass{\bibpunct{(}{)}{;}{a}{,}{,}}%Planet. & Space Sci
171 \newcommand\bibstyle@anngeo{\bibpunct{(}{)}{;}{a}{,}{,}}%Annales Geophysicae
172 \newcommand\bibstyle@nlinproc{\bibpunct{(}{)}{;}{a}{,}{,}}%Nonlin.Proc.Geophys.
173 % Define citation punctuation for some numerical styles
174 \newcommand\bibstyle@cospar{\bibpunct{/}{/}{,}{n}{}{}%
175 \gdef\NAT@biblabelnum##1{##1.}}
176 \newcommand\bibstyle@esa{\bibpunct{(Ref.~}{)}{,}{n}{}{}%
177 \gdef\NAT@biblabelnum##1{##1.\hspace{1em}}}
178 \newcommand\bibstyle@nature{\bibpunct{}{}{,}{s}{}{\textsuperscript{,}}%
179 \gdef\NAT@biblabelnum##1{##1.}}
180 % The standard LaTeX styles
181 \newcommand\bibstyle@plain{\bibpunct{[}{]}{,}{n}{}{,}}
182 \let\bibstyle@alpha=\bibstyle@plain
183 \let\bibstyle@abbrv=\bibstyle@plain
184 \let\bibstyle@unsrt=\bibstyle@plain
185 % The author-year modifications of the standard styles
186 \newcommand\bibstyle@plainnat{\bibpunct{[}{]}{,}{a}{,}{,}}
187 \let\bibstyle@abbrvnat=\bibstyle@plainnat
188 \let\bibstyle@unsrtnat=\bibstyle@plainnat
189 \newif\ifNAT@numbers \NAT@numbersfalse
190 \newif\ifNAT@super \NAT@superfalse
191 \DeclareOption{numbers}{\NAT@numberstrue
192 \ExecuteOptions{square,comma,nobibstyle}}
193 \DeclareOption{super}{\NAT@supertrue\NAT@numberstrue
194 \renewcommand\NAT@open{}\renewcommand\NAT@close{}
195 \ExecuteOptions{nobibstyle}}
196 \DeclareOption{authoryear}{\NAT@numbersfalse
197 \ExecuteOptions{round,colon,bibstyle}}
198 \DeclareOption{round}{%
199 \renewcommand\NAT@open{(} \renewcommand\NAT@close{)}
200 \ExecuteOptions{nobibstyle}}
201 \DeclareOption{square}{%
202 \renewcommand\NAT@open{[} \renewcommand\NAT@close{]}
203 \ExecuteOptions{nobibstyle}}
204 \DeclareOption{angle}{%
205 \renewcommand\NAT@open{$<$} \renewcommand\NAT@close{$>$}
206 \ExecuteOptions{nobibstyle}}
207 \DeclareOption{curly}{%
208 \renewcommand\NAT@open{\{} \renewcommand\NAT@close{\}}
209 \ExecuteOptions{nobibstyle}}
210 \DeclareOption{comma}{\renewcommand\NAT@sep{,}
211 \ExecuteOptions{nobibstyle}}
212 \DeclareOption{colon}{\renewcommand\NAT@sep{;}
213 \ExecuteOptions{nobibstyle}}
214 \DeclareOption{nobibstyle}{\let\bibstyle=\@gobble}
215 \DeclareOption{bibstyle}{\let\bibstyle=\@citestyle}
216 \newif\ifNAT@openbib \NAT@openbibfalse
217 \DeclareOption{openbib}{\NAT@openbibtrue}
218 \DeclareOption{sectionbib}{\def\NAT@sectionbib{on}}
219 \def\NAT@sort{0}
220 \DeclareOption{sort}{\def\NAT@sort{1}}
221 \DeclareOption{sort&compress}{\def\NAT@sort{2}}
222 \@ifpackageloaded{cite}{\PackageWarningNoLine{natbib}
223 {The `cite' package should not be used\MessageBreak
224 with natbib. Use option `sort' instead}\ExecuteOptions{sort}}{}
225 \newif\ifNAT@longnames\NAT@longnamesfalse
226 \DeclareOption{longnamesfirst}{\NAT@longnamestrue}
227 \DeclareOption{nonamebreak}{\def\NAT@nmfmt#1{\mbox{\NAT@up#1}}}
228 \def\NAT@nmfmt#1{{\NAT@up#1}}
% \bibstyle{<name>}: run the predefined punctuation setup \bibstyle@<name>
% if such a macro exists; otherwise do nothing.
229 \renewcommand\bibstyle[1]{\@ifundefined{bibstyle@#1}{\relax}
230 {\csname bibstyle@#1\endcsname}}
231 \AtBeginDocument{\global\let\bibstyle=\@gobble}
232 \let\@citestyle\bibstyle
233 \newcommand\citestyle[1]{\@citestyle{#1}\let\bibstyle\@gobble}
234 \@onlypreamble{\citestyle}\@onlypreamble{\@citestyle}
% \bibpunct[<cmt>]{<open>}{<close>}{<sep>}{<mode>}{<aysep>}{<yrsep>}
% Globally sets the citation punctuation:
%   #1 (optional, default ", ") -> \NAT@cmt   text placed before a post-note
%   #2 -> \NAT@open, #3 -> \NAT@close          opening / closing bracket
%   #4 -> \NAT@sep                             separator between citations
%   #5 mode: n selects numerical, s numerical + superscript, anything else
%      leaves numerical mode switched off
%   #6 -> \NAT@aysep, #7 -> \NAT@yrsep         author-year / year-year punctuation
% Finally \bibstyle is globally set to \@gobble, so later \bibstyle calls
% are ignored.
235 \newcommand\bibpunct[7][, ]%
236 {\gdef\NAT@open{#2}\gdef\NAT@close{#3}\gdef
237 \NAT@sep{#4}\global\NAT@numbersfalse\ifx #5n\global\NAT@numberstrue
238 \else
239 \ifx #5s\global\NAT@numberstrue\global\NAT@supertrue
240 \fi\fi
241 \gdef\NAT@aysep{#6}\gdef\NAT@yrsep{#7}%
242 \gdef\NAT@cmt{#1}%
243 \global\let\bibstyle\@gobble
244 }
245 \@onlypreamble{\bibpunct}
246 \newcommand\NAT@open{(} \newcommand\NAT@close{)}
247 \newcommand\NAT@sep{;}
248 \ProcessOptions
249 \newcommand\NAT@aysep{,} \newcommand\NAT@yrsep{,}
250 \newcommand\NAT@cmt{, }
% \NAT@cite{<text>}{<pre-note>}{<post-note>}: final output step for
% author-year citations.  When \ifNAT@swa is true the text is wrapped in the
% (optional) brackets with the pre-note in front and \NAT@cmt + post-note
% behind, each only if non-empty; otherwise only <text> is printed.
% Ends with \endgroup, closing the group opened by the user-level \cite...
% command.
251 \newcommand\NAT@cite%
252 [3]{\ifNAT@swa\NAT@@open\if*#2*\else#2\ \fi
253 #1\if*#3*\else\NAT@cmt#3\fi\NAT@@close\else#1\fi\endgroup}
% \NAT@citenum{<text>}{<pre-note>}{<post-note>}: final output step for
% numerical (non-superscript) citations.  Its body is the same as that of
% \NAT@cite: brackets and notes when \ifNAT@swa is true, bare <text>
% otherwise, followed by \endgroup.
254 \newcommand\NAT@citenum%
255 [3]{\ifNAT@swa\NAT@@open\if*#2*\else#2\ \fi
256 #1\if*#3*\else\NAT@cmt#3\fi\NAT@@close\else#1\fi\endgroup}
% \NAT@citesuper{<text>}{<pre-note>}{<post-note>}: final output step for
% superscript citations.  When \ifNAT@swa is true, removes the preceding
% space, adds 1pt and prints <text> as a superscript, followed by the
% post-note in parentheses if it is non-empty; the pre-note (#2) is not used.
% Otherwise prints <text> only.  Ends with \endgroup.
257 \newcommand\NAT@citesuper[3]{\ifNAT@swa
258 \unskip\hspace{1\p@}\textsuperscript{#1}%
259 \if*#3*\else\ (#3)\fi\else #1\fi\endgroup}
260 \providecommand
261 \textsuperscript[1]{\mbox{$^{\mbox{\scriptsize#1}}$}}
262 \providecommand\@firstofone[1]{#1}
% \NAT@citexnum[<pre-note>][<post-note>]{<keys>}: worker behind \@citex in
% numerical mode (installed by \NAT@set@cites).
% Outline, as visible in the body:
%  - the key list is passed through \NAT@sort@cites and iterated as
%    \NAT@cite@list; each key is written to the .aux file as \citation{key};
%  - an undefined key prints a bold "?" and issues a warning;
%  - a defined key is parsed with \NAT@parse; what is printed depends on
%    \ifNAT@swa, \NAT@ctype and \NAT@sort:
%      * \ifNAT@swa true, \NAT@ctype > 1: year (ctype 2) or alias;
%      * \ifNAT@swa true, otherwise: the number, and with \NAT@sort > 1
%        consecutive numbers are collapsed into a range joined by "--";
%      * \ifNAT@swa false: \ifcase on \NAT@ctype -- author followed by the
%        bracketed number, author only, year only, or alias;
%  - the assembled text is handed to \@cite together with both notes.
% NOTE(review): \NAT@sort@cites, \NAT@citeundefined and the hyper@nat...
% hooks are defined outside this chunk.
263 \newcommand\NAT@citexnum{}
264 \def\NAT@citexnum[#1][#2]#3{%
265 \NAT@sort@cites{#3}%
266 \let\@citea\@empty
267 \@cite{\def\NAT@num{-1}\let\NAT@last@yr\relax\let\NAT@nm\@empty
268 \@for\@citeb:=\NAT@cite@list\do
269 {\edef\@citeb{\expandafter\@firstofone\@citeb}%
270 \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
271 \@ifundefined{b@\@citeb\@extra@b@citeb}{%
272 {\reset@font\bfseries?}
273 \NAT@citeundefined\PackageWarning{natbib}%
274 {Citation `\@citeb' on page \thepage \space undefined}}%
275 {\let\NAT@last@num\NAT@num\let\NAT@last@nm\NAT@nm
276 \NAT@parse{\@citeb}%
277 \ifNAT@longnames\@ifundefined{bv@\@citeb\@extra@b@citeb}{%
278 \let\NAT@name=\NAT@all@names
279 \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}{}%
280 \fi
281 \ifNAT@full\let\NAT@nm\NAT@all@names\else
282 \let\NAT@nm\NAT@name\fi
283 \ifNAT@swa
284 \ifnum\NAT@ctype>1\relax\@citea
285 \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
286 \ifnum\NAT@ctype=2\relax\NAT@test{\NAT@ctype}%
287 \else\NAT@alias
288 \fi\hyper@natlinkend\else
289 \ifnum\NAT@sort>1
290 \begingroup\catcode`\_=8
291 \ifcat _\ifnum\z@<0\NAT@num _\else A\fi
292 \global\let\NAT@nm=\NAT@num \else \gdef\NAT@nm{-2}\fi
293 \ifcat _\ifnum\z@<0\NAT@last@num _\else A\fi
294 \global\@tempcnta=\NAT@last@num \global\advance\@tempcnta by\@ne
295 \else \global\@tempcnta\m@ne\fi
296 \endgroup
297 \ifnum\NAT@nm=\@tempcnta
298 \ifx\NAT@last@yr\relax
299 \edef\NAT@last@yr{\@citea \mbox{\noexpand\citenumfont{\NAT@num}}}%
300 \else
301 \edef\NAT@last@yr{--\penalty\@m\mbox{\noexpand\citenumfont{\NAT@num}}}%
302 \fi
303 \else
304 \NAT@last@yr \@citea \mbox{\citenumfont{\NAT@num}}%
305 \let\NAT@last@yr\relax
306 \fi
307 \else
308 \@citea \mbox{\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
309 {\citenumfont{\NAT@num}}\hyper@natlinkend}%
310 \fi
311 \fi
312 \def\@citea{\NAT@sep\penalty\@m\NAT@space}%
313 \else
314 \ifcase\NAT@ctype\relax
315 \ifx\NAT@last@nm\NAT@nm \NAT@yrsep\penalty\@m\NAT@space\else
316 \@citea \NAT@test{1}\ \NAT@@open
317 \if*#1*\else#1\ \fi\fi \NAT@mbox{%
318 \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
319 {\citenumfont{\NAT@num}}\hyper@natlinkend}%
320 \def\@citea{\NAT@@close\NAT@sep\penalty\@m\ }%
321 \or\@citea
322 \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
323 \NAT@test{\NAT@ctype}\hyper@natlinkend
324 \def\@citea{\NAT@sep\penalty\@m\ }%
325 \or\@citea
326 \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
327 \NAT@test{\NAT@ctype}\hyper@natlinkend
328 \def\@citea{\NAT@sep\penalty\@m\ }%
329 \or\@citea
330 \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
331 \NAT@alias\hyper@natlinkend
332 \def\@citea{\NAT@sep\penalty\@m\ }%
333 \fi
334 \fi
335 }}%
336 \ifnum\NAT@sort>1\relax\NAT@last@yr\fi
337 \ifNAT@swa\else\ifnum\NAT@ctype=0\if*#2*\else
338 \NAT@cmt#2\fi \NAT@@close\fi\fi}{#1}{#2}}
% \NAT@test{<n>}: print the author (n = 1) or the date (any other n) of the
% citation currently being processed.
% If the author equals \NAT@noname, or the date is empty, a bold
% "(author?)" / "(year?)" placeholder is printed instead and a package
% warning naming the key and page is issued.
339 \newcommand\NAT@test[1]{\ifnum#1=1 \ifx\NAT@nm\NAT@noname
340 {\reset@font\bfseries(author?)}\PackageWarning{natbib}
341 {Author undefined for citation`\@citeb'
342 \MessageBreak
343 on page \thepage}\else \NAT@nm \fi
344 \else \if\relax\NAT@date\relax
345 {\reset@font\bfseries(year?)}\PackageWarning{natbib}
346 {Year undefined for citation`\@citeb'
347 \MessageBreak
348 on page \thepage}\else \NAT@date \fi \fi}
349 \let\citenumfont=\relax
% \NAT@citex[<pre-note>][<post-note>]{<keys>}: worker behind \@citex in
% author-year mode (installed by \NAT@set@cites).
% Outline, as visible in the body:
%  - the key list is passed through \NAT@sort@cites and iterated as
%    \NAT@cite@list; each key is written to the .aux file as \citation{key};
%  - an undefined key prints a bold "?" and issues a warning;
%  - a defined key is parsed with \NAT@parse; the name used is the full list
%    when \ifNAT@full is set (or on first use with \ifNAT@longnames);
%  - \ifcase on \NAT@ctype selects what is printed: 0 author + date,
%    1 author only, 2 date only, 3 alias;
%  - for ctype 0, when the author equals the previous citation's author only
%    the date is added after \NAT@yrsep, and when the year matches too only
%    the extra label (\NAT@exlab) is added;
%  - with \ifNAT@swa false (textual form) the date is placed inside the
%    brackets after the author, and the post-note and closing bracket are
%    emitted at the very end;
%  - the assembled text is handed to \@cite together with both notes.
% NOTE(review): \NAT@sort@cites, \NAT@citeundefined and the hyper@nat...
% hooks are defined outside this chunk.
350 \newcommand\NAT@citex{}
351 \def\NAT@citex%
352 [#1][#2]#3{%
353 \NAT@sort@cites{#3}%
354 \let\@citea\@empty
355 \@cite{\let\NAT@nm\@empty\let\NAT@year\@empty
356 \@for\@citeb:=\NAT@cite@list\do
357 {\edef\@citeb{\expandafter\@firstofone\@citeb}%
358 \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
359 \@ifundefined{b@\@citeb\@extra@b@citeb}{\@citea%
360 {\reset@font\bfseries ?}\NAT@citeundefined
361 \PackageWarning{natbib}%
362 {Citation `\@citeb' on page \thepage \space undefined}\def\NAT@date{}}%
363 {\let\NAT@last@nm=\NAT@nm\let\NAT@last@yr=\NAT@year
364 \NAT@parse{\@citeb}%
365 \ifNAT@longnames\@ifundefined{bv@\@citeb\@extra@b@citeb}{%
366 \let\NAT@name=\NAT@all@names
367 \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}{}%
368 \fi
369 \ifNAT@full\let\NAT@nm\NAT@all@names\else
370 \let\NAT@nm\NAT@name\fi
371 \ifNAT@swa\ifcase\NAT@ctype
372 \if\relax\NAT@date\relax
373 \@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
374 \NAT@nmfmt{\NAT@nm}\NAT@date\hyper@natlinkend
375 \else
376 \ifx\NAT@last@nm\NAT@nm\NAT@yrsep
377 \ifx\NAT@last@yr\NAT@year
378 \hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@exlab
379 \hyper@natlinkend
380 \else\unskip\
381 \hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@date
382 \hyper@natlinkend
383 \fi
384 \else\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
385 \NAT@nmfmt{\NAT@nm}%
386 \hyper@natlinkbreak{\NAT@aysep\ }{\@citeb\@extra@b@citeb}%
387 \NAT@date\hyper@natlinkend
388 \fi
389 \fi
390 \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
391 \NAT@nmfmt{\NAT@nm}\hyper@natlinkend
392 \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
393 \NAT@date\hyper@natlinkend
394 \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
395 \NAT@alias\hyper@natlinkend
396 \fi \def\@citea{\NAT@sep\ }%
397 \else\ifcase\NAT@ctype
398 \if\relax\NAT@date\relax
399 \@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
400 \NAT@nmfmt{\NAT@nm}\hyper@natlinkend
401 \else
402 \ifx\NAT@last@nm\NAT@nm\NAT@yrsep
403 \ifx\NAT@last@yr\NAT@year
404 \hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@exlab
405 \hyper@natlinkend
406 \else\unskip\
407 \hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@date
408 \hyper@natlinkend
409 \fi
410 \else\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
411 \NAT@nmfmt{\NAT@nm}%
412 \hyper@natlinkbreak{\ \NAT@@open\if*#1*\else#1\ \fi}%
413 {\@citeb\@extra@b@citeb}%
414 \NAT@date\hyper@natlinkend\fi
415 \fi
416 \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
417 \NAT@nmfmt{\NAT@nm}\hyper@natlinkend
418 \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
419 \NAT@date\hyper@natlinkend
420 \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
421 \NAT@alias\hyper@natlinkend
422 \fi \if\relax\NAT@date\relax\def\@citea{\NAT@sep\ }%
423 \else\def\@citea{\NAT@@close\NAT@sep\ }\fi
424 \fi
425 }}\ifNAT@swa\else\if*#2*\else\NAT@cmt#2\fi
426 \if\relax\NAT@date\relax\else\NAT@@close\fi\fi}{#1}{#2}}
427 \newif\ifNAT@par \NAT@partrue
428 \newcommand\NAT@@open{\ifNAT@par\NAT@open\fi}
429 \newcommand\NAT@@close{\ifNAT@par\NAT@close\fi}
% \NAT@alias: print the alias text stored for the current key (macro
% al@<key>, set by \defcitealias).  If no alias is defined, print a bold
% "(alias?)" placeholder and issue a package warning.
430 \newcommand\NAT@alias{\@ifundefined{al@\@citeb\@extra@b@citeb}{%
431 {\reset@font\bfseries(alias?)}\PackageWarning{natbib}
432 {Alias undefined for citation `\@citeb'
433 \MessageBreak on page \thepage}}{\@nameuse{al@\@citeb\@extra@b@citeb}}}
434 \let\NAT@up\relax
% Capitalisation helpers used by \Citet, \Citep etc. (they \let \NAT@up to
% \NAT@Up).
% \NAT@Up{<text>}   expands <text> into \NAT@temp (inside a group where
%                   \protect is unexpandable and ~ is \relax) and then runs
%                   \NAT@UP over the result.
% \NAT@deftemp      stores its argument in \NAT@temp via \xdef.
% \NAT@UP           walks token by token: non-letters are copied through and
%                   scanning continues; the first token with the category of
%                   a letter is passed to \MakeUppercase and scanning stops.
435 \newcommand\NAT@Up[1]{{\let\protect\@unexpandable@protect\let~\relax
436 \expandafter\NAT@deftemp#1}\expandafter\NAT@UP\NAT@temp}
437 \newcommand\NAT@deftemp[1]{\xdef\NAT@temp{#1}}
438 \newcommand\NAT@UP[1]{\let\@tempa\NAT@UP\ifcat a#1\MakeUppercase{#1}%
439 \let\@tempa\relax\else#1\fi\@tempa}
% \shortcites{<keys>}: for every key in the comma list, globally define the
% marker macro bv@<key>.  The citation workers only switch to the full
% author list (longnamesfirst) while that marker is still undefined, so the
% listed keys use the short form from their first citation on.
440 \newcommand\shortcites[1]{%
441 \@bsphack\@for\@citeb:=#1\do
442 {\edef\@citeb{\expandafter\@firstofone\@citeb}%
443 \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}\@esphack}
444 \newcommand\NAT@biblabel[1]{\hfill}
445 \newcommand\NAT@biblabelnum[1]{\bibnumfmt{#1}}
446 \newcommand\bibnumfmt[1]{[#1]}
447 \def\@tempa#1{[#1]}
448 \ifx\@tempa\@biblabel\let\@biblabel\@empty\fi
% \NAT@bibsetnum{<widest label>}: list layout for a numerical bibliography.
% \labelwidth is set to the width of \@biblabel{#1}, \leftmargin to
% \labelwidth + \labelsep, \itemsep to \bibsep and \parsep to zero.
% With the openbib option the left margin grows by 4mm, and \itemindent and
% \listparindent are set to -4mm.
449 \newcommand\NAT@bibsetnum[1]{\settowidth\labelwidth{\@biblabel{#1}}%
450 \setlength{\leftmargin}{\labelwidth}\addtolength{\leftmargin}{\labelsep}%
451 \setlength{\itemsep}{\bibsep}\setlength{\parsep}{\z@}%
452 \ifNAT@openbib
453 \addtolength{\leftmargin}{4mm}%
454 \setlength{\itemindent}{-4mm}%
455 \setlength{\listparindent}{\itemindent}%
456 \setlength{\parsep}{0pt}%
457 \fi
458 }
459 \newlength{\bibhang}
460 \setlength{\bibhang}{1em}
461 \newlength{\bibsep}
462 {\@listi \global\bibsep\itemsep \global\advance\bibsep by\parsep}
463
% \NAT@bibsetup{<ignored>}: list layout for an author-year bibliography.
% Produces a hanging indent of \bibhang (\leftmargin = \bibhang,
% \itemindent = -\leftmargin), with \itemsep = \bibsep and \parsep = 0.
% The argument is accepted but not used.
464 \newcommand\NAT@bibsetup%
465 [1]{\setlength{\leftmargin}{\bibhang}\setlength{\itemindent}{-\leftmargin}%
466 \setlength{\itemsep}{\bibsep}\setlength{\parsep}{\z@}}
% \NAT@set@cites: install the citation machinery that matches the selected
% mode (run at \begin{document}).
%   numerical mode : \@citex = \NAT@citexnum, \@bibsetup = \NAT@bibsetnum,
%                    \natexlab discards its argument, and \@biblabel becomes
%                    \NAT@biblabelnum if it is currently \@empty;
%                    with superscripts \@cite = \NAT@citesuper, \NAT@mbox
%                    produces a superscript, \citeyearpar = \citeyear and
%                    \NAT@space is \relax; otherwise \@cite = \NAT@citenum,
%                    \NAT@mbox = \mbox and \NAT@space is a space.
%   author-year    : \@cite = \NAT@cite, \@citex = \NAT@citex,
%                    \@biblabel = \NAT@biblabel, \@bibsetup = \NAT@bibsetup,
%                    and \natexlab prints its argument.
467 \newcommand\NAT@set@cites{\ifNAT@numbers
468 \ifNAT@super \let\@cite\NAT@citesuper
469 \def\NAT@mbox##1{\unskip\nobreak\hspace{1\p@}\textsuperscript{##1}}%
470 \let\citeyearpar=\citeyear
471 \let\NAT@space\relax\else
472 \let\NAT@mbox=\mbox
473 \let\@cite\NAT@citenum \def\NAT@space{ }\fi
474 \let\@citex\NAT@citexnum
475 \ifx\@biblabel\@empty\let\@biblabel\NAT@biblabelnum\fi
476 \let\@bibsetup\NAT@bibsetnum
477 \def\natexlab##1{}%
478 \else
479 \let\@cite\NAT@cite
480 \let\@citex\NAT@citex
481 \let\@biblabel\NAT@biblabel
482 \let\@bibsetup\NAT@bibsetup
483 \def\natexlab##1{##1}%
484 \fi}
485 \AtBeginDocument{\NAT@set@cites}
486 \AtBeginDocument{\ifx\SK@def\@undefined\else
487 \ifx\SK@cite\@empty\else
488 \SK@def\@citex[#1][#2]#3{\SK@\SK@@ref{#3}\SK@@citex[#1][#2]{#3}}\fi
489 \ifx\SK@citeauthor\@undefined\def\HAR@checkdef{}\else
490 \let\citeauthor\SK@citeauthor
491 \let\citefullauthor\SK@citefullauthor
492 \let\citeyear\SK@citeyear\fi
493 \fi}
494 \AtBeginDocument{\@ifpackageloaded{hyperref}{%
495 \ifnum\NAT@sort=2\def\NAT@sort{1}\fi}{}}
496 \newif\ifNAT@full\NAT@fullfalse
497 \newif\ifNAT@swa
% ---- User-level citation commands ------------------------------------
% Every command opens a group (closed again by the \@cite implementation)
% and sets three switches before handing over to \NAT@citetp:
%   \ifNAT@swa  true = parenthetical form, false = textual form
%   \NAT@ctype  0 = author and year, 1 = author only, 2 = year only,
%               3 = alias text
%   \ifNAT@par  whether the brackets \NAT@open/\NAT@close are printed
% A star after the command name selects the full author list (\NAT@fulltrue).
%
% \citet: textual citation, with brackets.
498 \DeclareRobustCommand\citet
499 {\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@partrue
500 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
% Optional-argument parsing: no optional argument -> both notes empty;
% one optional argument -> it is the post-note; two -> pre-note, post-note.
501 \newcommand\NAT@citetp{\@ifnextchar[{\NAT@@citetp}{\NAT@@citetp[]}}
502 \newcommand\NAT@@citetp{}
503 \def\NAT@@citetp[#1]{\@ifnextchar[{\@citex[#1]}{\@citex[][#1]}}
% \citep: parenthetical citation, with brackets.
504 \DeclareRobustCommand\citep
505 {\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@partrue
506 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
% \cite: starts out parenthetical; see \NAT@cites for the switch to textual.
507 \DeclareRobustCommand\cite
508 {\begingroup\def\NAT@ctype{0}\NAT@partrue\NAT@swatrue
509 \@ifstar{\NAT@fulltrue\NAT@cites}{\NAT@fullfalse\NAT@cites}}
% \NAT@cites: when \cite has no optional argument and numerical mode is off,
% fall back to the textual form (\NAT@swafalse).
510 \newcommand\NAT@cites{\@ifnextchar [{\NAT@@citetp}{%
511 \ifNAT@numbers\else
512 \NAT@swafalse
513 \fi
514 \NAT@@citetp[]}}
% \citealt / \citealp: like \citet / \citep but without brackets.
515 \DeclareRobustCommand\citealt
516 {\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@parfalse
517 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
518 \DeclareRobustCommand\citealp
519 {\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@parfalse
520 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
% \citeauthor: author names only.
521 \DeclareRobustCommand\citeauthor
522 {\begingroup\NAT@swafalse\def\NAT@ctype{1}\NAT@parfalse
523 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
% Capitalised variants: identical to the lower-case commands except that
% \NAT@up is set to \NAT@Up, which upper-cases the first letter of the name.
524 \DeclareRobustCommand\Citet
525 {\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@partrue
526 \let\NAT@up\NAT@Up
527 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
528 \DeclareRobustCommand\Citep
529 {\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@partrue
530 \let\NAT@up\NAT@Up
531 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
532 \DeclareRobustCommand\Citealt
533 {\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@parfalse
534 \let\NAT@up\NAT@Up
535 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
536 \DeclareRobustCommand\Citealp
537 {\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@parfalse
538 \let\NAT@up\NAT@Up
539 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
540 \DeclareRobustCommand\Citeauthor
541 {\begingroup\NAT@swafalse\def\NAT@ctype{1}\NAT@parfalse
542 \let\NAT@up\NAT@Up
543 \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
% \citeyear / \citeyearpar: year only, without / with brackets (no star form).
544 \DeclareRobustCommand\citeyear
545 {\begingroup\NAT@swafalse\def\NAT@ctype{2}\NAT@parfalse\NAT@citetp}
546 \DeclareRobustCommand\citeyearpar
547 {\begingroup\NAT@swatrue\def\NAT@ctype{2}\NAT@partrue\NAT@citetp}
% \citetext{<text>}: arbitrary text inside the citation brackets.
548 \newcommand\citetext[1]{\NAT@open#1\NAT@close}
% \citefullauthor: shorthand for \citeauthor*.
549 \DeclareRobustCommand\citefullauthor
550 {\citeauthor*}
% \defcitealias{<key>}{<text>}: store <text> as the alias of <key>, warning
% if an alias for that key already exists.
551 \newcommand\defcitealias[2]{%
552 \@ifundefined{al@#1\@extra@b@citeb}{}
553 {\PackageWarning{natbib}{Overwriting existing alias for citation #1}}
554 \@namedef{al@#1\@extra@b@citeb}{#2}}
% \citetalias / \citepalias: print the alias without / with brackets.
555 \DeclareRobustCommand\citetalias{\begingroup
556 \NAT@swafalse\def\NAT@ctype{3}\NAT@parfalse\NAT@citetp}
557 \DeclareRobustCommand\citepalias{\begingroup
558 \NAT@swatrue\def\NAT@ctype{3}\NAT@partrue\NAT@citetp}
% \nocite{<keys>}: write a \citation line to the .aux file for each key
% without printing anything.  For every key except the wildcard "*", a
% warning is issued when the key has no bibliography data (b@<key>) yet.
559 \renewcommand\nocite[1]{\@bsphack
560 \@for\@citeb:=#1\do{%
561 \edef\@citeb{\expandafter\@firstofone\@citeb}%
562 \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
563 \if*\@citeb\else
564 \@ifundefined{b@\@citeb\@extra@b@citeb}{%
565 \NAT@citeundefined \PackageWarning{natbib}%
566 {Citation `\@citeb' undefined}}{}\fi}%
567 \@esphack}
% \NAT@parse{<key>}: load the stored citation data of <key>.
% The contents of b@<key> are expanded into \NAT@temp (with \protect
% unexpandable, ~ as \relax and \active@prefix gobbled), split into its four
% fields by \NAT@split, and the date is then split into year and extra label
% by \NAT@parse@date.  The "??????" padding guarantees that \NAT@parse@date
% always finds its six arguments.  If \ifciteindex is set, an index entry is
% written via \NAT@index.
568 \newcommand\NAT@parse[1]{{%
569 \let\protect=\@unexpandable@protect\let~\relax
570 \let\active@prefix=\@gobble
571 \xdef\NAT@temp{\csname b@#1\@extra@b@citeb\endcsname}}%
572 \expandafter\NAT@split\NAT@temp
573 \expandafter\NAT@parse@date\NAT@date??????@@%
574 \ifciteindex\NAT@index\fi
575 }
% \NAT@split{<num>}{<date>}{<short names>}{<all names>}: store the four
% fields of a citation record globally in \NAT@num, \NAT@date, \NAT@name and
% \NAT@all@names.  When the full name list equals \NAT@noname, the short
% names are used for \NAT@all@names as well.
576 \newcommand\NAT@split[4]{%
577 \gdef\NAT@num{#1}\gdef\NAT@name{#3}\gdef\NAT@date{#2}%
578 \gdef\NAT@all@names{#4}%
579 \ifx\NAT@noname\NAT@all@names \gdef\NAT@all@names{#3}\fi}
% \NAT@parse@date<t1><t2><t3><t4><t5><rest>@@: split a date into \NAT@year
% and the extra label \NAT@exlab.
% The first four tokens are examined in turn; the first one whose category
% code is 11 (letter) becomes \NAT@exlab and the tokens before it form
% \NAT@year.  If none of the four is a letter, \NAT@year is the first four
% tokens and \NAT@exlab is {<t5>}.
580 \newcommand\NAT@parse@date{}
581 \def\NAT@parse@date#1#2#3#4#5#6@@{%
582 \ifnum\the\catcode`#1=11\def\NAT@year{}\def\NAT@exlab{#1}\else
583 \ifnum\the\catcode`#2=11\def\NAT@year{#1}\def\NAT@exlab{#2}\else
584 \ifnum\the\catcode`#3=11\def\NAT@year{#1#2}\def\NAT@exlab{#3}\else
585 \ifnum\the\catcode`#4=11\def\NAT@year{#1#2#3}\def\NAT@exlab{#4}\else
586 \def\NAT@year{#1#2#3#4}\def\NAT@exlab{{#5}}\fi\fi\fi\fi}
587 \newcommand\NAT@index{}
588 \let\NAT@makeindex=\makeindex
589 \renewcommand\makeindex{\NAT@makeindex
590 \renewcommand\NAT@index{\@bsphack\begingroup
591 \def~{\string~}\@wrindex{\NAT@idxtxt}}}
592 \newcommand\NAT@idxtxt{\NAT@name\ \NAT@open\NAT@date\NAT@close}
593 \@ifundefined{@indexfile}{}{\let\NAT@makeindex\relax\makeindex}
594 \newif\ifciteindex \citeindexfalse
595 \newcommand\citeindextype{default}
596 \newcommand\NAT@index@alt{{\let\protect=\noexpand\let~\relax
597 \xdef\NAT@temp{\NAT@idxtxt}}\expandafter\NAT@exp\NAT@temp\@nil}
598 \newcommand\NAT@exp{}
599 \def\NAT@exp#1\@nil{\mbox{}\index[\citeindextype]{#1}}
600
601 \AtBeginDocument{%
602 \@ifpackageloaded{index}{\let\NAT@index=\NAT@index@alt}{}}
603 \newcommand\NAT@ifcmd{\futurelet\NAT@temp\NAT@ifxcmd}
604 \newcommand\NAT@ifxcmd{\ifx\NAT@temp\relax\else\expandafter\NAT@bare\fi}
605 \def\NAT@bare#1(#2)#3(@)#4\@nil#5{%
606 \if @#2
607 \expandafter\NAT@apalk#1, , \@nil{#5}\else
608 \stepcounter{NAT@ctr}%
609 \NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{#3}{#5}
610 \fi
611 }
612 \newcommand\NAT@wrout[5]{%
613 \if@filesw
614 {\let\protect\noexpand\let~\relax
615 \immediate
616 \write\@auxout{\string\bibcite{#5}{{#1}{#2}{{#3}}{{#4}}}}}\fi
617 \ignorespaces}
618 \def\NAT@noname{{}}
619 \renewcommand\bibitem{%
620 \@ifnextchar[{\@lbibitem}{%
621 \global\NAT@stdbsttrue
622 \stepcounter{NAT@ctr}\@lbibitem[\arabic{NAT@ctr}]}}
623 \def\@lbibitem[#1]#2{%
624 \if\relax\@extra@b@citeb\relax\else
625 \@ifundefined{br@#2\@extra@b@citeb}{}{%
626 \@namedef{br@#2}{\@nameuse{br@#2\@extra@b@citeb}}}\fi
627 \@ifundefined{b@#2\@extra@b@citeb}{\def\NAT@num{}}{\NAT@parse{#2}}%
628 \item[\hfil\hyper@natanchorstart{#2\@extra@b@citeb}\@biblabel{\NAT@num}%
629 \hyper@natanchorend]%
630 \NAT@ifcmd#1(@)(@)\@nil{#2}}
631 \ifx\SK@lbibitem\@undefined\else
632 \let\SK@lbibitem\@lbibitem
633 \def\@lbibitem[#1]#2{%
634 \SK@lbibitem[#1]{#2}\SK@\SK@@label{#2}\ignorespaces}\fi
635 \newif\ifNAT@stdbst \NAT@stdbstfalse
636
637 \AtEndDocument
638 {\ifNAT@stdbst\if@filesw\immediate\write\@auxout{\string
639 \global\string\NAT@numberstrue}\fi\fi
640 }
641 \providecommand\bibcite{}
642 \renewcommand\bibcite[2]{\@ifundefined{b@#1\@extra@binfo}\relax
643 {\NAT@citemultiple
644 \PackageWarningNoLine{natbib}{Citation `#1' multiply defined}}%
645 \global\@namedef{b@#1\@extra@binfo}{#2}}
646 \AtEndDocument{\NAT@swatrue\let\bibcite\NAT@testdef}
647 \newcommand\NAT@testdef[2]{%
648 \def\NAT@temp{#2}\expandafter \ifx \csname b@#1\@extra@binfo\endcsname
649 \NAT@temp \else \ifNAT@swa \NAT@swafalse
650 \PackageWarningNoLine{natbib}{Citation(s) may have
651 changed.\MessageBreak
652 Rerun to get citations correct}\fi\fi}
653 \newcommand\NAT@apalk{}
654 \def\NAT@apalk#1, #2, #3\@nil#4{\if\relax#2\relax
655 \global\NAT@stdbsttrue
656 \NAT@wrout{#1}{}{}{}{#4}\else
657 \stepcounter{NAT@ctr}%
658 \NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{}{#4}\fi}
659 \newcommand\citeauthoryear{}
660 \def\citeauthoryear#1#2#3(@)(@)\@nil#4{\stepcounter{NAT@ctr}\if\relax#3\relax
661 \NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{}{#4}\else
662 \NAT@wrout{\arabic {NAT@ctr}}{#3}{#2}{#1}{#4}\fi}
663 \newcommand\citestarts{\NAT@open}
664 \newcommand\citeends{\NAT@close}
665 \newcommand\betweenauthors{and}
666 \newcommand\astroncite{}
667 \def\astroncite#1#2(@)(@)\@nil#3{\stepcounter{NAT@ctr}\NAT@wrout{\arabic
668 {NAT@ctr}}{#2}{#1}{}{#3}}
669 \newcommand\citename{}
670 \def\citename#1#2(@)(@)\@nil#3{\expandafter\NAT@apalk#1#2, \@nil{#3}}
671 \newcommand\harvarditem[4][]%
672 {\if\relax#1\relax\bibitem[#2(#3)]{#4}\else
673 \bibitem[#1(#3)#2]{#4}\fi }
674 \newcommand\harvardleft{\NAT@open}
675 \newcommand\harvardright{\NAT@close}
676 \newcommand\harvardyearleft{\NAT@open}
677 \newcommand\harvardyearright{\NAT@close}
678 \AtBeginDocument{\providecommand{\harvardand}{and}}
679 \newcommand\harvardurl[1]{\textbf{URL:} \textit{#1}}
680 \providecommand\bibsection{}
681 \@ifundefined{chapter}%
682 {\renewcommand\bibsection{\section*{\refname
683 \@mkboth{\MakeUppercase{\refname}}{\MakeUppercase{\refname}}}}}
684 {\@ifundefined{NAT@sectionbib}%
685 {\renewcommand\bibsection{\chapter*{\bibname
686 \@mkboth{\MakeUppercase{\bibname}}{\MakeUppercase{\bibname}}}}}
687 {\renewcommand\bibsection{\section*{\bibname
688 \ifx\@mkboth\@gobbletwo\else\markright{\MakeUppercase{\bibname}}\fi}}}}
689 \@ifclassloaded{amsart}%
690 {\renewcommand\bibsection{\section*{\refname}}}{}
691 \@ifclassloaded{amsbook}%
692 {\renewcommand\bibsection{\chapter*{\bibname}}}{}
693 \@ifundefined{bib@heading}{}{\let\bibsection\bib@heading}
694 \newcounter{NAT@ctr}
% thebibliography environment; its one argument #1 is handed to \@bibsetup.
% Begin code:
%  - emits the heading via \bibsection, then \bibpreamble and \bibfont;
%  - opens a list whose default label is \@biblabel{\arabic{NAT@ctr}} and
%    resets the NAT@ctr counter to 0;
%  - \newblock becomes \par when \ifNAT@openbib is true, otherwise a
%    stretchable horizontal skip;
%  - sets \sloppy, club/widow penalties of 4000 and \sfcode of `.' to 1000;
%  - aliases \citeN, \shortcite and \citeasnoun to \cite;
%  - switches to font size 7 with 9 baseline skip via \fontsize{7}{9}\selectfont
%    (NOTE(review): looks like a local/journal-specific addition rather than
%    stock natbib -- confirm before changing).
% End code: redefines \@noitemerr so an empty environment yields only a
% package warning, closes the list and cancels the last vertical skip.
695 \renewenvironment{thebibliography}[1]{%
696 \bibsection
697 \vspace{1\p@}\parindent \z@\bibpreamble\bibfont\list
698 {\@biblabel{\arabic{NAT@ctr}}}{\@bibsetup{#1}%
699 \setcounter{NAT@ctr}{0}}%
700 \ifNAT@openbib
701 \renewcommand\newblock{\par}
702 \else
703 \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}%
704 \fi
705 \sloppy\clubpenalty4000\widowpenalty4000
706 \sfcode`\.=1000\relax
707 \let\citeN\cite \let\shortcite\cite
708 \let\citeasnoun\cite\fontsize{7}{9}\selectfont
709 }{\def\@noitemerr{%
710 \PackageWarning{natbib}
711 {Empty `thebibliography' environment}}%
712 \endlist\vskip-\lastskip}
713 \let\bibfont\relax
714 \let\bibpreamble\relax
715 \providecommand\reset@font{\relax}
716 \providecommand\bibname{Bibliography}
717 \providecommand\refname{References}
718 \newcommand\NAT@citeundefined{\gdef \NAT@undefined {%
719 \PackageWarningNoLine{natbib}{There were undefined citations}}}
720 \let \NAT@undefined \relax
721 \newcommand\NAT@citemultiple{\gdef \NAT@multiple {%
722 \PackageWarningNoLine{natbib}{There were multiply defined citations}}}
723 \let \NAT@multiple \relax
724 \AtEndDocument{\NAT@undefined\NAT@multiple}
725 \providecommand\@mkboth[2]{}
726 \providecommand\MakeUppercase{\uppercase}
727 \providecommand{\@extra@b@citeb}{}
728 \gdef\@extra@binfo{}
729 \providecommand\hyper@natanchorstart[1]{}
730 \providecommand\hyper@natanchorend{}
731 \providecommand\hyper@natlinkstart[1]{}
732 \providecommand\hyper@natlinkend{}
733 \providecommand\hyper@natlinkbreak[2]{#1}
734 \@ifundefined{bbl@redefine}{}{%
735 \bbl@redefine\nocite#1{%
736 \@safe@activestrue\org@nocite{#1}\@safe@activesfalse}%
737 \bbl@redefine\@lbibitem[#1]#2{%
738 \@safe@activestrue\org@@lbibitem[#1]{#2}\@safe@activesfalse}%
739 }
740 \AtBeginDocument{\@ifundefined{bbl@redefine}{}{%
741 \bbl@redefine\@citex[#1][#2]#3{%
742 \@safe@activestrue\org@@citex[#1][#2]{#3}\@safe@activesfalse}%
743 \bbl@redefine\NAT@testdef#1#2{%
744 \@safe@activestrue\org@NAT@testdef{#1}{#2}\@safe@activesfalse}%
745 \@ifundefined{org@@lbibitem}{%
746 \bbl@redefine\@lbibitem[#1]#2{%
747 \@safe@activestrue\org@@lbibitem[#1]{#2}\@safe@activesfalse}}{}%
748 }}
749 \ifnum\NAT@sort>0
750 \newcommand\NAT@sort@cites[1]{%
751 \@tempcntb\m@ne
752 \let\@celt\delimiter
753 \def\NAT@num@list{}%
754 \def\NAT@cite@list{}%
755 \def\NAT@nonsort@list{}%
756 \@for \@citeb:=#1\do{\NAT@make@cite@list}%
757 \edef\NAT@cite@list{\NAT@cite@list\NAT@nonsort@list}%
758 \edef\NAT@cite@list{\expandafter\NAT@xcom\NAT@cite@list @@}}
759 \begingroup \catcode`\_=8
760 \gdef\NAT@make@cite@list{%
761 \edef\@citeb{\expandafter\@firstofone\@citeb}%
762 \@ifundefined{b@\@citeb\@extra@b@citeb}{\def\NAT@num{A}}%
763 {\NAT@parse{\@citeb}}%
764 \ifcat _\ifnum\z@<0\NAT@num _\else A\fi
765 \@tempcnta\NAT@num \relax
766 \ifnum \@tempcnta>\@tempcntb
767 \edef\NAT@num@list{\NAT@num@list \@celt{\NAT@num}}%
768 \edef\NAT@cite@list{\NAT@cite@list\@citeb,}%
769 \@tempcntb\@tempcnta
770 \else
771 \let\NAT@@cite@list=\NAT@cite@list \def\NAT@cite@list{}%
772 \edef\NAT@num@list{\expandafter\NAT@num@celt \NAT@num@list \@gobble @}%
773 {\let\@celt=\NAT@celt\NAT@num@list}%
774 \fi
775 \else
776 \edef\NAT@nonsort@list{\NAT@nonsort@list\@citeb,}%
777 \fi}
778 \endgroup
779 \def\NAT@celt#1{\ifnum #1<\@tempcnta
780 \xdef\NAT@cite@list{\NAT@cite@list\expandafter\NAT@nextc\NAT@@cite@list @@}%
781 \xdef\NAT@@cite@list{\expandafter\NAT@restc\NAT@@cite@list}%
782 \else
783 \xdef\NAT@cite@list{\NAT@cite@list\@citeb,\NAT@@cite@list}\let\@celt\@gobble%
784 \fi}
785 \def\NAT@num@celt#1#2{\ifx \@celt #1%
786 \ifnum #2<\@tempcnta
787 \@celt{#2}%
788 \expandafter\expandafter\expandafter\NAT@num@celt
789 \else
790 \@celt{\number\@tempcnta}\@celt{#2}%
791 \fi\fi}
792 \def\NAT@nextc#1,#2@@{#1,}
793 \def\NAT@restc#1,#2{#2}
794 \def\NAT@xcom#1,@@{#1}
795 \else
796 \newcommand\NAT@sort@cites[1]{\edef\NAT@cite@list{#1}}\fi
797 \InputIfFileExists{natbib.cfg}
798 {\typeout{Local config file natbib.cfg used}}{}
799 %%
800 %% <<<<< End of generated file <<<<<<
801 %%
802 %% End of file `natbib.sty'.
/**
 * Minimal getopt(3)-style command-line option scanner.
 *
 * Scanning state is kept on the function object itself:
 *   getopt.ind   - index into `args` of the next argument to examine
 *   getopt.arg   - argument of the option just returned (null if none)
 *   getopt.place - position inside args[getopt.ind]; -1 means "start a new argument"
 *
 * @param args  array of command-line argument strings
 * @param ostr  option string; a letter followed by ':' takes an argument
 * @return the option letter; '?' for an unknown option or a missing argument
 *         (':' instead for a missing argument when `ostr` starts with ':');
 *         null when option scanning is finished
 */
var getopt = function(args, ostr) {
	var oli; // option letter list index
	if (typeof(getopt.place) == 'undefined')
		getopt.ind = 0, getopt.arg = null, getopt.place = -1;
	if (getopt.place == -1) { // update scanning pointer
		if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
			getopt.place = -1;
			return null;
		}
		if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
			++getopt.ind;
			getopt.place = -1;
			return null;
		}
	}
	var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
	if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
		if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null.
		// The unknown letter was the last one in this argument: move on to the
		// next argument. Otherwise the next call would read past the end of the
		// string, get '' from charAt(), and match it at index 0 of `ostr`.
		if (getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
		return '?';
	}
	if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
		getopt.arg = null;
		if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
	} else { // need an argument
		if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
			getopt.arg = args[getopt.ind].substr(getopt.place); // argument glued to the option, e.g. "-w5"
		else if (args.length <= ++getopt.ind) { // no arg
			getopt.place = -1;
			if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
			return '?';
		} else getopt.arg = args[getopt.ind]; // white space
		getopt.place = -1;
		++getopt.ind;
	}
	return optopt;
}
37
/*
 * cmp_order.js main script.
 *
 * Compares the order/spacing of reads in an assembly layout (first input,
 * BED-like) with their placement on a reference (second input, PAF-like).
 * For each pair of adjacent reads on the reference it prints an "E" line when
 * the two reads are more than `ws` records apart in the BED (or belong to
 * different BED groups) and the discrepancy cannot be explained away; a final
 * "C" line gives the number of "E" lines.
 *
 * Options: -w INT  window size in records (default 5)
 *          -s INT  minimum span / tolerated distance difference (default 2000)
 */
var c, ws = 5, min_span = 2000;

while ((c = getopt(arguments, "w:s:")) != null)
	if (c == 'w') ws = parseInt(getopt.arg);
	else if (c == 's') min_span = parseInt(getopt.arg);

if (arguments.length - getopt.ind < 2) {
	print("Usage: k8 cmp_order.js <gfa2bed.bed> <paftop>");
	exit(1);
}

var b = new Bytes();

// ---- Pass 1: read the BED file ----
// bed[]  : one [group, col5, key, center] tuple per BED line, in file order
// h{}    : key -> index into bed[]; key is "col1:(col2+1)-col3"
// end{}  : keys of records lying within `ws` records of either end of a group
//          (a group is a run of lines sharing column 4; presumably a unitig)
var bed = [], h = {}, end = {}, last_u = null, last_r = null, to_end = 0;
var f = new File(arguments[getopt.ind]);
while (f.readline(b) >= 0) {
	var t = b.toString().split("\t");
	var r = t[0] + ":" + (parseInt(t[1]) + 1) + "-" + t[2];
	h[r] = bed.length;
	if (to_end > 0) end[r] = 1, --to_end; // still within the first `ws` records of the current group
	if (last_u == null || t[3] != last_u) { // a new group starts here
		end[r] = 1, to_end = ws - 1;
		if (last_r != null) { // mark the tail of the previous group
			end[last_r] = 1;
			for (var j = bed.length - 1; j >= 0 && j >= bed.length - ws; --j)
				end[bed[j][2]] = 1;
		}
	}
	var center = Math.floor(parseInt(t[5]) + (parseInt(t[2]) - parseInt(t[1])) / 2);
	bed.push([t[3], t[4], r, center]);
	last_r = r; last_u = t[3];
}
// mark the tail of the last group; skipped for an empty file so that no
// bogus "null" key is created
if (last_r != null) end[last_r] = 1;
for (var j = bed.length - 1; j >= 0 && j >= bed.length - ws; --j)
	end[bed[j][2]] = 1;
f.close();

// ---- Pass 2: read the PAF file ----
// paf[] : [query name, target name, strand, target start, projected center]
var paf = [];
f = new File(arguments[getopt.ind+1]);
while (f.readline(b) >= 0) {
	var t = b.toString().split("\t");
	if (parseInt(t[3]) - parseInt(t[2]) < min_span) continue; // a tiny hit
	if (paf.length && t[0] == paf[paf.length - 1][0]) continue; // dup
	var center;
	t[1] = parseInt(t[1]);
	t[2] = parseInt(t[2]); t[3] = parseInt(t[3]);
	t[8] = parseInt(t[8]);
	// Extend the target interval by the unaligned query ends (swapped on the
	// reverse strand) and take the midpoint. t[7] is still a string here; the
	// subtraction coerces it to a number.
	if (t[4] == '+') {
		center = Math.floor(((t[7] - t[2]) + (t[8] + (t[1] - t[3]))) / 2);
	} else {
		center = Math.floor(((t[7] - (t[1] - t[3])) + (t[8] + t[2])) / 2);
	}
	paf.push([t[0], t[5], t[4], parseInt(t[7]), center]);
}
f.close();

// sort by target name, then by target start
paf.sort(function(x,y){return x[1]<y[1]?-1:x[1]>y[1]?1:x[3]-y[3]});

// chr_se{} : target name -> [first index, one-past-last index) in sorted paf[]
var chr_se = {}, start = 0;
for (var i = 1; i <= paf.length; ++i) {
	if (i == paf.length || paf[i][1] != paf[i-1][1]) {
		chr_se[paf[i-1][1]] = [start, i];
		start = i;
	}
}

// ---- Compare neighbours on the reference against the BED order ----
var cnt = 0;
for (var k in chr_se) {
	var st = chr_se[k][0], en = chr_se[k][1];
	// skip the first and last ws+1 hits on each target
	for (var i = st + ws + 1; i < en - ws - 1; ++i) {
		var j;
		for (j = i - 1; j >= 0; --j) // closest preceding hit from a different read
			if (paf[i][0] != paf[j][0])
				break;
		if (j < 0) continue; // the first read has multiple mappings
		if (paf[i][1] != paf[j][1]) continue; // different reference chr
		var hi = h[paf[i][0]], hj = h[paf[j][0]];
		// a read that is absent from the BED cannot be compared; previously
		// this crashed with a TypeError on bed[undefined]
		if (hi == null || hj == null) continue;
		var paf_diff = paf[i][4] - paf[j][4];
		var bed_diff = bed[hi][0] == bed[hj][0]? Math.abs(bed[hi][3] - bed[hj][3]) : '*';
		if (hi - hj > ws || hj - hi > ws || bed[hi][0] != bed[hj][0]) {
			if (end[paf[i][0]] != null && end[paf[j][0]] != null) continue; // both reads sit near group ends
			if (bed_diff != '*' && Math.abs(paf_diff - bed_diff) < min_span) continue; // distances agree
			print("E", paf[j][1], bed[hi][0] != bed[hj][0]? '*' : hi-hj, paf_diff, bed_diff, bed[hj][0], bed[hi][0], paf[j][0], paf[i][0]);
			++cnt;
		}
	}
}

print("C", cnt);

b.destroy();
Binary diff not shown
0 if (arguments.length != 2) {
1 print("Usage: k8 paf_srtcmp.js <bwamem.srt.paf> <minimap.srt.paf>");
2 exit(1);
3 }
4
/**
 * Read one group of consecutive records sharing the same first column.
 *
 * Each line is split on TAB; columns 1-3 and 6-11 (0-based) are converted
 * with parseInt().
 *
 * @param f     object with readline(buf), returning a negative value at EOF
 * @param buf   line buffer filled by f.readline(); read back via toString()
 * @param last  first record of this group carried over from the previous
 *              call, or null when there is none
 * @return [records, next]: the records of the group, and the first record of
 *         the following group (null once the input is exhausted)
 */
function read1(f, buf, last)
{
	var INT_COLS = [1, 2, 3, 6, 7, 8, 9, 10, 11];
	var group = [], pending = null;
	if (last != null) group.push(last);
	while (f.readline(buf) >= 0) {
		var rec = buf.toString().split("\t");
		for (var k = 0; k < INT_COLS.length; ++k) {
			var col = INT_COLS[k];
			rec[col] = parseInt(rec[col]);
		}
		if (last != null && last[0] != rec[0]) { // a new name: hand it back for the next call
			pending = rec;
			break;
		}
		if (last == null) last = rec; // the very first record defines the group
		group.push(rec);
	}
	return [group, pending];
}
24
25 var buf = new Bytes();
26 var fb = new File(arguments[0]);
27 var fm = new File(arguments[1]);
28
29 var tot = 0, matched = 0;
30
31 var sb = read1(fb, buf, null);
32 var sm = read1(fm, buf, null);
33
/**
 * Advance the two record streams until the current groups `sb` and `sm`
 * carry the same name, or until one of them runs out of records.
 *
 * Operates on the script-level variables sb/sm (current groups as returned by
 * read1), fb/fm (input files), buf and tot. Groups skipped on the `sb` side
 * still count towards `tot` when they consist of a single record.
 *
 * Returns immediately if either group is already empty, so the caller's own
 * `length == 0` checks decide what to do; previously an exhausted stream made
 * the name comparison dereference `undefined` and throw.
 */
function sync()
{
	while (sb[0].length > 0 && sm[0].length > 0 && sb[0][0][0] != sm[0][0][0]) {
		if (sb[0][0][0] < sm[0][0][0]) {
			if (sb[0].length == 1) ++tot;
			sb = read1(fb, buf, sb[1]);
		} else if (sb[0][0][0] > sm[0][0][0]) {
			sm = read1(fm, buf, sm[1]);
		}
	}
}
48
// Main comparison loop. For every name that has exactly one record in the
// first file, look for a record of the same name in the second file on the
// same strand (column 4) and target (column 5) whose target interval
// (columns 7/8) overlaps it. The record counts as matched when the ratio of
// the two end-to-start spans is at least .3333; unmatched records are printed.
for (;;) {
	sync();
	if (sb[0].length == 0) break;
	if (sm[0].length == 0) {
		// second file exhausted: only count what is left in the first one
		for (; sb[0].length; sb = read1(fb, buf, sb[1]))
			if (sb[0].length == 1) ++tot;
		break;
	}
	if (sb[0].length == 1) {
		var ref = sb[0][0], found = false;
		++tot;
		for (var k = 0; k < sm[0].length; ++k) {
			var cand = sm[0][k];
			if (ref[4] != cand[4] || ref[5] != cand[5]) continue; // strand or target differs
			if (!(ref[8] > cand[7] && cand[8] > ref[7])) continue; // intervals do not overlap
			var span_a = ref[8] - cand[7];
			var span_b = cand[8] - ref[7];
			var ratio = span_a < span_b? span_a / span_b : span_b / span_a;
			if (ratio >= .3333) {
				++matched;
				found = true;
			}
			break; // only the first overlapping candidate is evaluated
		}
		if (!found) print(ref.join("\t"));
	}
	sb = read1(fb, buf, sb[1]);
	sm = read1(fm, buf, sm[1]);
	if (sb[0].length == 0) break;
}

fb.close();
fm.close();
buf.destroy();

// summary: records considered, records matched, fraction matched
print(tot, matched, matched/tot);
/*
 * Derive read-to-read overlaps from read-to-reference alignments in PAF.
 *
 * Input: a PAF file (first argument; `new File()` with no argument otherwise,
 * presumably stdin). The algorithm keeps a list of "active" alignments and
 * only looks backwards, so it assumes the input is sorted by target name and
 * then target start -- TODO confirm against the producing pipeline.
 *
 * Output, one line per pair: <read> <earlier read> <length>, where <length>
 * is the overlap on the target, or -1 when the new read ends at or before the
 * end of the earlier one.
 */
var min_l = 2000, min_q = 10; // min query length (col 2) and min value of col 12 (mapping quality in PAF)

var file = arguments.length? new File(arguments[0]) : new File();
var buf = new Bytes();

var a = []; // alignments that may still overlap upcoming records
while (file.readline(buf) >= 0) {
	var t = buf.toString().split("\t");
	for (var j = 1; j <= 3; ++j) t[j] = parseInt(t[j]);
	for (var j = 6; j <= 11; ++j) t[j] = parseInt(t[j]);
	if (t[1] < min_l || t[11] < min_q) continue;
	for (var i = 0; i < a.length; ++i) {
		if (a[i][5] != t[5] || t[7] + min_l >= a[i][8]) {
			// retire a[i]: it is on a different target sequence (reads on
			// different targets cannot overlap), or it overlaps the new
			// record by no more than min_l
			a[i] = null;
		} else if (t[8] <= a[i][8]) {
			print(t[0], a[i][0], -1);
		} else {
			print(t[0], a[i][0], a[i][8] - t[7]);
		}
	}
	// compact the active list, dropping retired entries
	var n = 0;
	for (var i = 0; i < a.length; ++i)
		if (a[i] != null) a[n++] = a[i];
	a.length = n;
	a.push(t);
}

buf.destroy();
file.close();
0 #!/usr/bin/perl
1
2 use strict;
3 use warnings;
4
# Compute how many of the name pairs listed in a pair file are also found in
# a second, PAF-like stream.
#
#   arg 1       : pair file (optionally gzip'd); columns 1 and 2 form a pair
#   other input : remaining files on the command line, or STDIN; columns 1 and
#                 6 form a pair, checked in both orientations
#
# Output: <pairs not found> <pairs found> <fraction found>
my $fn = shift(@ARGV);
die("Usage: $0 <pairs[.gz]> [<in.paf> ...]\n") unless defined($fn);

# Three-argument / list-form open: the file name is never interpreted by a shell.
my $fh;
if ($fn =~ /\.gz$/) {
	open($fh, '-|', 'gzip', '-dc', $fn) || die("ERROR: failed to run gzip on '$fn': $!\n");
} else {
	open($fh, '<', $fn) || die("ERROR: failed to open '$fn': $!\n");
}
my %h; # "name1\tname2" => 1 (listed only) or 2 (also seen in the second input)
while (<$fh>) {
	my @t = split;
	next if @t < 2; # skip blank or truncated lines
	$h{"$t[0]\t$t[1]"} = 1;
}
close($fh);

while (<>) {
	my @t = split;
	next if @t < 6; # skip blank or truncated lines
	$h{"$t[0]\t$t[5]"} = 2 if ($h{"$t[0]\t$t[5]"});
	$h{"$t[5]\t$t[0]"} = 2 if ($h{"$t[5]\t$t[0]"});
}

my @cnt = (0, 0); # [0]: pairs never seen, [1]: pairs seen
for my $x (keys %h) {
	++$cnt[$h{$x}-1];
}
my $total = $cnt[0] + $cnt[1];
# Report a fraction of 0 rather than dying on division by zero when the pair
# file was empty.
print("$cnt[0]\t$cnt[1]\t", ($total > 0? $cnt[1] / $total : 0), "\n");