Codebase list fastqc / dd57a3e
New upstream version 0.11.7+dfsg Andreas Tille 6 years ago
80 changed file(s) with 9202 addition(s) and 8622 deletion(s). Raw diff Collapse all Expand all
0 <?xml version="1.0" encoding="UTF-8"?>
1 <classpath>
2 <classpathentry kind="src" path=""/>
3 <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
4 <classpathentry kind="lib" path="jbzip2-0.9.jar"/>
5 <classpathentry kind="lib" path="sam-1.103.jar"/>
6 <classpathentry kind="lib" path="cisd-jhdf5.jar"/>
7 <classpathentry kind="output" path="bin"/>
8 </classpath>
0 # Compiled files
1 /bin/
2
3 # Compiled class file
4 *.class
5
6 # Log file
7 *.log
8
9 # BlueJ files
10 *.ctxt
11
12 # Mobile Tools for Java (J2ME)
13 .mtj.tmp/
14
15 # Package Files #
16 *.jar
17 *.war
18 *.ear
19 *.zip
20 *.tar.gz
21 *.rar
22
23 # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
24 hs_err_pid*
0 <?xml version="1.0" encoding="UTF-8"?>
1 <projectDescription>
2 <name>FastQC</name>
3 <comment></comment>
4 <projects>
5 </projects>
6 <buildSpec>
7 <buildCommand>
8 <name>org.eclipse.jdt.core.javabuilder</name>
9 <arguments>
10 </arguments>
11 </buildCommand>
12 </buildSpec>
13 <natures>
14 <nature>org.eclipse.jdt.core.javanature</nature>
15 </natures>
16 </projectDescription>
0 #Tue Nov 23 20:41:22 GMT 2010
1 eclipse.preferences.version=1
2 org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
4 org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
5 org.eclipse.jdt.core.compiler.compliance=1.5
6 org.eclipse.jdt.core.compiler.debug.lineNumber=generate
7 org.eclipse.jdt.core.compiler.debug.localVariable=generate
8 org.eclipse.jdt.core.compiler.debug.sourceFile=generate
9 org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 org.eclipse.jdt.core.compiler.source=1.5
0 # This file contains a set of sequence fragments which will be explicitly
1 # searched against your library. The reporting will be similar to the
2 # Kmer plot, except that every sequence in this list will be shown so
3 # you can use this to judge the level of adapter read-through even if those
4 # adapter sequences aren't picked out by the Kmer module.
5 #
6 # Since every sequence here will be analysed and the results plotted it
7 # doesn't make any sense to include duplicate sequences, or to add too
8 # many sequences since your plot will end up a mess.
9 #
10 # You can add more sequences to the file by putting one line per entry
11 # and specifying a name[tab]sequence. If the contaminant you add is
12 # likely to be of use to others please consider sending it to the FastQC
13 # authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/
14 # or by directly emailing simon.andrews@babraham.ac.uk so other users of
15 # the program can benefit.
16 #
17 # For the time being it's going to be easier to interpret this plot if all
18 # of the sequences provided are the same length, so we've gone with 12bp
19 # fragments for now.
20
21 Illumina Universal Adapter AGATCGGAAGAG
22 Illumina Small RNA 3' Adapter TGGAATTCTCGG
23 Illumina Small RNA 5' Adapter GATCGTCGGACT
24 Nextera Transposase Sequence CTGTCTCTTATA
0 # This file contains a set of sequence fragments which will be explicitly
1 # searched against your library. The reporting will be similar to the
2 # Kmer plot, except that every sequence in this list will be shown so
3 # you can use this to judge the level of adapter read-through even if those
4 # adapter sequences aren't picked out by the Kmer module.
5 #
6 # Since every sequence here will be analysed and the results plotted it
7 # doesn't make any sense to include duplicate sequences, or to add too
8 # many sequences since your plot will end up a mess.
9 #
10 # You can add more sequences to the file by putting one line per entry
11 # and specifying a name[tab]sequence. If the contaminant you add is
12 # likely to be of use to others please consider sending it to the FastQC
13 # authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/
14 # or by directly emailing simon.andrews@babraham.ac.uk so other users of
15 # the program can benefit.
16 #
17 # For the time being it's going to be easier to interpret this plot if all
18 # of the sequences provided are the same length, so we've gone with 12bp
19 # fragments for now.
20
21 Illumina Universal Adapter AGATCGGAAGAG
22 Illumina Small RNA 3' Adapter TGGAATTCTCGG
23 Illumina Small RNA 5' Adapter GATCGTCGGACT
24 Nextera Transposase Sequence CTGTCTCTTATA
2525 SOLID Small RNA Adapter CGCCTTGGCCGT
0 # This file contains a list of potential contaminants which are
1 # frequently found in high throughput sequencing reactions. These
2 # are mostly sequences of adapters / primers used in the various
3 # sequencing chemistries.
4 #
5 # Please DO NOT rely on these sequences to design your own oligos, some
6 # of them are truncated at ambiguous positions, and none of them are
7 # definitive sequences from the manufacturers so don't blame us if you
8 # try to use them and they don't work.
9 #
10 # You can add more sequences to the file by putting one line per entry
11 # and specifying a name[tab]sequence. If the contaminant you add is
12 # likely to be of use to others please consider sending it to the FastQC
13 # authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/
14 # or by directly emailing simon.andrews@babraham.ac.uk so other users of
15 # the program can benefit.
16
17 Illumina Single End Adapter 1 GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
18 Illumina Single End Adapter 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
19 Illumina Single End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
20 Illumina Single End PCR Primer 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
21 Illumina Single End Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
22
23 Illumina Paired End Adapter 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
24 Illumina Paired End Adapter 2 GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
25 Illumina Paried End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
26 Illumina Paired End PCR Primer 2 CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
27 Illumina Paried End Sequencing Primer 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
28 Illumina Paired End Sequencing Primer 2 CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
29
30 Illumina DpnII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGAC
31 Illumina DpnII expression Adapter 2 CAAGCAGAAGACGGCATACGA
32 Illumina DpnII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
33 Illumina DpnII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
34 Illumina DpnII expression Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
35
36 Illumina NlaIII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGACATG
37 Illumina NlaIII expression Adapter 2 CAAGCAGAAGACGGCATACGA
38 Illumina NlaIII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
39 Illumina NlaIII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
40 Illumina NlaIII expression Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
41
42 Illumina Small RNA Adapter 1 GTTCAGAGTTCTACAGTCCGACGATC
43 Illumina Small RNA Adapter 2 TGGAATTCTCGGGTGCCAAGG
44 Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
45 Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
46 Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
47 Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
48
49 Illumina Multiplexing Adapter 1 GATCGGAAGAGCACACGTCT
50 Illumina Multiplexing Adapter 2 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
51 Illumina Multiplexing PCR Primer 1.01 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
52 Illumina Multiplexing PCR Primer 2.01 GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
53 Illumina Multiplexing Read1 Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
54 Illumina Multiplexing Index Sequencing Primer GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
55 Illumina Multiplexing Read2 Sequencing Primer GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
56
57 Illumina PCR Primer Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC
58 Illumina PCR Primer Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC
59 Illumina PCR Primer Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC
60 Illumina PCR Primer Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC
61 Illumina PCR Primer Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC
62 Illumina PCR Primer Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC
63 Illumina PCR Primer Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC
64 Illumina PCR Primer Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC
65 Illumina PCR Primer Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC
66 Illumina PCR Primer Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC
67 Illumina PCR Primer Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC
68 Illumina PCR Primer Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC
69
70 Illumina DpnII Gex Adapter 1 GATCGTCGGACTGTAGAACTCTGAAC
71 Illumina DpnII Gex Adapter 1.01 ACAGGTTCAGAGTTCTACAGTCCGAC
72 Illumina DpnII Gex Adapter 2 CAAGCAGAAGACGGCATACGA
73 Illumina DpnII Gex Adapter 2.01 TCGTATGCCGTCTTCTGCTTG
74 Illumina DpnII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
75 Illumina DpnII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
76 Illumina DpnII Gex Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
77
78 Illumina NlaIII Gex Adapter 1.01 TCGGACTGTAGAACTCTGAAC
79 Illumina NlaIII Gex Adapter 1.02 ACAGGTTCAGAGTTCTACAGTCCGACATG
80 Illumina NlaIII Gex Adapter 2.01 CAAGCAGAAGACGGCATACGA
81 Illumina NlaIII Gex Adapter 2.02 TCGTATGCCGTCTTCTGCTTG
82 Illumina NlaIII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
83 Illumina NlaIII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
84 Illumina NlaIII Gex Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
85
86 Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
87 Illumina 5p RNA Adapter GTTCAGAGTTCTACAGTCCGACGATC
88 Illumina RNA Adapter1 TGGAATTCTCGGGTGCCAAGG
89
90 Illumina Small RNA 3p Adapter 1 ATCTCGTATGCCGTCTTCTGCTTG
91 Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
92 Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
93 Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
94
95 TruSeq Universal Adapter AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
96 TruSeq Adapter, Index 1 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG
97 TruSeq Adapter, Index 2 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG
98 TruSeq Adapter, Index 3 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG
99 TruSeq Adapter, Index 4 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG
100 TruSeq Adapter, Index 5 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG
101 TruSeq Adapter, Index 6 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG
102 TruSeq Adapter, Index 7 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
103 TruSeq Adapter, Index 8 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG
104 TruSeq Adapter, Index 9 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG
105 TruSeq Adapter, Index 10 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG
106 TruSeq Adapter, Index 11 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG
107 TruSeq Adapter, Index 12 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG
108 TruSeq Adapter, Index 13 GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAACTCTCGTATGCCGTCTTCTGCTTG
109 TruSeq Adapter, Index 14 GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCGTCTCGTATGCCGTCTTCTGCTTG
110 TruSeq Adapter, Index 15 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAGTCTCGTATGCCGTCTTCTGCTTG
111 TruSeq Adapter, Index 16 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCCTCTCGTATGCCGTCTTCTGCTTG
112 TruSeq Adapter, Index 18 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG
113 TruSeq Adapter, Index 19 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAACTCTCGTATGCCGTCTTCTGCTTG
114 TruSeq Adapter, Index 20 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCTTCTCGTATGCCGTCTTCTGCTTG
115 TruSeq Adapter, Index 21 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGGTCTCGTATGCCGTCTTCTGCTTG
116 TruSeq Adapter, Index 22 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGTTCTCGTATGCCGTCTTCTGCTTG
117 TruSeq Adapter, Index 23 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCACTCTTCTCGTATGCCGTCTTCTGCTTG
118 TruSeq Adapter, Index 25 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATCTCGTATGCCGTCTTCTGCTTG
119 TruSeq Adapter, Index 27 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTTTCTCGTATGCCGTCTTCTGCTTG
120
121 Illumina RNA RT Primer GCCTTGGCACCCGAGAATTCCA
122 Illumina RNA PCR Primer AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA
123
124 RNA PCR Primer, Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
125 RNA PCR Primer, Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
126 RNA PCR Primer, Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
127 RNA PCR Primer, Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
128 RNA PCR Primer, Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
129 RNA PCR Primer, Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
130 RNA PCR Primer, Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
131 RNA PCR Primer, Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
132 RNA PCR Primer, Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
133 RNA PCR Primer, Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
134 RNA PCR Primer, Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
135 RNA PCR Primer, Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
136 RNA PCR Primer, Index 13 CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
137 RNA PCR Primer, Index 14 CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
138 RNA PCR Primer, Index 15 CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
139 RNA PCR Primer, Index 16 CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
140 RNA PCR Primer, Index 17 CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
141 RNA PCR Primer, Index 18 CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
142 RNA PCR Primer, Index 19 CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
143 RNA PCR Primer, Index 20 CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
144 RNA PCR Primer, Index 21 CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
145 RNA PCR Primer, Index 22 CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
146 RNA PCR Primer, Index 23 CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
147 RNA PCR Primer, Index 24 CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
148 RNA PCR Primer, Index 25 CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
149 RNA PCR Primer, Index 26 CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
150 RNA PCR Primer, Index 27 CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
151 RNA PCR Primer, Index 28 CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
152 RNA PCR Primer, Index 29 CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
153 RNA PCR Primer, Index 30 CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
154 RNA PCR Primer, Index 31 CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
155 RNA PCR Primer, Index 32 CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
156 RNA PCR Primer, Index 33 CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
157 RNA PCR Primer, Index 34 CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
158 RNA PCR Primer, Index 35 CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
159 RNA PCR Primer, Index 36 CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
160 RNA PCR Primer, Index 37 CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
161 RNA PCR Primer, Index 38 CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
162 RNA PCR Primer, Index 39 CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
163 RNA PCR Primer, Index 40 CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
164 RNA PCR Primer, Index 41 CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
165 RNA PCR Primer, Index 42 CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
166 RNA PCR Primer, Index 43 CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
167 RNA PCR Primer, Index 44 CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
168 RNA PCR Primer, Index 45 CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
169 RNA PCR Primer, Index 46 CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
170 RNA PCR Primer, Index 47 CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
171 RNA PCR Primer, Index 48 CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
172
173 ABI Dynabead EcoP Oligo CTGATCTAGAGGTACCGGATCCCAGCAGT
174 ABI Solid3 Adapter A CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG
175 ABI Solid3 Adapter B CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT
176 ABI Solid3 5' AMP Primer CCACTACGCCTCCGCTTTCCTCTCTATG
177 ABI Solid3 3' AMP Primer CTGCCCCGGGTTCCTCATTCT
178 ABI Solid3 EF1 alpha Sense Primer CATGTGTGTTGAGAGCTTC
179 ABI Solid3 EF1 alpha Antisense Primer GAAAACCAAAGTGGTCCAC
180 ABI Solid3 GAPDH Forward Primer TTAGCACCCCTGGCCAAGG
181 ABI Solid3 GAPDH Reverse Primer CTTACTCCTTGGAGGCCATG
0 # This file contains a list of potential contaminants which are
1 # frequently found in high throughput sequencing reactions. These
2 # are mostly sequences of adapters / primers used in the various
3 # sequencing chemistries.
4 #
5 # Please DO NOT rely on these sequences to design your own oligos, some
6 # of them are truncated at ambiguous positions, and none of them are
7 # definitive sequences from the manufacturers so don't blame us if you
8 # try to use them and they don't work.
9 #
10 # You can add more sequences to the file by putting one line per entry
11 # and specifying a name[tab]sequence. If the contaminant you add is
12 # likely to be of use to others please consider sending it to the FastQC
13 # authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/
14 # or by directly emailing simon.andrews@babraham.ac.uk so other users of
15 # the program can benefit.
16
17 Illumina Single End Adapter 1 GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
18 Illumina Single End Adapter 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
19 Illumina Single End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
20 Illumina Single End PCR Primer 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
21 Illumina Single End Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
22
23 Illumina Paired End Adapter 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
24 Illumina Paired End Adapter 2 GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
25 Illumina Paried End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
26 Illumina Paired End PCR Primer 2 CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
27 Illumina Paried End Sequencing Primer 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
28 Illumina Paired End Sequencing Primer 2 CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
29
30 Illumina DpnII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGAC
31 Illumina DpnII expression Adapter 2 CAAGCAGAAGACGGCATACGA
32 Illumina DpnII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
33 Illumina DpnII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
34 Illumina DpnII expression Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
35
36 Illumina NlaIII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGACATG
37 Illumina NlaIII expression Adapter 2 CAAGCAGAAGACGGCATACGA
38 Illumina NlaIII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
39 Illumina NlaIII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
40 Illumina NlaIII expression Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
41
42 Illumina Small RNA Adapter 1 GTTCAGAGTTCTACAGTCCGACGATC
43 Illumina Small RNA Adapter 2 TGGAATTCTCGGGTGCCAAGG
44 Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
45 Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
46 Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
47
48 Illumina Multiplexing Adapter 1 GATCGGAAGAGCACACGTCT
49 Illumina Multiplexing Adapter 2 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
50 Illumina Multiplexing PCR Primer 1.01 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
51 Illumina Multiplexing PCR Primer 2.01 GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
52 Illumina Multiplexing Read1 Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
53 Illumina Multiplexing Index Sequencing Primer GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
54 Illumina Multiplexing Read2 Sequencing Primer GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
55
56 Illumina PCR Primer Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC
57 Illumina PCR Primer Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC
58 Illumina PCR Primer Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC
59 Illumina PCR Primer Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC
60 Illumina PCR Primer Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC
61 Illumina PCR Primer Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC
62 Illumina PCR Primer Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC
63 Illumina PCR Primer Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC
64 Illumina PCR Primer Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC
65 Illumina PCR Primer Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC
66 Illumina PCR Primer Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC
67 Illumina PCR Primer Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC
68
69 Illumina DpnII Gex Adapter 1 GATCGTCGGACTGTAGAACTCTGAAC
70 Illumina DpnII Gex Adapter 1.01 ACAGGTTCAGAGTTCTACAGTCCGAC
71 Illumina DpnII Gex Adapter 2 CAAGCAGAAGACGGCATACGA
72 Illumina DpnII Gex Adapter 2.01 TCGTATGCCGTCTTCTGCTTG
73 Illumina DpnII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
74 Illumina DpnII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
75 Illumina DpnII Gex Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
76
77 Illumina NlaIII Gex Adapter 1.01 TCGGACTGTAGAACTCTGAAC
78 Illumina NlaIII Gex Adapter 1.02 ACAGGTTCAGAGTTCTACAGTCCGACATG
79 Illumina NlaIII Gex Adapter 2.01 CAAGCAGAAGACGGCATACGA
80 Illumina NlaIII Gex Adapter 2.02 TCGTATGCCGTCTTCTGCTTG
81 Illumina NlaIII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
82 Illumina NlaIII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
83 Illumina NlaIII Gex Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
84
85 Illumina 5p RNA Adapter GTTCAGAGTTCTACAGTCCGACGATC
86 Illumina RNA Adapter1 TGGAATTCTCGGGTGCCAAGG
87
88 Illumina Small RNA 3p Adapter 1 ATCTCGTATGCCGTCTTCTGCTTG
89 Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
90
91 TruSeq Universal Adapter AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
92 TruSeq Adapter, Index 1 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG
93 TruSeq Adapter, Index 2 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG
94 TruSeq Adapter, Index 3 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG
95 TruSeq Adapter, Index 4 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG
96 TruSeq Adapter, Index 5 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG
97 TruSeq Adapter, Index 6 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG
98 TruSeq Adapter, Index 7 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
99 TruSeq Adapter, Index 8 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG
100 TruSeq Adapter, Index 9 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG
101 TruSeq Adapter, Index 10 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG
102 TruSeq Adapter, Index 11 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG
103 TruSeq Adapter, Index 12 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG
104 TruSeq Adapter, Index 13 GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAACTCTCGTATGCCGTCTTCTGCTTG
105 TruSeq Adapter, Index 14 GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCGTCTCGTATGCCGTCTTCTGCTTG
106 TruSeq Adapter, Index 15 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAGTCTCGTATGCCGTCTTCTGCTTG
107 TruSeq Adapter, Index 16 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCCTCTCGTATGCCGTCTTCTGCTTG
108 TruSeq Adapter, Index 18 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG
109 TruSeq Adapter, Index 19 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAACTCTCGTATGCCGTCTTCTGCTTG
110 TruSeq Adapter, Index 20 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCTTCTCGTATGCCGTCTTCTGCTTG
111 TruSeq Adapter, Index 21 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGGTCTCGTATGCCGTCTTCTGCTTG
112 TruSeq Adapter, Index 22 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGTTCTCGTATGCCGTCTTCTGCTTG
113 TruSeq Adapter, Index 23 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCACTCTTCTCGTATGCCGTCTTCTGCTTG
114 TruSeq Adapter, Index 25 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATCTCGTATGCCGTCTTCTGCTTG
115 TruSeq Adapter, Index 27 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTTTCTCGTATGCCGTCTTCTGCTTG
116
117 Illumina RNA RT Primer GCCTTGGCACCCGAGAATTCCA
118 Illumina RNA PCR Primer AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA
119
120 RNA PCR Primer, Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
121 RNA PCR Primer, Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
122 RNA PCR Primer, Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
123 RNA PCR Primer, Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
124 RNA PCR Primer, Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
125 RNA PCR Primer, Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
126 RNA PCR Primer, Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
127 RNA PCR Primer, Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
128 RNA PCR Primer, Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
129 RNA PCR Primer, Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
130 RNA PCR Primer, Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
131 RNA PCR Primer, Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
132 RNA PCR Primer, Index 13 CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
133 RNA PCR Primer, Index 14 CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
134 RNA PCR Primer, Index 15 CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
135 RNA PCR Primer, Index 16 CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
136 RNA PCR Primer, Index 17 CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
137 RNA PCR Primer, Index 18 CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
138 RNA PCR Primer, Index 19 CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
139 RNA PCR Primer, Index 20 CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
140 RNA PCR Primer, Index 21 CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
141 RNA PCR Primer, Index 22 CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
142 RNA PCR Primer, Index 23 CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
143 RNA PCR Primer, Index 24 CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
144 RNA PCR Primer, Index 25 CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
145 RNA PCR Primer, Index 26 CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
146 RNA PCR Primer, Index 27 CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
147 RNA PCR Primer, Index 28 CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
148 RNA PCR Primer, Index 29 CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
149 RNA PCR Primer, Index 30 CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
150 RNA PCR Primer, Index 31 CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
151 RNA PCR Primer, Index 32 CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
152 RNA PCR Primer, Index 33 CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
153 RNA PCR Primer, Index 34 CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
154 RNA PCR Primer, Index 35 CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
155 RNA PCR Primer, Index 36 CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
156 RNA PCR Primer, Index 37 CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
157 RNA PCR Primer, Index 38 CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
158 RNA PCR Primer, Index 39 CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
159 RNA PCR Primer, Index 40 CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
160 RNA PCR Primer, Index 41 CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
161 RNA PCR Primer, Index 42 CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
162 RNA PCR Primer, Index 43 CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
163 RNA PCR Primer, Index 44 CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
164 RNA PCR Primer, Index 45 CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
165 RNA PCR Primer, Index 46 CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
166 RNA PCR Primer, Index 47 CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
167 RNA PCR Primer, Index 48 CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
168
169 ABI Dynabead EcoP Oligo CTGATCTAGAGGTACCGGATCCCAGCAGT
170 ABI Solid3 Adapter A CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG
171 ABI Solid3 Adapter B CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT
172 ABI Solid3 5' AMP Primer CCACTACGCCTCCGCTTTCCTCTCTATG
173 ABI Solid3 3' AMP Primer CTGCCCCGGGTTCCTCATTCT
174 ABI Solid3 EF1 alpha Sense Primer CATGTGTGTTGAGAGCTTC
175 ABI Solid3 EF1 alpha Antisense Primer GAAAACCAAAGTGGTCCAC
176 ABI Solid3 GAPDH Forward Primer TTAGCACCCCTGGCCAAGG
177 ABI Solid3 GAPDH Reverse Primer CTTACTCCTTGGAGGCCATG
178
179
180
181 Clontech Universal Primer Mix Short CTAATACGACTCACTATAGGGC
182 Clontech Universal Primer Mix Long CTAATACGACTCACTATAGGGCAAGCAGTGGTATCAACGCAGAGT
183 Clontech SMARTer II A Oligonucleotide AAGCAGTGGTATCAACGCAGAGTAC
184 Clontech SMART CDS Primer II A AAGCAGTGGTATCAACGCAGAGTACT
185
11 # module at all by setting the value below to 1 for the
22 # modules you want to remove.
33 duplication ignore 0
4 kmer ignore 0
4 kmer ignore 1
55 n_content ignore 0
66 overrepresented ignore 0
77 quality_base ignore 0
2121 <p>
2222 The plot shows the deviation from the average quality for each tile.
2323 The colours are on a cold to hot scale, with cold colours being
24 positions where the quality was at or below the average for that
24 positions where the quality was at or above the average for that
2525 base in the run, and hotter colours indicate that a tile had worse
2626 qualities than other tiles for that base. In the example below you
2727 can see that certain tiles show consistently poor quality. A good
0 Installing FastQC
1 -------------------
2 FastQC is a Java application. In order to run, it needs your system to have a suitable
3 Java Runtime Environment (JRE) installed. Before you try to run FastQC you should therefore
4 ensure that you have a suitable JRE. There are a number of different JREs available;
5 however, the ones we have tested are the v1.6-v1.8 JREs from Oracle. These are available
6 for a number of different platforms.
7
8 Windows/Linux: Go to java.com - click on Free Java Download - DON'T click the large red button
9 but choose the smaller link to "See all java downloads". Find your operating system and select
10 the appropriate offline installer. If you are using a 64bit operating system (and nearly
11 everyone is these days), then make sure you select the 64bit version of the installer.
12
13 OSX: On newer versions of OSX you need to install the Java Development Kit. The normal Java
14 runtime environment IS NOT enough. To get this go to java.com, click "Free java download",
15 then IGNORE the big red button, and select "See all java downloads", on the next screen select
16 "Looking for the JDK?" from the left hand menu and select the link to "JDK downloads" in the
17 first paragraph. You can then click the "Download" button underneath JDK in the page you are
18 taken to. Sorry this is such a pain!
19
20
21 If you're not sure whether you have java installed then you can test this from a command
22 prompt. To get a command prompt try:
23
24 Windows: Select Start > Run, and type 'cmd' (no quotes) in the box which appears, press OK
25
26 MacOSX: Run Applications > Utilities > Terminal
27
28 Linux: From your applications menu look for an application called 'Terminal' or 'Konsole'.
29 Either of these will give you a usable shell.
30
31 At the command prompt type 'java -version' and press enter. You should see something like:
32
33 java version "1.8.0_60"
34 Java(TM) SE Runtime Environment (build 1.8.0_60-b27)
35 Java HotSpot(TM) 64-Bit Server VM (build 25.60-b23, mixed mode)
36
37 If you get an error then you don't have java installed. If the version listed on the first
38 line is less than 1.6 then you might have problems running FastQC.
39
40 Actually installing FastQC is as simple as unzipping the zip file it comes in into a
41 suitable location. That's it. Once unzipped it's ready to go.
42
43 Running FastQC
44 --------------
45
46 You can run FastQC in one of two modes, either as an interactive graphical application
47 in which you can dynamically load FastQ files and view their results.
48
49 Alternatively you can run FastQC in a non-interactive mode where you specify the files
50 you want to process on the command line and FastQC will generate an HTML report for
51 each file without launching a user interface. This would allow FastQC to be run as
52 part of an analysis pipeline.
53
54
55 Running FastQC Interactively
56 ----------------------------
57 Windows: Simply double click on the run_fastqc bat file. If you want to make a pretty
58 shortcut then we've included an icon file in the top level directory so you don't have
59 to use the generic bat file icon.
60
61 MacOSX: There is an application bundle for MacOSX which you can use to install and run
62 FastQC. Just drag the application from the disk image to your Applications folder (or
63 wherever you want to install the program).
64
65 Linux: We have included a wrapper script, called 'fastqc' which is the easiest way to
66 start the program. The wrapper is in the top level of the FastQC installation. You
67 may need to make this file executable:
68
69 chmod 755 fastqc
70
71 ..but once you have done that you can run it directly
72
73 ./fastqc
74
75 ..or place a link in /usr/local/bin to be able to run the program from any location:
76
77 sudo ln -s /path/to/FastQC/fastqc /usr/local/bin/fastqc
78
79
80 Running FastQC as part of a pipeline
81 ------------------------------------
82 To run FastQC non-interactively you should use the fastqc wrapper script to launch
83 the program. You will probably want to use the zipped install file on every platform
84 (even OSX).
85
86 To run non-interactively you simply have to specify a list of files to process
87 on the commandline
88
89 fastqc somefile.txt someotherfile.txt
90
91 You can specify as many files to process in a single run as you like. If you don't
92 specify any files to process the program will try to open the interactive application
93 which may result in an error if you're running in a non-graphical environment.
94
95 There are a few extra options you can specify when running non-interactively. Full
96 details of these can be found by running
97
98 fastqc --help
99
100 By default, in non-interactive mode FastQC will create an HTML report with embedded
101 graphs, but also a zip file containing individual graph files and additional data files
102 containing the raw data from which plots were drawn. The zip file will not be extracted
103 by default but you can enable this by adding:
104
105 --extract
106
107 To the launch command.
108
109 If you want to save your reports in a folder other than the folder which contained
110 your original FastQ files then you can specify an alternative location by setting a
111 --outdir value:
112
113 --outdir=/some/other/dir/
114
115 If you want to run fastqc on a stream of data to be read from standard input then you
116 can do this by specifying 'stdin' as the name of the file to be processed and then
117 streaming uncompressed fastq format data to the program. For example:
118
119 zcat *fastq.gz | fastqc stdin
120
121 Customising the report output
122 -----------------------------
123
124 If you want to run FastQC as part of a sequencing pipeline you may wish to change the
125 formatting of the report to add in your own branding or to include extra information.
126
127 In the Templates directory you will find a file called 'header_template.html' which
128 you can edit to change the look of the report. This file contains all of the header for
129 the report file, including the CSS section and you can alter this however you see fit.
130
131 Whilst you can make whatever changes you like you should probably leave in place the
132 <div> structure of the html template since later code will expect to close the main div
133 which is left open at the end of the header. There is no facility to change the code in
134 the main body of the report or the footer (although you can of course change the styling).
135
136 The text tags @@FILENAME@@ and @@DATE@@ are placeholders which are filled in when the
137 report is created. You can use these placeholders in other parts of the header if you
138 wish.
0 Installing FastQC
1 -------------------
2 FastQC is a java application. In order to run it needs your system to have a suitable
3 Java Runtime Environment (JRE) installed. Before you try to run FastQC you should therefore
4 ensure that you have a suitable JRE. There are a number of different JREs available
5 however the ones we have tested are the v1.6-v1.8 JREs from Oracle. These are available
6 for a number of different platforms.
7
8 Windows/Linux: Go to java.com - click on Free Java Download - DON'T click the large red button
9 but choose the smaller link to "See all java downloads". Find your operating system and select
10 the appropriate offline installer. If you are using a 64bit operating system (and nearly
11 everyone is these days), then make sure you select the 64bit version of the installer.
12
13 OSX: On newer versions of OSX you need to install the Java Development Kit. The normal Java
14 runtime environment IS NOT enough. To get this go to java.com, click "Free java download",
15 then IGNORE the big red button, and select "See all java downloads", on the next screen select
16 "Looking for the JDK?" from the left hand menu and select the link to "JDK downloads" in the
17 first paragraph. You can then click the "Download" button underneath JDK in the page you are
18 taken to. Sorry this is such a pain!
19
20
21 If you're not sure whether you have java installed then you can test this from a command
22 prompt. To get a command prompt try:
23
24 Windows: Select Start > Run, and type 'cmd' (no quotes) in the box which appears, press OK
25
26 MacOSX: Run Applications > Utilities > Terminal
27
28 Linux: From your applications menu look for an application called 'Terminal' or 'Konsole'.
29 Either of these will give you a usable shell.
30
31 At the command prompt type 'java -version' and press enter. You should see something like:
32
33 java version "1.8.0_60"
34 Java(TM) SE Runtime Environment (build 1.8.0_60-b27)
35 Java HotSpot(TM) 64-Bit Server VM (build 25.60-b23, mixed mode)
36
37 If you get an error then you don't have java installed. If the version listed on the first
38 line is less than 1.6 then you might have problems running FastQC.
39
40 Actually installing FastQC is as simple as unzipping the zip file it comes in into a
41 suitable location. That's it. Once unzipped it's ready to go.
42
43 Running FastQC
44 --------------
45
46 You can run FastQC in one of two modes, either as an interactive graphical application
47 in which you can dynamically load FastQ files and view their results.
48
49 Alternatively you can run FastQC in a non-interactive mode where you specify the files
50 you want to process on the command line and FastQC will generate an HTML report for
51 each file without launching a user interface. This would allow FastQC to be run as
52 part of an analysis pipeline.
53
54
55 Running FastQC Interactively
56 ----------------------------
57 Windows: Simply double click on the run_fastqc bat file. If you want to make a pretty
58 shortcut then we've included an icon file in the top level directory so you don't have
59 to use the generic bat file icon.
60
61 MacOSX: There is an application bundle for MacOSX which you can use to install and run
62 FastQC. Just drag the application from the disk image to your Applications folder (or
63 wherever you want to install the program).
64
65 Linux: We have included a wrapper script, called 'fastqc' which is the easiest way to
66 start the program. The wrapper is in the top level of the FastQC installation. You
67 may need to make this file executable:
68
69 chmod 755 fastqc
70
71 ..but once you have done that you can run it directly
72
73 ./fastqc
74
75 ..or place a link in /usr/local/bin to be able to run the program from any location:
76
77 sudo ln -s /path/to/FastQC/fastqc /usr/local/bin/fastqc
78
79
80 Running FastQC as part of a pipeline
81 ------------------------------------
82 To run FastQC non-interactively you should use the fastqc wrapper script to launch
83 the program. You will probably want to use the zipped install file on every platform
84 (even OSX).
85
86 To run non-interactively you simply have to specify a list of files to process
87 on the commandline
88
89 fastqc somefile.txt someotherfile.txt
90
91 You can specify as many files to process in a single run as you like. If you don't
92 specify any files to process the program will try to open the interactive application
93 which may result in an error if you're running in a non-graphical environment.
94
95 There are a few extra options you can specify when running non-interactively. Full
96 details of these can be found by running
97
98 fastqc --help
99
100 By default, in non-interactive mode FastQC will create an HTML report with embedded
101 graphs, but also a zip file containing individual graph files and additional data files
102 containing the raw data from which plots were drawn. The zip file will not be extracted
103 by default but you can enable this by adding:
104
105 --extract
106
107 To the launch command.
108
109 If you want to save your reports in a folder other than the folder which contained
110 your original FastQ files then you can specify an alternative location by setting a
111 --outdir value:
112
113 --outdir=/some/other/dir/
114
115 If you want to run fastqc on a stream of data to be read from standard input then you
116 can do this by specifying 'stdin' as the name of the file to be processed and then
117 streaming uncompressed fastq format data to the program. For example:
118
119 zcat *fastq.gz | fastqc stdin
120
121 If you want the results from a streamed analysis sent to a file with a name other than
122 stdin then you can add a colon and put the file name you want, for example:
123
124 zcat *fastq.gz | fastqc stdin:my_results
125
126 ..would write results to my_results.html and my_results.zip.
127
128
129 Customising the report output
130 -----------------------------
131
132 If you want to run FastQC as part of a sequencing pipeline you may wish to change the
133 formatting of the report to add in your own branding or to include extra information.
134
135 In the Templates directory you will find a file called 'header_template.html' which
136 you can edit to change the look of the report. This file contains all of the header for
137 the report file, including the CSS section and you can alter this however you see fit.
138
139 Whilst you can make whatever changes you like you should probably leave in place the
140 <div> structure of the html template since later code will expect to close the main div
141 which is left open at the end of the header. There is no facility to change the code in
142 the main body of the report or the footer (although you can of course change the styling).
143
144 The text tags @@FILENAME@@ and @@DATE@@ are placeholders which are filled in when the
145 report is created. You can use these placeholders in other parts of the header if you
146 wish.
0 GNU GENERAL PUBLIC LICENSE
1 Version 2, June 1991
2
3 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
4 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
5 Everyone is permitted to copy and distribute verbatim copies
6 of this license document, but changing it is not allowed.
7
8 Preamble
9
10 The licenses for most software are designed to take away your
11 freedom to share and change it. By contrast, the GNU General Public
12 License is intended to guarantee your freedom to share and change free
13 software--to make sure the software is free for all its users. This
14 General Public License applies to most of the Free Software
15 Foundation's software and to any other program whose authors commit to
16 using it. (Some other Free Software Foundation software is covered by
17 the GNU Lesser General Public License instead.) You can apply it to
18 your programs, too.
19
20 When we speak of free software, we are referring to freedom, not
21 price. Our General Public Licenses are designed to make sure that you
22 have the freedom to distribute copies of free software (and charge for
23 this service if you wish), that you receive source code or can get it
24 if you want it, that you can change the software or use pieces of it
25 in new free programs; and that you know you can do these things.
26
27 To protect your rights, we need to make restrictions that forbid
28 anyone to deny you these rights or to ask you to surrender the rights.
29 These restrictions translate to certain responsibilities for you if you
30 distribute copies of the software, or if you modify it.
31
32 For example, if you distribute copies of such a program, whether
33 gratis or for a fee, you must give the recipients all the rights that
34 you have. You must make sure that they, too, receive or can get the
35 source code. And you must show them these terms so they know their
36 rights.
37
38 We protect your rights with two steps: (1) copyright the software, and
39 (2) offer you this license which gives you legal permission to copy,
40 distribute and/or modify the software.
41
42 Also, for each author's protection and ours, we want to make certain
43 that everyone understands that there is no warranty for this free
44 software. If the software is modified by someone else and passed on, we
45 want its recipients to know that what they have is not the original, so
46 that any problems introduced by others will not reflect on the original
47 authors' reputations.
48
49 Finally, any free program is threatened constantly by software
50 patents. We wish to avoid the danger that redistributors of a free
51 program will individually obtain patent licenses, in effect making the
52 program proprietary. To prevent this, we have made it clear that any
53 patent must be licensed for everyone's free use or not licensed at all.
54
55 The precise terms and conditions for copying, distribution and
56 modification follow.
57
58 GNU GENERAL PUBLIC LICENSE
59 TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
60
61 0. This License applies to any program or other work which contains
62 a notice placed by the copyright holder saying it may be distributed
63 under the terms of this General Public License. The "Program", below,
64 refers to any such program or work, and a "work based on the Program"
65 means either the Program or any derivative work under copyright law:
66 that is to say, a work containing the Program or a portion of it,
67 either verbatim or with modifications and/or translated into another
68 language. (Hereinafter, translation is included without limitation in
69 the term "modification".) Each licensee is addressed as "you".
70
71 Activities other than copying, distribution and modification are not
72 covered by this License; they are outside its scope. The act of
73 running the Program is not restricted, and the output from the Program
74 is covered only if its contents constitute a work based on the
75 Program (independent of having been made by running the Program).
76 Whether that is true depends on what the Program does.
77
78 1. You may copy and distribute verbatim copies of the Program's
79 source code as you receive it, in any medium, provided that you
80 conspicuously and appropriately publish on each copy an appropriate
81 copyright notice and disclaimer of warranty; keep intact all the
82 notices that refer to this License and to the absence of any warranty;
83 and give any other recipients of the Program a copy of this License
84 along with the Program.
85
86 You may charge a fee for the physical act of transferring a copy, and
87 you may at your option offer warranty protection in exchange for a fee.
88
89 2. You may modify your copy or copies of the Program or any portion
90 of it, thus forming a work based on the Program, and copy and
91 distribute such modifications or work under the terms of Section 1
92 above, provided that you also meet all of these conditions:
93
94 a) You must cause the modified files to carry prominent notices
95 stating that you changed the files and the date of any change.
96
97 b) You must cause any work that you distribute or publish, that in
98 whole or in part contains or is derived from the Program or any
99 part thereof, to be licensed as a whole at no charge to all third
100 parties under the terms of this License.
101
102 c) If the modified program normally reads commands interactively
103 when run, you must cause it, when started running for such
104 interactive use in the most ordinary way, to print or display an
105 announcement including an appropriate copyright notice and a
106 notice that there is no warranty (or else, saying that you provide
107 a warranty) and that users may redistribute the program under
108 these conditions, and telling the user how to view a copy of this
109 License. (Exception: if the Program itself is interactive but
110 does not normally print such an announcement, your work based on
111 the Program is not required to print an announcement.)
112
113 These requirements apply to the modified work as a whole. If
114 identifiable sections of that work are not derived from the Program,
115 and can be reasonably considered independent and separate works in
116 themselves, then this License, and its terms, do not apply to those
117 sections when you distribute them as separate works. But when you
118 distribute the same sections as part of a whole which is a work based
119 on the Program, the distribution of the whole must be on the terms of
120 this License, whose permissions for other licensees extend to the
121 entire whole, and thus to each and every part regardless of who wrote it.
122
123 Thus, it is not the intent of this section to claim rights or contest
124 your rights to work written entirely by you; rather, the intent is to
125 exercise the right to control the distribution of derivative or
126 collective works based on the Program.
127
128 In addition, mere aggregation of another work not based on the Program
129 with the Program (or with a work based on the Program) on a volume of
130 a storage or distribution medium does not bring the other work under
131 the scope of this License.
132
133 3. You may copy and distribute the Program (or a work based on it,
134 under Section 2) in object code or executable form under the terms of
135 Sections 1 and 2 above provided that you also do one of the following:
136
137 a) Accompany it with the complete corresponding machine-readable
138 source code, which must be distributed under the terms of Sections
139 1 and 2 above on a medium customarily used for software interchange; or,
140
141 b) Accompany it with a written offer, valid for at least three
142 years, to give any third party, for a charge no more than your
143 cost of physically performing source distribution, a complete
144 machine-readable copy of the corresponding source code, to be
145 distributed under the terms of Sections 1 and 2 above on a medium
146 customarily used for software interchange; or,
147
148 c) Accompany it with the information you received as to the offer
149 to distribute corresponding source code. (This alternative is
150 allowed only for noncommercial distribution and only if you
151 received the program in object code or executable form with such
152 an offer, in accord with Subsection b above.)
153
154 The source code for a work means the preferred form of the work for
155 making modifications to it. For an executable work, complete source
156 code means all the source code for all modules it contains, plus any
157 associated interface definition files, plus the scripts used to
158 control compilation and installation of the executable. However, as a
159 special exception, the source code distributed need not include
160 anything that is normally distributed (in either source or binary
161 form) with the major components (compiler, kernel, and so on) of the
162 operating system on which the executable runs, unless that component
163 itself accompanies the executable.
164
165 If distribution of executable or object code is made by offering
166 access to copy from a designated place, then offering equivalent
167 access to copy the source code from the same place counts as
168 distribution of the source code, even though third parties are not
169 compelled to copy the source along with the object code.
170
171 4. You may not copy, modify, sublicense, or distribute the Program
172 except as expressly provided under this License. Any attempt
173 otherwise to copy, modify, sublicense or distribute the Program is
174 void, and will automatically terminate your rights under this License.
175 However, parties who have received copies, or rights, from you under
176 this License will not have their licenses terminated so long as such
177 parties remain in full compliance.
178
179 5. You are not required to accept this License, since you have not
180 signed it. However, nothing else grants you permission to modify or
181 distribute the Program or its derivative works. These actions are
182 prohibited by law if you do not accept this License. Therefore, by
183 modifying or distributing the Program (or any work based on the
184 Program), you indicate your acceptance of this License to do so, and
185 all its terms and conditions for copying, distributing or modifying
186 the Program or works based on it.
187
188 6. Each time you redistribute the Program (or any work based on the
189 Program), the recipient automatically receives a license from the
190 original licensor to copy, distribute or modify the Program subject to
191 these terms and conditions. You may not impose any further
192 restrictions on the recipients' exercise of the rights granted herein.
193 You are not responsible for enforcing compliance by third parties to
194 this License.
195
196 7. If, as a consequence of a court judgment or allegation of patent
197 infringement or for any other reason (not limited to patent issues),
198 conditions are imposed on you (whether by court order, agreement or
199 otherwise) that contradict the conditions of this License, they do not
200 excuse you from the conditions of this License. If you cannot
201 distribute so as to satisfy simultaneously your obligations under this
202 License and any other pertinent obligations, then as a consequence you
203 may not distribute the Program at all. For example, if a patent
204 license would not permit royalty-free redistribution of the Program by
205 all those who receive copies directly or indirectly through you, then
206 the only way you could satisfy both it and this License would be to
207 refrain entirely from distribution of the Program.
208
209 If any portion of this section is held invalid or unenforceable under
210 any particular circumstance, the balance of the section is intended to
211 apply and the section as a whole is intended to apply in other
212 circumstances.
213
214 It is not the purpose of this section to induce you to infringe any
215 patents or other property right claims or to contest validity of any
216 such claims; this section has the sole purpose of protecting the
217 integrity of the free software distribution system, which is
218 implemented by public license practices. Many people have made
219 generous contributions to the wide range of software distributed
220 through that system in reliance on consistent application of that
221 system; it is up to the author/donor to decide if he or she is willing
222 to distribute software through any other system and a licensee cannot
223 impose that choice.
224
225 This section is intended to make thoroughly clear what is believed to
226 be a consequence of the rest of this License.
227
228 8. If the distribution and/or use of the Program is restricted in
229 certain countries either by patents or by copyrighted interfaces, the
230 original copyright holder who places the Program under this License
231 may add an explicit geographical distribution limitation excluding
232 those countries, so that distribution is permitted only in or among
233 countries not thus excluded. In such case, this License incorporates
234 the limitation as if written in the body of this License.
235
236 9. The Free Software Foundation may publish revised and/or new versions
237 of the General Public License from time to time. Such new versions will
238 be similar in spirit to the present version, but may differ in detail to
239 address new problems or concerns.
240
241 Each version is given a distinguishing version number. If the Program
242 specifies a version number of this License which applies to it and "any
243 later version", you have the option of following the terms and conditions
244 either of that version or of any later version published by the Free
245 Software Foundation. If the Program does not specify a version number of
246 this License, you may choose any version ever published by the Free Software
247 Foundation.
248
249 10. If you wish to incorporate parts of the Program into other free
250 programs whose distribution conditions are different, write to the author
251 to ask for permission. For software which is copyrighted by the Free
252 Software Foundation, write to the Free Software Foundation; we sometimes
253 make exceptions for this. Our decision will be guided by the two goals
254 of preserving the free status of all derivatives of our free software and
255 of promoting the sharing and reuse of software generally.
256
257 NO WARRANTY
258
259 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
260 FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
261 OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
262 PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
263 OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
264 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
265 TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
266 PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
267 REPAIR OR CORRECTION.
268
269 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
270 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
271 REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
272 INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
273 OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
274 TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
275 YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
276 PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
277 POSSIBILITY OF SUCH DAMAGES.
278
279 END OF TERMS AND CONDITIONS
280
281 How to Apply These Terms to Your New Programs
282
283 If you develop a new program, and you want it to be of the greatest
284 possible use to the public, the best way to achieve this is to make it
285 free software which everyone can redistribute and change under these terms.
286
287 To do so, attach the following notices to the program. It is safest
288 to attach them to the start of each source file to most effectively
289 convey the exclusion of warranty; and each file should have at least
290 the "copyright" line and a pointer to where the full notice is found.
291
292 <one line to give the program's name and a brief idea of what it does.>
293 Copyright (C) <year> <name of author>
294
295 This program is free software; you can redistribute it and/or modify
296 it under the terms of the GNU General Public License as published by
297 the Free Software Foundation; either version 2 of the License, or
298 (at your option) any later version.
299
300 This program is distributed in the hope that it will be useful,
301 but WITHOUT ANY WARRANTY; without even the implied warranty of
302 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
303 GNU General Public License for more details.
304
305 You should have received a copy of the GNU General Public License along
306 with this program; if not, write to the Free Software Foundation, Inc.,
307 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
308
309 Also add information on how to contact you by electronic and paper mail.
310
311 If the program is interactive, make it output a short notice like this
312 when it starts in an interactive mode:
313
314 Gnomovision version 69, Copyright (C) year name of author
315 Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
316 This is free software, and you are welcome to redistribute it
317 under certain conditions; type `show c' for details.
318
319 The hypothetical commands `show w' and `show c' should show the appropriate
320 parts of the General Public License. Of course, the commands you use may
321 be called something other than `show w' and `show c'; they could even be
322 mouse-clicks or menu items--whatever suits your program.
323
324 You should also get your employer (if you work as a programmer) or your
325 school, if any, to sign a "copyright disclaimer" for the program, if
326 necessary. Here is a sample; alter the names:
327
328 Yoyodyne, Inc., hereby disclaims all copyright interest in the program
329 `Gnomovision' (which makes passes at compilers) written by James Hacker.
330
331 <signature of Ty Coon>, 1 April 1989
332 Ty Coon, President of Vice
333
334 This General Public License does not permit incorporating your program into
335 proprietary programs. If your program is a subroutine library, you may
336 consider it more useful to permit linking proprietary applications with the
337 library. If this is what you want to do, use the GNU Lesser General
338 Public License instead of this License.
0 GNU GENERAL PUBLIC LICENSE
1 Version 3, 29 June 2007
2
3 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
4 Everyone is permitted to copy and distribute verbatim copies
5 of this license document, but changing it is not allowed.
6
7 Preamble
8
9 The GNU General Public License is a free, copyleft license for
10 software and other kinds of works.
11
12 The licenses for most software and other practical works are designed
13 to take away your freedom to share and change the works. By contrast,
14 the GNU General Public License is intended to guarantee your freedom to
15 share and change all versions of a program--to make sure it remains free
16 software for all its users. We, the Free Software Foundation, use the
17 GNU General Public License for most of our software; it applies also to
18 any other work released this way by its authors. You can apply it to
19 your programs, too.
20
21 When we speak of free software, we are referring to freedom, not
22 price. Our General Public Licenses are designed to make sure that you
23 have the freedom to distribute copies of free software (and charge for
24 them if you wish), that you receive source code or can get it if you
25 want it, that you can change the software or use pieces of it in new
26 free programs, and that you know you can do these things.
27
28 To protect your rights, we need to prevent others from denying you
29 these rights or asking you to surrender the rights. Therefore, you have
30 certain responsibilities if you distribute copies of the software, or if
31 you modify it: responsibilities to respect the freedom of others.
32
33 For example, if you distribute copies of such a program, whether
34 gratis or for a fee, you must pass on to the recipients the same
35 freedoms that you received. You must make sure that they, too, receive
36 or can get the source code. And you must show them these terms so they
37 know their rights.
38
39 Developers that use the GNU GPL protect your rights with two steps:
40 (1) assert copyright on the software, and (2) offer you this License
41 giving you legal permission to copy, distribute and/or modify it.
42
43 For the developers' and authors' protection, the GPL clearly explains
44 that there is no warranty for this free software. For both users' and
45 authors' sake, the GPL requires that modified versions be marked as
46 changed, so that their problems will not be attributed erroneously to
47 authors of previous versions.
48
49 Some devices are designed to deny users access to install or run
50 modified versions of the software inside them, although the manufacturer
51 can do so. This is fundamentally incompatible with the aim of
52 protecting users' freedom to change the software. The systematic
53 pattern of such abuse occurs in the area of products for individuals to
54 use, which is precisely where it is most unacceptable. Therefore, we
55 have designed this version of the GPL to prohibit the practice for those
56 products. If such problems arise substantially in other domains, we
57 stand ready to extend this provision to those domains in future versions
58 of the GPL, as needed to protect the freedom of users.
59
60 Finally, every program is threatened constantly by software patents.
61 States should not allow patents to restrict development and use of
62 software on general-purpose computers, but in those that do, we wish to
63 avoid the special danger that patents applied to a free program could
64 make it effectively proprietary. To prevent this, the GPL assures that
65 patents cannot be used to render the program non-free.
66
67 The precise terms and conditions for copying, distribution and
68 modification follow.
69
70 TERMS AND CONDITIONS
71
72 0. Definitions.
73
74 "This License" refers to version 3 of the GNU General Public License.
75
76 "Copyright" also means copyright-like laws that apply to other kinds of
77 works, such as semiconductor masks.
78
79 "The Program" refers to any copyrightable work licensed under this
80 License. Each licensee is addressed as "you". "Licensees" and
81 "recipients" may be individuals or organizations.
82
83 To "modify" a work means to copy from or adapt all or part of the work
84 in a fashion requiring copyright permission, other than the making of an
85 exact copy. The resulting work is called a "modified version" of the
86 earlier work or a work "based on" the earlier work.
87
88 A "covered work" means either the unmodified Program or a work based
89 on the Program.
90
91 To "propagate" a work means to do anything with it that, without
92 permission, would make you directly or secondarily liable for
93 infringement under applicable copyright law, except executing it on a
94 computer or modifying a private copy. Propagation includes copying,
95 distribution (with or without modification), making available to the
96 public, and in some countries other activities as well.
97
98 To "convey" a work means any kind of propagation that enables other
99 parties to make or receive copies. Mere interaction with a user through
100 a computer network, with no transfer of a copy, is not conveying.
101
102 An interactive user interface displays "Appropriate Legal Notices"
103 to the extent that it includes a convenient and prominently visible
104 feature that (1) displays an appropriate copyright notice, and (2)
105 tells the user that there is no warranty for the work (except to the
106 extent that warranties are provided), that licensees may convey the
107 work under this License, and how to view a copy of this License. If
108 the interface presents a list of user commands or options, such as a
109 menu, a prominent item in the list meets this criterion.
110
111 1. Source Code.
112
113 The "source code" for a work means the preferred form of the work
114 for making modifications to it. "Object code" means any non-source
115 form of a work.
116
117 A "Standard Interface" means an interface that either is an official
118 standard defined by a recognized standards body, or, in the case of
119 interfaces specified for a particular programming language, one that
120 is widely used among developers working in that language.
121
122 The "System Libraries" of an executable work include anything, other
123 than the work as a whole, that (a) is included in the normal form of
124 packaging a Major Component, but which is not part of that Major
125 Component, and (b) serves only to enable use of the work with that
126 Major Component, or to implement a Standard Interface for which an
127 implementation is available to the public in source code form. A
128 "Major Component", in this context, means a major essential component
129 (kernel, window system, and so on) of the specific operating system
130 (if any) on which the executable work runs, or a compiler used to
131 produce the work, or an object code interpreter used to run it.
132
133 The "Corresponding Source" for a work in object code form means all
134 the source code needed to generate, install, and (for an executable
135 work) run the object code and to modify the work, including scripts to
136 control those activities. However, it does not include the work's
137 System Libraries, or general-purpose tools or generally available free
138 programs which are used unmodified in performing those activities but
139 which are not part of the work. For example, Corresponding Source
140 includes interface definition files associated with source files for
141 the work, and the source code for shared libraries and dynamically
142 linked subprograms that the work is specifically designed to require,
143 such as by intimate data communication or control flow between those
144 subprograms and other parts of the work.
145
146 The Corresponding Source need not include anything that users
147 can regenerate automatically from other parts of the Corresponding
148 Source.
149
150 The Corresponding Source for a work in source code form is that
151 same work.
152
153 2. Basic Permissions.
154
155 All rights granted under this License are granted for the term of
156 copyright on the Program, and are irrevocable provided the stated
157 conditions are met. This License explicitly affirms your unlimited
158 permission to run the unmodified Program. The output from running a
159 covered work is covered by this License only if the output, given its
160 content, constitutes a covered work. This License acknowledges your
161 rights of fair use or other equivalent, as provided by copyright law.
162
163 You may make, run and propagate covered works that you do not
164 convey, without conditions so long as your license otherwise remains
165 in force. You may convey covered works to others for the sole purpose
166 of having them make modifications exclusively for you, or provide you
167 with facilities for running those works, provided that you comply with
168 the terms of this License in conveying all material for which you do
169 not control copyright. Those thus making or running the covered works
170 for you must do so exclusively on your behalf, under your direction
171 and control, on terms that prohibit them from making any copies of
172 your copyrighted material outside their relationship with you.
173
174 Conveying under any other circumstances is permitted solely under
175 the conditions stated below. Sublicensing is not allowed; section 10
176 makes it unnecessary.
177
178 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
179
180 No covered work shall be deemed part of an effective technological
181 measure under any applicable law fulfilling obligations under article
182 11 of the WIPO copyright treaty adopted on 20 December 1996, or
183 similar laws prohibiting or restricting circumvention of such
184 measures.
185
186 When you convey a covered work, you waive any legal power to forbid
187 circumvention of technological measures to the extent such circumvention
188 is effected by exercising rights under this License with respect to
189 the covered work, and you disclaim any intention to limit operation or
190 modification of the work as a means of enforcing, against the work's
191 users, your or third parties' legal rights to forbid circumvention of
192 technological measures.
193
194 4. Conveying Verbatim Copies.
195
196 You may convey verbatim copies of the Program's source code as you
197 receive it, in any medium, provided that you conspicuously and
198 appropriately publish on each copy an appropriate copyright notice;
199 keep intact all notices stating that this License and any
200 non-permissive terms added in accord with section 7 apply to the code;
201 keep intact all notices of the absence of any warranty; and give all
202 recipients a copy of this License along with the Program.
203
204 You may charge any price or no price for each copy that you convey,
205 and you may offer support or warranty protection for a fee.
206
207 5. Conveying Modified Source Versions.
208
209 You may convey a work based on the Program, or the modifications to
210 produce it from the Program, in the form of source code under the
211 terms of section 4, provided that you also meet all of these conditions:
212
213 a) The work must carry prominent notices stating that you modified
214 it, and giving a relevant date.
215
216 b) The work must carry prominent notices stating that it is
217 released under this License and any conditions added under section
218 7. This requirement modifies the requirement in section 4 to
219 "keep intact all notices".
220
221 c) You must license the entire work, as a whole, under this
222 License to anyone who comes into possession of a copy. This
223 License will therefore apply, along with any applicable section 7
224 additional terms, to the whole of the work, and all its parts,
225 regardless of how they are packaged. This License gives no
226 permission to license the work in any other way, but it does not
227 invalidate such permission if you have separately received it.
228
229 d) If the work has interactive user interfaces, each must display
230 Appropriate Legal Notices; however, if the Program has interactive
231 interfaces that do not display Appropriate Legal Notices, your
232 work need not make them do so.
233
234 A compilation of a covered work with other separate and independent
235 works, which are not by their nature extensions of the covered work,
236 and which are not combined with it such as to form a larger program,
237 in or on a volume of a storage or distribution medium, is called an
238 "aggregate" if the compilation and its resulting copyright are not
239 used to limit the access or legal rights of the compilation's users
240 beyond what the individual works permit. Inclusion of a covered work
241 in an aggregate does not cause this License to apply to the other
242 parts of the aggregate.
243
244 6. Conveying Non-Source Forms.
245
246 You may convey a covered work in object code form under the terms
247 of sections 4 and 5, provided that you also convey the
248 machine-readable Corresponding Source under the terms of this License,
249 in one of these ways:
250
251 a) Convey the object code in, or embodied in, a physical product
252 (including a physical distribution medium), accompanied by the
253 Corresponding Source fixed on a durable physical medium
254 customarily used for software interchange.
255
256 b) Convey the object code in, or embodied in, a physical product
257 (including a physical distribution medium), accompanied by a
258 written offer, valid for at least three years and valid for as
259 long as you offer spare parts or customer support for that product
260 model, to give anyone who possesses the object code either (1) a
261 copy of the Corresponding Source for all the software in the
262 product that is covered by this License, on a durable physical
263 medium customarily used for software interchange, for a price no
264 more than your reasonable cost of physically performing this
265 conveying of source, or (2) access to copy the
266 Corresponding Source from a network server at no charge.
267
268 c) Convey individual copies of the object code with a copy of the
269 written offer to provide the Corresponding Source. This
270 alternative is allowed only occasionally and noncommercially, and
271 only if you received the object code with such an offer, in accord
272 with subsection 6b.
273
274 d) Convey the object code by offering access from a designated
275 place (gratis or for a charge), and offer equivalent access to the
276 Corresponding Source in the same way through the same place at no
277 further charge. You need not require recipients to copy the
278 Corresponding Source along with the object code. If the place to
279 copy the object code is a network server, the Corresponding Source
280 may be on a different server (operated by you or a third party)
281 that supports equivalent copying facilities, provided you maintain
282 clear directions next to the object code saying where to find the
283 Corresponding Source. Regardless of what server hosts the
284 Corresponding Source, you remain obligated to ensure that it is
285 available for as long as needed to satisfy these requirements.
286
287 e) Convey the object code using peer-to-peer transmission, provided
288 you inform other peers where the object code and Corresponding
289 Source of the work are being offered to the general public at no
290 charge under subsection 6d.
291
292 A separable portion of the object code, whose source code is excluded
293 from the Corresponding Source as a System Library, need not be
294 included in conveying the object code work.
295
296 A "User Product" is either (1) a "consumer product", which means any
297 tangible personal property which is normally used for personal, family,
298 or household purposes, or (2) anything designed or sold for incorporation
299 into a dwelling. In determining whether a product is a consumer product,
300 doubtful cases shall be resolved in favor of coverage. For a particular
301 product received by a particular user, "normally used" refers to a
302 typical or common use of that class of product, regardless of the status
303 of the particular user or of the way in which the particular user
304 actually uses, or expects or is expected to use, the product. A product
305 is a consumer product regardless of whether the product has substantial
306 commercial, industrial or non-consumer uses, unless such uses represent
307 the only significant mode of use of the product.
308
309 "Installation Information" for a User Product means any methods,
310 procedures, authorization keys, or other information required to install
311 and execute modified versions of a covered work in that User Product from
312 a modified version of its Corresponding Source. The information must
313 suffice to ensure that the continued functioning of the modified object
314 code is in no case prevented or interfered with solely because
315 modification has been made.
316
317 If you convey an object code work under this section in, or with, or
318 specifically for use in, a User Product, and the conveying occurs as
319 part of a transaction in which the right of possession and use of the
320 User Product is transferred to the recipient in perpetuity or for a
321 fixed term (regardless of how the transaction is characterized), the
322 Corresponding Source conveyed under this section must be accompanied
323 by the Installation Information. But this requirement does not apply
324 if neither you nor any third party retains the ability to install
325 modified object code on the User Product (for example, the work has
326 been installed in ROM).
327
328 The requirement to provide Installation Information does not include a
329 requirement to continue to provide support service, warranty, or updates
330 for a work that has been modified or installed by the recipient, or for
331 the User Product in which it has been modified or installed. Access to a
332 network may be denied when the modification itself materially and
333 adversely affects the operation of the network or violates the rules and
334 protocols for communication across the network.
335
336 Corresponding Source conveyed, and Installation Information provided,
337 in accord with this section must be in a format that is publicly
338 documented (and with an implementation available to the public in
339 source code form), and must require no special password or key for
340 unpacking, reading or copying.
341
342 7. Additional Terms.
343
344 "Additional permissions" are terms that supplement the terms of this
345 License by making exceptions from one or more of its conditions.
346 Additional permissions that are applicable to the entire Program shall
347 be treated as though they were included in this License, to the extent
348 that they are valid under applicable law. If additional permissions
349 apply only to part of the Program, that part may be used separately
350 under those permissions, but the entire Program remains governed by
351 this License without regard to the additional permissions.
352
353 When you convey a copy of a covered work, you may at your option
354 remove any additional permissions from that copy, or from any part of
355 it. (Additional permissions may be written to require their own
356 removal in certain cases when you modify the work.) You may place
357 additional permissions on material, added by you to a covered work,
358 for which you have or can give appropriate copyright permission.
359
360 Notwithstanding any other provision of this License, for material you
361 add to a covered work, you may (if authorized by the copyright holders of
362 that material) supplement the terms of this License with terms:
363
364 a) Disclaiming warranty or limiting liability differently from the
365 terms of sections 15 and 16 of this License; or
366
367 b) Requiring preservation of specified reasonable legal notices or
368 author attributions in that material or in the Appropriate Legal
369 Notices displayed by works containing it; or
370
371 c) Prohibiting misrepresentation of the origin of that material, or
372 requiring that modified versions of such material be marked in
373 reasonable ways as different from the original version; or
374
375 d) Limiting the use for publicity purposes of names of licensors or
376 authors of the material; or
377
378 e) Declining to grant rights under trademark law for use of some
379 trade names, trademarks, or service marks; or
380
381 f) Requiring indemnification of licensors and authors of that
382 material by anyone who conveys the material (or modified versions of
383 it) with contractual assumptions of liability to the recipient, for
384 any liability that these contractual assumptions directly impose on
385 those licensors and authors.
386
387 All other non-permissive additional terms are considered "further
388 restrictions" within the meaning of section 10. If the Program as you
389 received it, or any part of it, contains a notice stating that it is
390 governed by this License along with a term that is a further
391 restriction, you may remove that term. If a license document contains
392 a further restriction but permits relicensing or conveying under this
393 License, you may add to a covered work material governed by the terms
394 of that license document, provided that the further restriction does
395 not survive such relicensing or conveying.
396
397 If you add terms to a covered work in accord with this section, you
398 must place, in the relevant source files, a statement of the
399 additional terms that apply to those files, or a notice indicating
400 where to find the applicable terms.
401
402 Additional terms, permissive or non-permissive, may be stated in the
403 form of a separately written license, or stated as exceptions;
404 the above requirements apply either way.
405
406 8. Termination.
407
408 You may not propagate or modify a covered work except as expressly
409 provided under this License. Any attempt otherwise to propagate or
410 modify it is void, and will automatically terminate your rights under
411 this License (including any patent licenses granted under the third
412 paragraph of section 11).
413
414 However, if you cease all violation of this License, then your
415 license from a particular copyright holder is reinstated (a)
416 provisionally, unless and until the copyright holder explicitly and
417 finally terminates your license, and (b) permanently, if the copyright
418 holder fails to notify you of the violation by some reasonable means
419 prior to 60 days after the cessation.
420
421 Moreover, your license from a particular copyright holder is
422 reinstated permanently if the copyright holder notifies you of the
423 violation by some reasonable means, this is the first time you have
424 received notice of violation of this License (for any work) from that
425 copyright holder, and you cure the violation prior to 30 days after
426 your receipt of the notice.
427
428 Termination of your rights under this section does not terminate the
429 licenses of parties who have received copies or rights from you under
430 this License. If your rights have been terminated and not permanently
431 reinstated, you do not qualify to receive new licenses for the same
432 material under section 10.
433
434 9. Acceptance Not Required for Having Copies.
435
436 You are not required to accept this License in order to receive or
437 run a copy of the Program. Ancillary propagation of a covered work
438 occurring solely as a consequence of using peer-to-peer transmission
439 to receive a copy likewise does not require acceptance. However,
440 nothing other than this License grants you permission to propagate or
441 modify any covered work. These actions infringe copyright if you do
442 not accept this License. Therefore, by modifying or propagating a
443 covered work, you indicate your acceptance of this License to do so.
444
445 10. Automatic Licensing of Downstream Recipients.
446
447 Each time you convey a covered work, the recipient automatically
448 receives a license from the original licensors, to run, modify and
449 propagate that work, subject to this License. You are not responsible
450 for enforcing compliance by third parties with this License.
451
452 An "entity transaction" is a transaction transferring control of an
453 organization, or substantially all assets of one, or subdividing an
454 organization, or merging organizations. If propagation of a covered
455 work results from an entity transaction, each party to that
456 transaction who receives a copy of the work also receives whatever
457 licenses to the work the party's predecessor in interest had or could
458 give under the previous paragraph, plus a right to possession of the
459 Corresponding Source of the work from the predecessor in interest, if
460 the predecessor has it or can get it with reasonable efforts.
461
462 You may not impose any further restrictions on the exercise of the
463 rights granted or affirmed under this License. For example, you may
464 not impose a license fee, royalty, or other charge for exercise of
465 rights granted under this License, and you may not initiate litigation
466 (including a cross-claim or counterclaim in a lawsuit) alleging that
467 any patent claim is infringed by making, using, selling, offering for
468 sale, or importing the Program or any portion of it.
469
470 11. Patents.
471
472 A "contributor" is a copyright holder who authorizes use under this
473 License of the Program or a work on which the Program is based. The
474 work thus licensed is called the contributor's "contributor version".
475
476 A contributor's "essential patent claims" are all patent claims
477 owned or controlled by the contributor, whether already acquired or
478 hereafter acquired, that would be infringed by some manner, permitted
479 by this License, of making, using, or selling its contributor version,
480 but do not include claims that would be infringed only as a
481 consequence of further modification of the contributor version. For
482 purposes of this definition, "control" includes the right to grant
483 patent sublicenses in a manner consistent with the requirements of
484 this License.
485
486 Each contributor grants you a non-exclusive, worldwide, royalty-free
487 patent license under the contributor's essential patent claims, to
488 make, use, sell, offer for sale, import and otherwise run, modify and
489 propagate the contents of its contributor version.
490
491 In the following three paragraphs, a "patent license" is any express
492 agreement or commitment, however denominated, not to enforce a patent
493 (such as an express permission to practice a patent or covenant not to
494 sue for patent infringement). To "grant" such a patent license to a
495 party means to make such an agreement or commitment not to enforce a
496 patent against the party.
497
498 If you convey a covered work, knowingly relying on a patent license,
499 and the Corresponding Source of the work is not available for anyone
500 to copy, free of charge and under the terms of this License, through a
501 publicly available network server or other readily accessible means,
502 then you must either (1) cause the Corresponding Source to be so
503 available, or (2) arrange to deprive yourself of the benefit of the
504 patent license for this particular work, or (3) arrange, in a manner
505 consistent with the requirements of this License, to extend the patent
506 license to downstream recipients. "Knowingly relying" means you have
507 actual knowledge that, but for the patent license, your conveying the
508 covered work in a country, or your recipient's use of the covered work
509 in a country, would infringe one or more identifiable patents in that
510 country that you have reason to believe are valid.
511
512 If, pursuant to or in connection with a single transaction or
513 arrangement, you convey, or propagate by procuring conveyance of, a
514 covered work, and grant a patent license to some of the parties
515 receiving the covered work authorizing them to use, propagate, modify
516 or convey a specific copy of the covered work, then the patent license
517 you grant is automatically extended to all recipients of the covered
518 work and works based on it.
519
520 A patent license is "discriminatory" if it does not include within
521 the scope of its coverage, prohibits the exercise of, or is
522 conditioned on the non-exercise of one or more of the rights that are
523 specifically granted under this License. You may not convey a covered
524 work if you are a party to an arrangement with a third party that is
525 in the business of distributing software, under which you make payment
526 to the third party based on the extent of your activity of conveying
527 the work, and under which the third party grants, to any of the
528 parties who would receive the covered work from you, a discriminatory
529 patent license (a) in connection with copies of the covered work
530 conveyed by you (or copies made from those copies), or (b) primarily
531 for and in connection with specific products or compilations that
532 contain the covered work, unless you entered into that arrangement,
533 or that patent license was granted, prior to 28 March 2007.
534
535 Nothing in this License shall be construed as excluding or limiting
536 any implied license or other defenses to infringement that may
537 otherwise be available to you under applicable patent law.
538
539 12. No Surrender of Others' Freedom.
540
541 If conditions are imposed on you (whether by court order, agreement or
542 otherwise) that contradict the conditions of this License, they do not
543 excuse you from the conditions of this License. If you cannot convey a
544 covered work so as to satisfy simultaneously your obligations under this
545 License and any other pertinent obligations, then as a consequence you may
546 not convey it at all. For example, if you agree to terms that obligate you
547 to collect a royalty for further conveying from those to whom you convey
548 the Program, the only way you could satisfy both those terms and this
549 License would be to refrain entirely from conveying the Program.
550
551 13. Use with the GNU Affero General Public License.
552
553 Notwithstanding any other provision of this License, you have
554 permission to link or combine any covered work with a work licensed
555 under version 3 of the GNU Affero General Public License into a single
556 combined work, and to convey the resulting work. The terms of this
557 License will continue to apply to the part which is the covered work,
558 but the special requirements of the GNU Affero General Public License,
559 section 13, concerning interaction through a network will apply to the
560 combination as such.
561
562 14. Revised Versions of this License.
563
564 The Free Software Foundation may publish revised and/or new versions of
565 the GNU General Public License from time to time. Such new versions will
566 be similar in spirit to the present version, but may differ in detail to
567 address new problems or concerns.
568
569 Each version is given a distinguishing version number. If the
570 Program specifies that a certain numbered version of the GNU General
571 Public License "or any later version" applies to it, you have the
572 option of following the terms and conditions either of that numbered
573 version or of any later version published by the Free Software
574 Foundation. If the Program does not specify a version number of the
575 GNU General Public License, you may choose any version ever published
576 by the Free Software Foundation.
577
578 If the Program specifies that a proxy can decide which future
579 versions of the GNU General Public License can be used, that proxy's
580 public statement of acceptance of a version permanently authorizes you
581 to choose that version for the Program.
582
583 Later license versions may give you additional or different
584 permissions. However, no additional obligations are imposed on any
585 author or copyright holder as a result of your choosing to follow a
586 later version.
587
588 15. Disclaimer of Warranty.
589
590 THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
591 APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
592 HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
593 OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
594 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
595 PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
596 IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
597 ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
598
599 16. Limitation of Liability.
600
601 IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
602 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
603 THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
604 GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
605 USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
606 DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
607 PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
608 EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
609 SUCH DAMAGES.
610
611 17. Interpretation of Sections 15 and 16.
612
613 If the disclaimer of warranty and limitation of liability provided
614 above cannot be given local legal effect according to their terms,
615 reviewing courts shall apply local law that most closely approximates
616 an absolute waiver of all civil liability in connection with the
617 Program, unless a warranty or assumption of liability accompanies a
618 copy of the Program in return for a fee.
619
620 END OF TERMS AND CONDITIONS
621
622 How to Apply These Terms to Your New Programs
623
624 If you develop a new program, and you want it to be of the greatest
625 possible use to the public, the best way to achieve this is to make it
626 free software which everyone can redistribute and change under these terms.
627
628 To do so, attach the following notices to the program. It is safest
629 to attach them to the start of each source file to most effectively
630 state the exclusion of warranty; and each file should have at least
631 the "copyright" line and a pointer to where the full notice is found.
632
633 <one line to give the program's name and a brief idea of what it does.>
634 Copyright (C) <year> <name of author>
635
636 This program is free software: you can redistribute it and/or modify
637 it under the terms of the GNU General Public License as published by
638 the Free Software Foundation, either version 3 of the License, or
639 (at your option) any later version.
640
641 This program is distributed in the hope that it will be useful,
642 but WITHOUT ANY WARRANTY; without even the implied warranty of
643 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
644 GNU General Public License for more details.
645
646 You should have received a copy of the GNU General Public License
647 along with this program. If not, see <http://www.gnu.org/licenses/>.
648
649 Also add information on how to contact you by electronic and paper mail.
650
651 If the program does terminal interaction, make it output a short
652 notice like this when it starts in an interactive mode:
653
654 <program> Copyright (C) <year> <name of author>
655 This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
656 This is free software, and you are welcome to redistribute it
657 under certain conditions; type `show c' for details.
658
659 The hypothetical commands `show w' and `show c' should show the appropriate
660 parts of the General Public License. Of course, your program's commands
661 might be different; for a GUI interface, you would use an "about box".
662
663 You should also get your employer (if you work as a programmer) or school,
664 if any, to sign a "copyright disclaimer" for the program, if necessary.
665 For more information on this, and how to apply and follow the GNU GPL, see
666 <http://www.gnu.org/licenses/>.
667
668 The GNU General Public License does not permit incorporating your program
669 into proprietary programs. If your program is a subroutine library, you
670 may consider it more useful to permit linking proprietary applications with
671 the library. If this is what you want to do, use the GNU Lesser General
672 Public License instead of this License. But first, please read
673 <http://www.gnu.org/philosophy/why-not-lgpl.html>.
0 GNU GENERAL PUBLIC LICENSE
1 Version 3, 29 June 2007
2
3 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
4 Everyone is permitted to copy and distribute verbatim copies
5 of this license document, but changing it is not allowed.
6
7 Preamble
8
9 The GNU General Public License is a free, copyleft license for
10 software and other kinds of works.
11
12 The licenses for most software and other practical works are designed
13 to take away your freedom to share and change the works. By contrast,
14 the GNU General Public License is intended to guarantee your freedom to
15 share and change all versions of a program--to make sure it remains free
16 software for all its users. We, the Free Software Foundation, use the
17 GNU General Public License for most of our software; it applies also to
18 any other work released this way by its authors. You can apply it to
19 your programs, too.
20
21 When we speak of free software, we are referring to freedom, not
22 price. Our General Public Licenses are designed to make sure that you
23 have the freedom to distribute copies of free software (and charge for
24 them if you wish), that you receive source code or can get it if you
25 want it, that you can change the software or use pieces of it in new
26 free programs, and that you know you can do these things.
27
28 To protect your rights, we need to prevent others from denying you
29 these rights or asking you to surrender the rights. Therefore, you have
30 certain responsibilities if you distribute copies of the software, or if
31 you modify it: responsibilities to respect the freedom of others.
32
33 For example, if you distribute copies of such a program, whether
34 gratis or for a fee, you must pass on to the recipients the same
35 freedoms that you received. You must make sure that they, too, receive
36 or can get the source code. And you must show them these terms so they
37 know their rights.
38
39 Developers that use the GNU GPL protect your rights with two steps:
40 (1) assert copyright on the software, and (2) offer you this License
41 giving you legal permission to copy, distribute and/or modify it.
42
43 For the developers' and authors' protection, the GPL clearly explains
44 that there is no warranty for this free software. For both users' and
45 authors' sake, the GPL requires that modified versions be marked as
46 changed, so that their problems will not be attributed erroneously to
47 authors of previous versions.
48
49 Some devices are designed to deny users access to install or run
50 modified versions of the software inside them, although the manufacturer
51 can do so. This is fundamentally incompatible with the aim of
52 protecting users' freedom to change the software. The systematic
53 pattern of such abuse occurs in the area of products for individuals to
54 use, which is precisely where it is most unacceptable. Therefore, we
55 have designed this version of the GPL to prohibit the practice for those
56 products. If such problems arise substantially in other domains, we
57 stand ready to extend this provision to those domains in future versions
58 of the GPL, as needed to protect the freedom of users.
59
60 Finally, every program is threatened constantly by software patents.
61 States should not allow patents to restrict development and use of
62 software on general-purpose computers, but in those that do, we wish to
63 avoid the special danger that patents applied to a free program could
64 make it effectively proprietary. To prevent this, the GPL assures that
65 patents cannot be used to render the program non-free.
66
67 The precise terms and conditions for copying, distribution and
68 modification follow.
69
70 TERMS AND CONDITIONS
71
72 0. Definitions.
73
74 "This License" refers to version 3 of the GNU General Public License.
75
76 "Copyright" also means copyright-like laws that apply to other kinds of
77 works, such as semiconductor masks.
78
79 "The Program" refers to any copyrightable work licensed under this
80 License. Each licensee is addressed as "you". "Licensees" and
81 "recipients" may be individuals or organizations.
82
83 To "modify" a work means to copy from or adapt all or part of the work
84 in a fashion requiring copyright permission, other than the making of an
85 exact copy. The resulting work is called a "modified version" of the
86 earlier work or a work "based on" the earlier work.
87
88 A "covered work" means either the unmodified Program or a work based
89 on the Program.
90
91 To "propagate" a work means to do anything with it that, without
92 permission, would make you directly or secondarily liable for
93 infringement under applicable copyright law, except executing it on a
94 computer or modifying a private copy. Propagation includes copying,
95 distribution (with or without modification), making available to the
96 public, and in some countries other activities as well.
97
98 To "convey" a work means any kind of propagation that enables other
99 parties to make or receive copies. Mere interaction with a user through
100 a computer network, with no transfer of a copy, is not conveying.
101
102 An interactive user interface displays "Appropriate Legal Notices"
103 to the extent that it includes a convenient and prominently visible
104 feature that (1) displays an appropriate copyright notice, and (2)
105 tells the user that there is no warranty for the work (except to the
106 extent that warranties are provided), that licensees may convey the
107 work under this License, and how to view a copy of this License. If
108 the interface presents a list of user commands or options, such as a
109 menu, a prominent item in the list meets this criterion.
110
111 1. Source Code.
112
113 The "source code" for a work means the preferred form of the work
114 for making modifications to it. "Object code" means any non-source
115 form of a work.
116
117 A "Standard Interface" means an interface that either is an official
118 standard defined by a recognized standards body, or, in the case of
119 interfaces specified for a particular programming language, one that
120 is widely used among developers working in that language.
121
122 The "System Libraries" of an executable work include anything, other
123 than the work as a whole, that (a) is included in the normal form of
124 packaging a Major Component, but which is not part of that Major
125 Component, and (b) serves only to enable use of the work with that
126 Major Component, or to implement a Standard Interface for which an
127 implementation is available to the public in source code form. A
128 "Major Component", in this context, means a major essential component
129 (kernel, window system, and so on) of the specific operating system
130 (if any) on which the executable work runs, or a compiler used to
131 produce the work, or an object code interpreter used to run it.
132
133 The "Corresponding Source" for a work in object code form means all
134 the source code needed to generate, install, and (for an executable
135 work) run the object code and to modify the work, including scripts to
136 control those activities. However, it does not include the work's
137 System Libraries, or general-purpose tools or generally available free
138 programs which are used unmodified in performing those activities but
139 which are not part of the work. For example, Corresponding Source
140 includes interface definition files associated with source files for
141 the work, and the source code for shared libraries and dynamically
142 linked subprograms that the work is specifically designed to require,
143 such as by intimate data communication or control flow between those
144 subprograms and other parts of the work.
145
146 The Corresponding Source need not include anything that users
147 can regenerate automatically from other parts of the Corresponding
148 Source.
149
150 The Corresponding Source for a work in source code form is that
151 same work.
152
153 2. Basic Permissions.
154
155 All rights granted under this License are granted for the term of
156 copyright on the Program, and are irrevocable provided the stated
157 conditions are met. This License explicitly affirms your unlimited
158 permission to run the unmodified Program. The output from running a
159 covered work is covered by this License only if the output, given its
160 content, constitutes a covered work. This License acknowledges your
161 rights of fair use or other equivalent, as provided by copyright law.
162
163 You may make, run and propagate covered works that you do not
164 convey, without conditions so long as your license otherwise remains
165 in force. You may convey covered works to others for the sole purpose
166 of having them make modifications exclusively for you, or provide you
167 with facilities for running those works, provided that you comply with
168 the terms of this License in conveying all material for which you do
169 not control copyright. Those thus making or running the covered works
170 for you must do so exclusively on your behalf, under your direction
171 and control, on terms that prohibit them from making any copies of
172 your copyrighted material outside their relationship with you.
173
174 Conveying under any other circumstances is permitted solely under
175 the conditions stated below. Sublicensing is not allowed; section 10
176 makes it unnecessary.
177
178 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
179
180 No covered work shall be deemed part of an effective technological
181 measure under any applicable law fulfilling obligations under article
182 11 of the WIPO copyright treaty adopted on 20 December 1996, or
183 similar laws prohibiting or restricting circumvention of such
184 measures.
185
186 When you convey a covered work, you waive any legal power to forbid
187 circumvention of technological measures to the extent such circumvention
188 is effected by exercising rights under this License with respect to
189 the covered work, and you disclaim any intention to limit operation or
190 modification of the work as a means of enforcing, against the work's
191 users, your or third parties' legal rights to forbid circumvention of
192 technological measures.
193
194 4. Conveying Verbatim Copies.
195
196 You may convey verbatim copies of the Program's source code as you
197 receive it, in any medium, provided that you conspicuously and
198 appropriately publish on each copy an appropriate copyright notice;
199 keep intact all notices stating that this License and any
200 non-permissive terms added in accord with section 7 apply to the code;
201 keep intact all notices of the absence of any warranty; and give all
202 recipients a copy of this License along with the Program.
203
204 You may charge any price or no price for each copy that you convey,
205 and you may offer support or warranty protection for a fee.
206
207 5. Conveying Modified Source Versions.
208
209 You may convey a work based on the Program, or the modifications to
210 produce it from the Program, in the form of source code under the
211 terms of section 4, provided that you also meet all of these conditions:
212
213 a) The work must carry prominent notices stating that you modified
214 it, and giving a relevant date.
215
216 b) The work must carry prominent notices stating that it is
217 released under this License and any conditions added under section
218 7. This requirement modifies the requirement in section 4 to
219 "keep intact all notices".
220
221 c) You must license the entire work, as a whole, under this
222 License to anyone who comes into possession of a copy. This
223 License will therefore apply, along with any applicable section 7
224 additional terms, to the whole of the work, and all its parts,
225 regardless of how they are packaged. This License gives no
226 permission to license the work in any other way, but it does not
227 invalidate such permission if you have separately received it.
228
229 d) If the work has interactive user interfaces, each must display
230 Appropriate Legal Notices; however, if the Program has interactive
231 interfaces that do not display Appropriate Legal Notices, your
232 work need not make them do so.
233
234 A compilation of a covered work with other separate and independent
235 works, which are not by their nature extensions of the covered work,
236 and which are not combined with it such as to form a larger program,
237 in or on a volume of a storage or distribution medium, is called an
238 "aggregate" if the compilation and its resulting copyright are not
239 used to limit the access or legal rights of the compilation's users
240 beyond what the individual works permit. Inclusion of a covered work
241 in an aggregate does not cause this License to apply to the other
242 parts of the aggregate.
243
244 6. Conveying Non-Source Forms.
245
246 You may convey a covered work in object code form under the terms
247 of sections 4 and 5, provided that you also convey the
248 machine-readable Corresponding Source under the terms of this License,
249 in one of these ways:
250
251 a) Convey the object code in, or embodied in, a physical product
252 (including a physical distribution medium), accompanied by the
253 Corresponding Source fixed on a durable physical medium
254 customarily used for software interchange.
255
256 b) Convey the object code in, or embodied in, a physical product
257 (including a physical distribution medium), accompanied by a
258 written offer, valid for at least three years and valid for as
259 long as you offer spare parts or customer support for that product
260 model, to give anyone who possesses the object code either (1) a
261 copy of the Corresponding Source for all the software in the
262 product that is covered by this License, on a durable physical
263 medium customarily used for software interchange, for a price no
264 more than your reasonable cost of physically performing this
265 conveying of source, or (2) access to copy the
266 Corresponding Source from a network server at no charge.
267
268 c) Convey individual copies of the object code with a copy of the
269 written offer to provide the Corresponding Source. This
270 alternative is allowed only occasionally and noncommercially, and
271 only if you received the object code with such an offer, in accord
272 with subsection 6b.
273
274 d) Convey the object code by offering access from a designated
275 place (gratis or for a charge), and offer equivalent access to the
276 Corresponding Source in the same way through the same place at no
277 further charge. You need not require recipients to copy the
278 Corresponding Source along with the object code. If the place to
279 copy the object code is a network server, the Corresponding Source
280 may be on a different server (operated by you or a third party)
281 that supports equivalent copying facilities, provided you maintain
282 clear directions next to the object code saying where to find the
283 Corresponding Source. Regardless of what server hosts the
284 Corresponding Source, you remain obligated to ensure that it is
285 available for as long as needed to satisfy these requirements.
286
287 e) Convey the object code using peer-to-peer transmission, provided
288 you inform other peers where the object code and Corresponding
289 Source of the work are being offered to the general public at no
290 charge under subsection 6d.
291
292 A separable portion of the object code, whose source code is excluded
293 from the Corresponding Source as a System Library, need not be
294 included in conveying the object code work.
295
296 A "User Product" is either (1) a "consumer product", which means any
297 tangible personal property which is normally used for personal, family,
298 or household purposes, or (2) anything designed or sold for incorporation
299 into a dwelling. In determining whether a product is a consumer product,
300 doubtful cases shall be resolved in favor of coverage. For a particular
301 product received by a particular user, "normally used" refers to a
302 typical or common use of that class of product, regardless of the status
303 of the particular user or of the way in which the particular user
304 actually uses, or expects or is expected to use, the product. A product
305 is a consumer product regardless of whether the product has substantial
306 commercial, industrial or non-consumer uses, unless such uses represent
307 the only significant mode of use of the product.
308
309 "Installation Information" for a User Product means any methods,
310 procedures, authorization keys, or other information required to install
311 and execute modified versions of a covered work in that User Product from
312 a modified version of its Corresponding Source. The information must
313 suffice to ensure that the continued functioning of the modified object
314 code is in no case prevented or interfered with solely because
315 modification has been made.
316
317 If you convey an object code work under this section in, or with, or
318 specifically for use in, a User Product, and the conveying occurs as
319 part of a transaction in which the right of possession and use of the
320 User Product is transferred to the recipient in perpetuity or for a
321 fixed term (regardless of how the transaction is characterized), the
322 Corresponding Source conveyed under this section must be accompanied
323 by the Installation Information. But this requirement does not apply
324 if neither you nor any third party retains the ability to install
325 modified object code on the User Product (for example, the work has
326 been installed in ROM).
327
328 The requirement to provide Installation Information does not include a
329 requirement to continue to provide support service, warranty, or updates
330 for a work that has been modified or installed by the recipient, or for
331 the User Product in which it has been modified or installed. Access to a
332 network may be denied when the modification itself materially and
333 adversely affects the operation of the network or violates the rules and
334 protocols for communication across the network.
335
336 Corresponding Source conveyed, and Installation Information provided,
337 in accord with this section must be in a format that is publicly
338 documented (and with an implementation available to the public in
339 source code form), and must require no special password or key for
340 unpacking, reading or copying.
341
342 7. Additional Terms.
343
344 "Additional permissions" are terms that supplement the terms of this
345 License by making exceptions from one or more of its conditions.
346 Additional permissions that are applicable to the entire Program shall
347 be treated as though they were included in this License, to the extent
348 that they are valid under applicable law. If additional permissions
349 apply only to part of the Program, that part may be used separately
350 under those permissions, but the entire Program remains governed by
351 this License without regard to the additional permissions.
352
353 When you convey a copy of a covered work, you may at your option
354 remove any additional permissions from that copy, or from any part of
355 it. (Additional permissions may be written to require their own
356 removal in certain cases when you modify the work.) You may place
357 additional permissions on material, added by you to a covered work,
358 for which you have or can give appropriate copyright permission.
359
360 Notwithstanding any other provision of this License, for material you
361 add to a covered work, you may (if authorized by the copyright holders of
362 that material) supplement the terms of this License with terms:
363
364 a) Disclaiming warranty or limiting liability differently from the
365 terms of sections 15 and 16 of this License; or
366
367 b) Requiring preservation of specified reasonable legal notices or
368 author attributions in that material or in the Appropriate Legal
369 Notices displayed by works containing it; or
370
371 c) Prohibiting misrepresentation of the origin of that material, or
372 requiring that modified versions of such material be marked in
373 reasonable ways as different from the original version; or
374
375 d) Limiting the use for publicity purposes of names of licensors or
376 authors of the material; or
377
378 e) Declining to grant rights under trademark law for use of some
379 trade names, trademarks, or service marks; or
380
381 f) Requiring indemnification of licensors and authors of that
382 material by anyone who conveys the material (or modified versions of
383 it) with contractual assumptions of liability to the recipient, for
384 any liability that these contractual assumptions directly impose on
385 those licensors and authors.
386
387 All other non-permissive additional terms are considered "further
388 restrictions" within the meaning of section 10. If the Program as you
389 received it, or any part of it, contains a notice stating that it is
390 governed by this License along with a term that is a further
391 restriction, you may remove that term. If a license document contains
392 a further restriction but permits relicensing or conveying under this
393 License, you may add to a covered work material governed by the terms
394 of that license document, provided that the further restriction does
395 not survive such relicensing or conveying.
396
397 If you add terms to a covered work in accord with this section, you
398 must place, in the relevant source files, a statement of the
399 additional terms that apply to those files, or a notice indicating
400 where to find the applicable terms.
401
402 Additional terms, permissive or non-permissive, may be stated in the
403 form of a separately written license, or stated as exceptions;
404 the above requirements apply either way.
405
406 8. Termination.
407
408 You may not propagate or modify a covered work except as expressly
409 provided under this License. Any attempt otherwise to propagate or
410 modify it is void, and will automatically terminate your rights under
411 this License (including any patent licenses granted under the third
412 paragraph of section 11).
413
414 However, if you cease all violation of this License, then your
415 license from a particular copyright holder is reinstated (a)
416 provisionally, unless and until the copyright holder explicitly and
417 finally terminates your license, and (b) permanently, if the copyright
418 holder fails to notify you of the violation by some reasonable means
419 prior to 60 days after the cessation.
420
421 Moreover, your license from a particular copyright holder is
422 reinstated permanently if the copyright holder notifies you of the
423 violation by some reasonable means, this is the first time you have
424 received notice of violation of this License (for any work) from that
425 copyright holder, and you cure the violation prior to 30 days after
426 your receipt of the notice.
427
428 Termination of your rights under this section does not terminate the
429 licenses of parties who have received copies or rights from you under
430 this License. If your rights have been terminated and not permanently
431 reinstated, you do not qualify to receive new licenses for the same
432 material under section 10.
433
434 9. Acceptance Not Required for Having Copies.
435
436 You are not required to accept this License in order to receive or
437 run a copy of the Program. Ancillary propagation of a covered work
438 occurring solely as a consequence of using peer-to-peer transmission
439 to receive a copy likewise does not require acceptance. However,
440 nothing other than this License grants you permission to propagate or
441 modify any covered work. These actions infringe copyright if you do
442 not accept this License. Therefore, by modifying or propagating a
443 covered work, you indicate your acceptance of this License to do so.
444
445 10. Automatic Licensing of Downstream Recipients.
446
447 Each time you convey a covered work, the recipient automatically
448 receives a license from the original licensors, to run, modify and
449 propagate that work, subject to this License. You are not responsible
450 for enforcing compliance by third parties with this License.
451
452 An "entity transaction" is a transaction transferring control of an
453 organization, or substantially all assets of one, or subdividing an
454 organization, or merging organizations. If propagation of a covered
455 work results from an entity transaction, each party to that
456 transaction who receives a copy of the work also receives whatever
457 licenses to the work the party's predecessor in interest had or could
458 give under the previous paragraph, plus a right to possession of the
459 Corresponding Source of the work from the predecessor in interest, if
460 the predecessor has it or can get it with reasonable efforts.
461
462 You may not impose any further restrictions on the exercise of the
463 rights granted or affirmed under this License. For example, you may
464 not impose a license fee, royalty, or other charge for exercise of
465 rights granted under this License, and you may not initiate litigation
466 (including a cross-claim or counterclaim in a lawsuit) alleging that
467 any patent claim is infringed by making, using, selling, offering for
468 sale, or importing the Program or any portion of it.
469
470 11. Patents.
471
472 A "contributor" is a copyright holder who authorizes use under this
473 License of the Program or a work on which the Program is based. The
474 work thus licensed is called the contributor's "contributor version".
475
476 A contributor's "essential patent claims" are all patent claims
477 owned or controlled by the contributor, whether already acquired or
478 hereafter acquired, that would be infringed by some manner, permitted
479 by this License, of making, using, or selling its contributor version,
480 but do not include claims that would be infringed only as a
481 consequence of further modification of the contributor version. For
482 purposes of this definition, "control" includes the right to grant
483 patent sublicenses in a manner consistent with the requirements of
484 this License.
485
486 Each contributor grants you a non-exclusive, worldwide, royalty-free
487 patent license under the contributor's essential patent claims, to
488 make, use, sell, offer for sale, import and otherwise run, modify and
489 propagate the contents of its contributor version.
490
491 In the following three paragraphs, a "patent license" is any express
492 agreement or commitment, however denominated, not to enforce a patent
493 (such as an express permission to practice a patent or covenant not to
494 sue for patent infringement). To "grant" such a patent license to a
495 party means to make such an agreement or commitment not to enforce a
496 patent against the party.
497
498 If you convey a covered work, knowingly relying on a patent license,
499 and the Corresponding Source of the work is not available for anyone
500 to copy, free of charge and under the terms of this License, through a
501 publicly available network server or other readily accessible means,
502 then you must either (1) cause the Corresponding Source to be so
503 available, or (2) arrange to deprive yourself of the benefit of the
504 patent license for this particular work, or (3) arrange, in a manner
505 consistent with the requirements of this License, to extend the patent
506 license to downstream recipients. "Knowingly relying" means you have
507 actual knowledge that, but for the patent license, your conveying the
508 covered work in a country, or your recipient's use of the covered work
509 in a country, would infringe one or more identifiable patents in that
510 country that you have reason to believe are valid.
511
512 If, pursuant to or in connection with a single transaction or
513 arrangement, you convey, or propagate by procuring conveyance of, a
514 covered work, and grant a patent license to some of the parties
515 receiving the covered work authorizing them to use, propagate, modify
516 or convey a specific copy of the covered work, then the patent license
517 you grant is automatically extended to all recipients of the covered
518 work and works based on it.
519
520 A patent license is "discriminatory" if it does not include within
521 the scope of its coverage, prohibits the exercise of, or is
522 conditioned on the non-exercise of one or more of the rights that are
523 specifically granted under this License. You may not convey a covered
524 work if you are a party to an arrangement with a third party that is
525 in the business of distributing software, under which you make payment
526 to the third party based on the extent of your activity of conveying
527 the work, and under which the third party grants, to any of the
528 parties who would receive the covered work from you, a discriminatory
529 patent license (a) in connection with copies of the covered work
530 conveyed by you (or copies made from those copies), or (b) primarily
531 for and in connection with specific products or compilations that
532 contain the covered work, unless you entered into that arrangement,
533 or that patent license was granted, prior to 28 March 2007.
534
535 Nothing in this License shall be construed as excluding or limiting
536 any implied license or other defenses to infringement that may
537 otherwise be available to you under applicable patent law.
538
539 12. No Surrender of Others' Freedom.
540
541 If conditions are imposed on you (whether by court order, agreement or
542 otherwise) that contradict the conditions of this License, they do not
543 excuse you from the conditions of this License. If you cannot convey a
544 covered work so as to satisfy simultaneously your obligations under this
545 License and any other pertinent obligations, then as a consequence you may
546 not convey it at all. For example, if you agree to terms that obligate you
547 to collect a royalty for further conveying from those to whom you convey
548 the Program, the only way you could satisfy both those terms and this
549 License would be to refrain entirely from conveying the Program.
550
551 13. Use with the GNU Affero General Public License.
552
553 Notwithstanding any other provision of this License, you have
554 permission to link or combine any covered work with a work licensed
555 under version 3 of the GNU Affero General Public License into a single
556 combined work, and to convey the resulting work. The terms of this
557 License will continue to apply to the part which is the covered work,
558 but the special requirements of the GNU Affero General Public License,
559 section 13, concerning interaction through a network will apply to the
560 combination as such.
561
562 14. Revised Versions of this License.
563
564 The Free Software Foundation may publish revised and/or new versions of
565 the GNU General Public License from time to time. Such new versions will
566 be similar in spirit to the present version, but may differ in detail to
567 address new problems or concerns.
568
569 Each version is given a distinguishing version number. If the
570 Program specifies that a certain numbered version of the GNU General
571 Public License "or any later version" applies to it, you have the
572 option of following the terms and conditions either of that numbered
573 version or of any later version published by the Free Software
574 Foundation. If the Program does not specify a version number of the
575 GNU General Public License, you may choose any version ever published
576 by the Free Software Foundation.
577
578 If the Program specifies that a proxy can decide which future
579 versions of the GNU General Public License can be used, that proxy's
580 public statement of acceptance of a version permanently authorizes you
581 to choose that version for the Program.
582
583 Later license versions may give you additional or different
584 permissions. However, no additional obligations are imposed on any
585 author or copyright holder as a result of your choosing to follow a
586 later version.
587
588 15. Disclaimer of Warranty.
589
590 THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
591 APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
592 HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
593 OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
594 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
595 PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
596 IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
597 ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
598
599 16. Limitation of Liability.
600
601 IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
602 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
603 THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
604 GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
605 USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
606 DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
607 PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
608 EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
609 SUCH DAMAGES.
610
611 17. Interpretation of Sections 15 and 16.
612
613 If the disclaimer of warranty and limitation of liability provided
614 above cannot be given local legal effect according to their terms,
615 reviewing courts shall apply local law that most closely approximates
616 an absolute waiver of all civil liability in connection with the
617 Program, unless a warranty or assumption of liability accompanies a
618 copy of the Program in return for a fee.
619
620 END OF TERMS AND CONDITIONS
621
622 How to Apply These Terms to Your New Programs
623
624 If you develop a new program, and you want it to be of the greatest
625 possible use to the public, the best way to achieve this is to make it
626 free software which everyone can redistribute and change under these terms.
627
628 To do so, attach the following notices to the program. It is safest
629 to attach them to the start of each source file to most effectively
630 state the exclusion of warranty; and each file should have at least
631 the "copyright" line and a pointer to where the full notice is found.
632
633 <one line to give the program's name and a brief idea of what it does.>
634 Copyright (C) <year> <name of author>
635
636 This program is free software: you can redistribute it and/or modify
637 it under the terms of the GNU General Public License as published by
638 the Free Software Foundation, either version 3 of the License, or
639 (at your option) any later version.
640
641 This program is distributed in the hope that it will be useful,
642 but WITHOUT ANY WARRANTY; without even the implied warranty of
643 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
644 GNU General Public License for more details.
645
646 You should have received a copy of the GNU General Public License
647 along with this program. If not, see <http://www.gnu.org/licenses/>.
648
649 Also add information on how to contact you by electronic and paper mail.
650
651 If the program does terminal interaction, make it output a short
652 notice like this when it starts in an interactive mode:
653
654 <program> Copyright (C) <year> <name of author>
655 This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
656 This is free software, and you are welcome to redistribute it
657 under certain conditions; type `show c' for details.
658
659 The hypothetical commands `show w' and `show c' should show the appropriate
660 parts of the General Public License. Of course, your program's commands
661 might be different; for a GUI interface, you would use an "about box".
662
663 You should also get your employer (if you work as a programmer) or school,
664 if any, to sign a "copyright disclaimer" for the program, if necessary.
665 For more information on this, and how to apply and follow the GNU GPL, see
666 <http://www.gnu.org/licenses/>.
667
668 The GNU General Public License does not permit incorporating your program
669 into proprietary programs. If your program is a subroutine library, you
670 may consider it more useful to permit linking proprietary applications with
671 the library. If this is what you want to do, use the GNU Lesser General
672 Public License instead of this License. But first, please read
673 <http://www.gnu.org/philosophy/why-not-lgpl.html>.
0 # FastQC
1 FastQC is a program designed to spot potential problems in high throughput sequencing datasets. It runs a set of analyses on one or more raw sequence files in fastq or bam format and produces a report which summarises the results.
2
3 ![FastQC Screenshot](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc.png)
4
5 FastQC will highlight any areas where this library looks unusual and where you should take a closer look. The program is not tied to any specific type of sequencing technique and can be used to look at libraries coming from a large number of different experiment types (Genomic Sequencing, ChIP-Seq, RNA-Seq, BS-Seq etc etc).
6
7 This project page contains the source code for the application and is really only useful to people wanting to develop new functionality or trace bugs in FastQC. If you just want to run the program then you want to go to the [**project web page**](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) where you can download the compiled packages for Windows, OSX and Linux.
0 FastQC - A Quality Control application for FastQ files
1 ------------------------------------------------------
2
3 Most high throughput sequencers generate output in FastQ format. This
4 format combines the base calls for the sequence which was generated with
5 an encoded quality value for each base which says how confident the
6 sequencer was that the base call generated was correct.
7
8 Before proceeding with the analysis of a sequence data set it is
9 a good idea to do some basic quality control checks on the raw data
10 to ensure that there are no hidden problems which might be more
11 difficult to detect at a later stage.
12
13 FastQC is an application which takes a FastQ file and runs a series
14 of tests on it to generate a comprehensive QC report. This will
15 tell you if there is anything unusual about your sequence. Each
16 test is flagged as a pass, warning or fail depending on how far it
17 departs from what you'd expect from a normal large dataset with no
18 significant biases. It's important to stress that warnings or even
19 failures do not necessarily mean that there is a problem with your
20 data, only that it is unusual. It is possible that the biological
21 nature of your sample means that you would expect this particular
22 bias in your results.
23
24 FastQC can be run either as an interactive graphical application
25 which allows you to view results for multiple files in a single
26 application. Alternatively you can run the program in a non
27 interactive way (say as part of a pipeline) which will generate
28 an HTML report for each file you process.
29
30 FastQC is a cross-platform application, written in java. In theory it
31 should run on any platform which has a suitable java runtime environment.
32 Having said that we've only tested it on Windows, MacOSX and Linux
33 running the Oracle v1.6 to 1.8 JREs. Please let us know what happened if
34 you try running it on other platforms / JREs. Please see the detailed
35 instructions in the INSTALL.txt document to tell you how to get a
36 suitable java version to run FastQC on your system.
37
38 If you have any comments about FastQC we would like to hear them. You
39 can either enter them in our bug tracking system at:
40
41 http://www.bioinformatics.babraham.ac.uk/bugzilla/
42
43 ..or send them directly to simon.andrews@babraham.ac.uk.
0 FastQC - A Quality Control application for FastQ files
1 ------------------------------------------------------
2
3 Most high throughput sequencers generate output in FastQ format. This
4 format combines the base calls for the sequence which was generated with
5 an encoded quality value for each base which says how confident the
6 sequencer was that the base call generated was correct.
7
8 Before proceeding with the analysis of a sequence data set it is
9 a good idea to do some basic quality control checks on the raw data
10 to ensure that there are no hidden problems which might be more
11 difficult to detect at a later stage.
12
13 FastQC is an application which takes a FastQ file and runs a series
14 of tests on it to generate a comprehensive QC report. This will
15 tell you if there is anything unusual about your sequence. Each
16 test is flagged as a pass, warning or fail depending on how far it
17 departs from what you'd expect from a normal large dataset with no
18 significant biases. It's important to stress that warnings or even
19 failures do not necessarily mean that there is a problem with your
20 data, only that it is unusual. It is possible that the biological
21 nature of your sample means that you would expect this particular
22 bias in your results.
23
24 FastQC can be run either as an interactive graphical application
25 which allows you to view results for multiple files in a single
26 application. Alternatively you can run the program in a non
27 interactive way (say as part of a pipeline) which will generate
28 an HTML report for each file you process.
29
30 FastQC is a cross-platform application, written in java. In theory it
31 should run on any platform which has a suitable java runtime environment.
32 Having said that we've only tested it on Windows, MacOSX and Linux
33 running the Oracle v1.6 to 1.8 JREs. Please let us know what happened if
34 you try running it on other platforms / JREs. Please see the detailed
35 instructions in the INSTALL.txt document to tell you how to get a
36 suitable java version to run FastQC on your system.
37
38 If you have any comments about FastQC we would like to hear them. You
39 can either enter them into the github bug tracker at:
40
41 https://github.com/s-andrews/FastQC/issues/
42
43 ..or send them directly to simon.andrews@babraham.ac.uk.
0 RELEASE NOTES FOR FastQC v0.11.7
1 --------------------------------
2
3 This is a bugfix release for a bug introduced in 0.11.6. Specifically
4 this version would crash if the first sequence in a file was <12bp
5 (or less than the length of the longest adapter if a custom adapters
6 file was being used).
7
8
9 RELEASE NOTES FOR FastQC v0.11.6
10 --------------------------------
11
12 This update fixes some bugs and updates some of the functionality to
13 accommodate changes in some of the sequencing platforms.
14
15 There is one major change which is that by default we now disable the
16 kmer module. With the inclusion of the adapter plot the value of the
17 information in the Kmer plot is often not great, and it is easy to
18 confound it if there are any over-represented sequences, or primer
19 compositional bias. Overall therefore we consider it best to not
20 routinely include this module.
21
22 If you want to turn this module back on, then simply edit the
23 limits.txt file in the Configuration folder of the FastQC installation
24 and change the line near the top which says:
25
26 kmer ignore 1
27
28 ..to..
29
30 kmer ignore 0
31
32 ..and the module will be re-enabled.
33
34 Other changes in this release are:
35
36 1) Fixed a bug which prematurely abandoned the adapter content plot
37 when long custom adapters were being used.
38
39 2) Changed the cutoff for the maximum number of tiles to allow for
40 the novaseq which has lots of them.
41
42 3) Fixed a bug in the parsing of tile numbers on some illumina
43 sequencers
44
45 4) Added some new Clontech sequences to the contaminants list.
46
47 5) Made the --nanopore option work with the new multi-folder ONT
48 folder structure
49
50 6) Added an option to specify a file name when streaming data into
51 FastQC
52
53 7) Added new RDF paths to check for fastq data in nanopore fast5 files
54
55 8) Fix parsing of newer nanopore base names to correctly collate sequences
56
57 9) Fixed a typo in the documentation for the per tile plot
58
59 10) Added a --min_length option to ignore short sequences making it easier
60 to generate directly comparable statistics between runs.
61
62
063 RELEASE NOTES FOR FastQC v0.11.5
164 --------------------------------
265
0
1 @media screen {
2 div.summary {
3 width: 18em;
4 position:fixed;
5 top: 3em;
6 margin:1em 0 0 1em;
7 }
8
9 div.main {
10 display:block;
11 position:absolute;
12 overflow:auto;
13 height:auto;
14 width:auto;
15 top:4.5em;
16 bottom:2.3em;
17 left:18em;
18 right:0;
19 border-left: 1px solid #CCC;
20 padding:0 0 0 1em;
21 background-color: white;
22 z-index:1;
23 }
24
25 div.header {
26 background-color: #EEE;
27 border:0;
28 margin:0;
29 padding: 0.5em;
30 font-size: 200%;
31 font-weight: bold;
32 position:fixed;
33 width:100%;
34 top:0;
35 left:0;
36 z-index:2;
37 }
38
39 div.footer {
40 background-color: #EEE;
41 border:0;
42 margin:0;
43 padding:0.5em;
44 height: 1.3em;
45 overflow:hidden;
46 font-size: 100%;
47 font-weight: bold;
48 position:fixed;
49 bottom:0;
50 width:100%;
51 z-index:2;
52 }
53
54 img.indented {
55 margin-left: 3em;
56 }
57 }
58
59 @media print {
60 img {
61 max-width:100% !important;
62 page-break-inside: avoid;
63 }
64 h2, h3 {
65 page-break-after: avoid;
66 }
67 div.header {
68 background-color: #FFF;
69 }
70
71 }
72
73 body {
74 font-family: sans-serif;
75 color: #000;
76 background-color: #FFF;
77 border: 0;
78 margin: 0;
79 padding: 0;
80 }
81
82 div.header {
83 border:0;
84 margin:0;
85 padding: 0.5em;
86 font-size: 200%;
87 font-weight: bold;
88 width:100%;
89 }
90
91 #header_title {
92 display:inline-block;
93 float:left;
94 clear:left;
95 }
96 #header_filename {
97 display:inline-block;
98 float:right;
99 clear:right;
100 font-size: 50%;
101 margin-right:2em;
102 text-align: right;
103 }
104
105 div.header h3 {
106 font-size: 50%;
107 margin-bottom: 0;
108 }
109
110 div.summary ul {
111 padding-left:0;
112 list-style-type:none;
113 }
114
115 div.summary ul li img {
116 margin-bottom:-0.5em;
117 margin-top:0.5em;
118 }
119
120 div.main {
121 background-color: white;
122 }
123
124 div.module {
125 padding-bottom:1.5em;
126 padding-top:1.5em;
127 }
128
129 div.footer {
130 background-color: #EEE;
131 border:0;
132 margin:0;
133 padding: 0.5em;
134 font-size: 100%;
135 font-weight: bold;
136 width:100%;
137 }
138
139
140 a {
141 color: #000080;
142 }
143
144 a:hover {
145 color: #800000;
146 }
147
148 h2 {
149 color: #800000;
150 padding-bottom: 0;
151 margin-bottom: 0;
152 clear:left;
153 }
154
155 table {
156 margin-left: 3em;
157 text-align: center;
158 }
159
160 th {
161 text-align: center;
162 background-color: #000080;
163 color: #FFF;
164 padding: 0.4em;
165 }
166
167 td {
168 font-family: monospace;
169 text-align: left;
170 background-color: #EEE;
171 color: #000;
172 padding: 0.4em;
173 }
174
175 img {
176 padding-top: 0;
177 margin-top: 0;
178 border-top: 0;
179 }
180
181
182 p {
183 padding-top: 0;
184 margin-top: 0;
185 }
0
1 @media screen {
2 div.summary {
3 width: 18em;
4 position:fixed;
5 top: 3em;
6 margin:1em 0 0 1em;
7 }
8
9 div.main {
10 display:block;
11 position:absolute;
12 overflow:auto;
13 height:auto;
14 width:auto;
15 top:4.5em;
16 bottom:2.3em;
17 left:18em;
18 right:0;
19 border-left: 1px solid #CCC;
20 padding:0 0 0 1em;
21 background-color: white;
22 z-index:1;
23 }
24
25 div.header {
26 background-color: #EEE;
27 border:0;
28 margin:0;
29 padding: 0.5em;
30 font-size: 200%;
31 font-weight: bold;
32 position:fixed;
33 width:100%;
34 top:0;
35 left:0;
36 z-index:2;
37 }
38
39 div.footer {
40 background-color: #EEE;
41 border:0;
42 margin:0;
43 padding:0.5em;
44 height: 1.3em;
45 overflow:hidden;
46 font-size: 100%;
47 font-weight: bold;
48 position:fixed;
49 bottom:0;
50 width:100%;
51 z-index:2;
52 }
53
54 img.indented {
55 margin-left: 3em;
56 }
57 }
58
59 @media print {
60 img {
61 max-width:100% !important;
62 page-break-inside: avoid;
63 }
64 h2, h3 {
65 page-break-after: avoid;
66 }
67 div.header {
68 background-color: #FFF;
69 }
70
71 }
72
73 body {
74 font-family: sans-serif;
75 color: #000;
76 background-color: #FFF;
77 border: 0;
78 margin: 0;
79 padding: 0;
80 }
81
82 div.header {
83 border:0;
84 margin:0;
85 padding: 0.5em;
86 font-size: 200%;
87 font-weight: bold;
88 width:100%;
89 }
90
91 #header_title {
92 display:inline-block;
93 float:left;
94 clear:left;
95 }
96 #header_filename {
97 display:inline-block;
98 float:right;
99 clear:right;
100 font-size: 50%;
101 margin-right:2em;
102 text-align: right;
103 }
104
105 div.header h3 {
106 font-size: 50%;
107 margin-bottom: 0;
108 }
109
110 div.summary ul {
111 padding-left:0;
112 list-style-type:none;
113 }
114
115 div.summary ul li img {
116 margin-bottom:-0.5em;
117 margin-top:0.5em;
118 }
119
120 div.main {
121 background-color: white;
122 }
123
124 div.module {
125 padding-bottom:1.5em;
126 padding-top:1.5em;
127 }
128
129 div.footer {
130 background-color: #EEE;
131 border:0;
132 margin:0;
133 padding: 0.5em;
134 font-size: 100%;
135 font-weight: bold;
136 width:100%;
137 }
138
139
140 a {
141 color: #000080;
142 }
143
144 a:hover {
145 color: #800000;
146 }
147
148 h2 {
149 color: #800000;
150 padding-bottom: 0;
151 margin-bottom: 0;
152 clear:left;
153 }
154
155 table {
156 margin-left: 3em;
157 text-align: center;
158 }
159
160 th {
161 text-align: center;
162 background-color: #000080;
163 color: #FFF;
164 padding: 0.4em;
165 }
166
167 td {
168 font-family: monospace;
169 text-align: left;
170 background-color: #EEE;
171 color: #000;
172 padding: 0.4em;
173 }
174
175 img {
176 padding-top: 0;
177 margin-top: 0;
178 border-top: 0;
179 }
180
181
182 p {
183 padding-top: 0;
184 margin-top: 0;
185 }
44 use Getopt::Long;
55
66 #####################################################################################
7 # Copyright Copyright 2010-15 Simon Andrews #
7 # Copyright Copyright 2010-17 Simon Andrews #
88 # #
99 # This file is part of FastQC. #
1010 # #
7979 my $kmer_size;
8080 my $temp_directory;
8181 my $java_bin = 'java';
82 my $min_length;
8283
8384 my $result = GetOptions('version' => \$version,
8485 'help' => \$help,
9899 'limits=s' => \$limits,
99100 'dir=s' => \$temp_directory,
100101 'java=s' => \$java_bin,
102 'min_length=i' => \$min_length,
101103 );
102104
103105 # Check the simple stuff first
148150 }
149151 push @java_args, "-Djava.io.tmpdir=$temp_directory";
150152
153 }
154
155 if ($min_length) {
156 push @java_args ,"-Dfastqc.min_length=$min_length";
151157 }
152158
153159 if ($threads) {
347353 really long reads, and your plots may end up a ridiculous size.
348354 You have been warned!
349355
356 --min_length Sets an artificial lower limit on the length of the sequence
357 to be shown in the report. As long as you set this to a value
358 greater or equal to your longest read length then this will be
359 the sequence length used to create your read groups. This can
360 be useful for making directly comaparable statistics from
361 datasets with somewhat variable read lengths.
362
350363 -f --format Bypasses the normal sequence file format detection and
351364 forces the program to use the specified format. Valid
352365 formats are bam,sam,bam_mapped,sam_mapped and fastq
0 java -Xmx250m -classpath .;./sam-1.103.jar;./jbzip2-0.9.jar uk.ac.babraham.FastQC.FastQCApplication
0 java -Xmx250m -classpath .;./sam-1.103.jar;./jbzip2-0.9.jar uk.ac.babraham.FastQC.FastQCApplication
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Analysis;
20
21 import uk.ac.babraham.FastQC.Modules.QCModule;
22 import uk.ac.babraham.FastQC.Sequence.SequenceFile;
23
24 public interface AnalysisListener {
25
26 public void analysisStarted(SequenceFile file);
27 public void analysisUpdated(SequenceFile file, int sequencesProcessed, int percentComplete);
28 public void analysisComplete(SequenceFile file, QCModule [] results);
29 public void analysisExceptionReceived(SequenceFile file, Exception e);
30 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Analysis;
20
21 import uk.ac.babraham.FastQC.Modules.QCModule;
22 import uk.ac.babraham.FastQC.Sequence.SequenceFile;
23
24 public interface AnalysisListener {
25
26 public void analysisStarted(SequenceFile file);
27 public void analysisUpdated(SequenceFile file, int sequencesProcessed, int percentComplete);
28 public void analysisComplete(SequenceFile file, QCModule [] results);
29 public void analysisExceptionReceived(SequenceFile file, Exception e);
30 }
00 /**
1 * Copyright Copyright 2010-15 Simon Andrews
1 * Copyright Copyright 2010-17 Simon Andrews
22 *
33 * This file is part of FastQC.
44 *
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Analysis;
20
21 import java.util.ArrayList;
22 import java.util.Iterator;
23 import java.util.List;
24
25 import uk.ac.babraham.FastQC.Modules.QCModule;
26 import uk.ac.babraham.FastQC.Sequence.Sequence;
27 import uk.ac.babraham.FastQC.Sequence.SequenceFile;
28 import uk.ac.babraham.FastQC.Sequence.SequenceFormatException;
29
30 public class AnalysisRunner implements Runnable {
31
32 private SequenceFile file;
33 private QCModule [] modules;
34 private List<AnalysisListener> listeners = new ArrayList<AnalysisListener>();
35 private int percentComplete = 0;
36
37 public AnalysisRunner (SequenceFile file) {
38 this.file = file;
39 }
40
41 public void addAnalysisListener (AnalysisListener l) {
42 if (l != null && !listeners.contains(l)) {
43 listeners.add(l);
44 }
45 }
46
47 public void removeAnalysisListener (AnalysisListener l) {
48 if (l != null && listeners.contains(l)) {
49 listeners.remove(l);
50 }
51 }
52
53
54 public void startAnalysis (QCModule [] modules) {
55 this.modules = modules;
56 for (int i=0;i<modules.length;i++) {
57 modules[i].reset();
58 }
59 AnalysisQueue.getInstance().addToQueue(this);
60 }
61
62 public void run() {
63
64 Iterator<AnalysisListener> i = listeners.iterator();
65 while (i.hasNext()) {
66 i.next().analysisStarted(file);
67 }
68
69
70 int seqCount = 0;
71 while (file.hasNext()) {
72 ++seqCount;
73 Sequence seq;
74 try {
75 seq = file.next();
76 }
77 catch (SequenceFormatException e) {
78 i = listeners.iterator();
79 while (i.hasNext()) {
80 i.next().analysisExceptionReceived(file,e);
81 }
82 return;
83 }
84
85 for (int m=0;m<modules.length;m++) {
86 if (seq.isFiltered() && modules[m].ignoreFilteredSequences()) continue;
87 modules[m].processSequence(seq);
88 }
89
90 if (seqCount % 1000 == 0) {
91 if (file.getPercentComplete() >= percentComplete+5) {
92
93 percentComplete = (((int)file.getPercentComplete())/5)*5;
94
95 i = listeners.iterator();
96 while (i.hasNext()) {
97 i.next().analysisUpdated(file,seqCount,percentComplete);
98 }
99 try {
100 Thread.sleep(10);
101 }
102 catch (InterruptedException e) {}
103 }
104 }
105 }
106
107 i = listeners.iterator();
108 while (i.hasNext()) {
109 i.next().analysisComplete(file,modules);
110 }
111
112 }
113
114 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Analysis;
20
21 import java.util.ArrayList;
22 import java.util.Iterator;
23 import java.util.List;
24
25 import uk.ac.babraham.FastQC.Modules.QCModule;
26 import uk.ac.babraham.FastQC.Sequence.Sequence;
27 import uk.ac.babraham.FastQC.Sequence.SequenceFile;
28 import uk.ac.babraham.FastQC.Sequence.SequenceFormatException;
29
30 public class AnalysisRunner implements Runnable {
31
32 private SequenceFile file;
33 private QCModule [] modules;
34 private List<AnalysisListener> listeners = new ArrayList<AnalysisListener>();
35 private int percentComplete = 0;
36
37 public AnalysisRunner (SequenceFile file) {
38 this.file = file;
39 }
40
41 public void addAnalysisListener (AnalysisListener l) {
42 if (l != null && !listeners.contains(l)) {
43 listeners.add(l);
44 }
45 }
46
47 public void removeAnalysisListener (AnalysisListener l) {
48 if (l != null && listeners.contains(l)) {
49 listeners.remove(l);
50 }
51 }
52
53
54 public void startAnalysis (QCModule [] modules) {
55 this.modules = modules;
56 for (int i=0;i<modules.length;i++) {
57 modules[i].reset();
58 }
59 AnalysisQueue.getInstance().addToQueue(this);
60 }
61
62 public void run() {
63
64 Iterator<AnalysisListener> i = listeners.iterator();
65 while (i.hasNext()) {
66 i.next().analysisStarted(file);
67 }
68
69
70 int seqCount = 0;
71 while (file.hasNext()) {
72 ++seqCount;
73 Sequence seq;
74 try {
75 seq = file.next();
76 }
77 catch (SequenceFormatException e) {
78 i = listeners.iterator();
79 while (i.hasNext()) {
80 i.next().analysisExceptionReceived(file,e);
81 }
82 return;
83 }
84
85 for (int m=0;m<modules.length;m++) {
86 if (seq.isFiltered() && modules[m].ignoreFilteredSequences()) continue;
87 modules[m].processSequence(seq);
88 }
89
90 if (seqCount % 1000 == 0) {
91 if (file.getPercentComplete() >= percentComplete+5) {
92
93 percentComplete = (((int)file.getPercentComplete())/5)*5;
94
95 i = listeners.iterator();
96 while (i.hasNext()) {
97 i.next().analysisUpdated(file,seqCount,percentComplete);
98 }
99 try {
100 Thread.sleep(10);
101 }
102 catch (InterruptedException e) {}
103 }
104 }
105 }
106
107 i = listeners.iterator();
108 while (i.hasNext()) {
109 i.next().analysisComplete(file,modules);
110 }
111
112 }
113
114 }
0 /**
1 * Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Analysis;
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.util.Vector;
24 import java.util.concurrent.atomic.AtomicInteger;
25
26 import uk.ac.babraham.FastQC.FastQCConfig;
27 import uk.ac.babraham.FastQC.Modules.ModuleFactory;
28 import uk.ac.babraham.FastQC.Modules.QCModule;
29 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
30 import uk.ac.babraham.FastQC.Sequence.SequenceFactory;
31 import uk.ac.babraham.FastQC.Sequence.SequenceFile;
32 import uk.ac.babraham.FastQC.Utilities.CasavaBasename;
33 import uk.ac.babraham.FastQC.Utilities.NanoporeBasename;
34
35 public class OfflineRunner implements AnalysisListener {
36
37 private AtomicInteger filesRemaining;
38 private boolean showUpdates = true;
39
40 public OfflineRunner (String [] filenames) {
41
42 // See if we need to show updates
43 showUpdates = !FastQCConfig.getInstance().quiet;
44
45 Vector<File> files = new Vector<File>();
46
47 // We make a special case if they supply a single filename
48 // which is stdin. In this case we'll take data piped to us
49 // rather than trying to read the actual file. We'll also
50 // skip the existence check.
51
52 if (filenames.length == 1 && filenames[0].equals("stdin")) {
53 files.add(new File("stdin"));
54 }
55 else {
56 for (int f=0;f<filenames.length;f++) {
57 File file = new File(filenames[f]);
58
59 if (!file.exists() || ! file.canRead()) {
60 System.err.println("Skipping '"+filenames[f]+"' which didn't exist, or couldn't be read");
61 continue;
62 }
63
64 if (FastQCConfig.getInstance().nano && file.isDirectory()) {
65 File [] fast5files = file.listFiles();
66 for (int i=0;i<fast5files.length;i++) {
67 if (fast5files[i].getName().endsWith(".fast5")) {
68 files.add(fast5files[i]);
69 }
70 }
71 }
72 else {
73 files.add(file);
74 }
75 }
76 }
77
78
79 File [][] fileGroups;
80
81 // See if we need to group together files from a casava group
82 if (FastQCConfig.getInstance().casava) {
83 fileGroups = CasavaBasename.getCasavaGroups(files.toArray(new File[0]));
84 }
85 else if (FastQCConfig.getInstance().nano) {
86 fileGroups = NanoporeBasename.getNanoporeGroups(files.toArray(new File[0]));
87 }
88 else {
89 fileGroups = new File [files.size()][1];
90 for (int f=0;f<files.size();f++) {
91 fileGroups[f][0] = files.elementAt(f);
92 }
93 }
94
95
96 filesRemaining = new AtomicInteger(fileGroups.length);
97
98 for (int i=0;i<fileGroups.length;i++) {
99
100 try {
101 processFile(fileGroups[i]);
102 }
103 catch (Exception e) {
104 System.err.println("Failed to process "+fileGroups[i][0]);
105 e.printStackTrace();
106 filesRemaining.decrementAndGet();
107 }
108 }
109
110 // We need to hold this class open as otherwise the main method
111 // exits when it's finished.
112 while (filesRemaining.intValue() > 0) {
113 try {
114 Thread.sleep(1000);
115 }
116 catch (InterruptedException e) {}
117 }
118 System.exit(0);
119
120 }
121
122 public void processFile (File [] files) throws Exception {
123 for (int f=0;f<files.length;f++) {
124 if (!files[f].getName().equals("stdin") && !files[f].exists()) {
125 throw new IOException(files[f].getName()+" doesn't exist");
126 }
127 }
128 SequenceFile sequenceFile = SequenceFactory.getSequenceFile(files);
129
130 AnalysisRunner runner = new AnalysisRunner(sequenceFile);
131 runner.addAnalysisListener(this);
132
133 QCModule [] module_list = ModuleFactory.getStandardModuleList();
134
135 runner.startAnalysis(module_list);
136
137 }
138
139 public void analysisComplete(SequenceFile file, QCModule[] results) {
140 File reportFile;
141
142 if (showUpdates) System.out.println("Analysis complete for "+file.name());
143
144
145 if (FastQCConfig.getInstance().output_dir != null) {
146 String fileName = file.getFile().getName().replaceAll("\\.gz$","").replaceAll("\\.bz2$","").replaceAll("\\.txt$","").replaceAll("\\.fastq$", "").replaceAll("\\.fq$", "").replaceAll("\\.csfastq$", "").replaceAll("\\.sam$", "").replaceAll("\\.bam$", "")+"_fastqc.html";
147 reportFile = new File(FastQCConfig.getInstance().output_dir+"/"+fileName);
148 }
149 else {
150 reportFile = new File(file.getFile().getAbsolutePath().replaceAll("\\.gz$","").replaceAll("\\.bz2$","").replaceAll("\\.txt$","").replaceAll("\\.fastq$", "").replaceAll("\\.fq$", "").replaceAll("\\.csfastq$", "").replaceAll("\\.sam$", "").replaceAll("\\.bam$", "")+"_fastqc.html");
151 }
152
153 try {
154 new HTMLReportArchive(file, results, reportFile);
155 }
156 catch (Exception e) {
157 analysisExceptionReceived(file, e);
158 return;
159 }
160 filesRemaining.decrementAndGet();
161
162 }
163
164 public void analysisUpdated(SequenceFile file, int sequencesProcessed, int percentComplete) {
165
166 if (percentComplete % 5 == 0) {
167 if (percentComplete == 105) {
168 if (showUpdates) System.err.println("It seems our guess for the total number of records wasn't very good. Sorry about that.");
169 }
170 if (percentComplete > 100) {
171 if (showUpdates) System.err.println("Still going at "+percentComplete+"% complete for "+file.name());
172 }
173 else {
174 if (showUpdates) System.err.println("Approx "+percentComplete+"% complete for "+file.name());
175 }
176 }
177 }
178
179 public void analysisExceptionReceived(SequenceFile file, Exception e) {
180 System.err.println("Failed to process file "+file.name());
181 e.printStackTrace();
182 filesRemaining.decrementAndGet();
183 }
184
185 public void analysisStarted(SequenceFile file) {
186 if (showUpdates) System.err.println("Started analysis of "+file.name());
187
188 }
189
190 }
0 /**
1 * Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Analysis;
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.util.Vector;
24 import java.util.concurrent.atomic.AtomicInteger;
25
26 import uk.ac.babraham.FastQC.FastQCConfig;
27 import uk.ac.babraham.FastQC.Modules.ModuleFactory;
28 import uk.ac.babraham.FastQC.Modules.QCModule;
29 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
30 import uk.ac.babraham.FastQC.Sequence.SequenceFactory;
31 import uk.ac.babraham.FastQC.Sequence.SequenceFile;
32 import uk.ac.babraham.FastQC.Utilities.CasavaBasename;
33 import uk.ac.babraham.FastQC.Utilities.NanoporeBasename;
34
35 public class OfflineRunner implements AnalysisListener {
36
37 private AtomicInteger filesRemaining;
38 private boolean showUpdates = true;
39
40 public OfflineRunner (String [] filenames) {
41
42 // See if we need to show updates
43 showUpdates = !FastQCConfig.getInstance().quiet;
44
45 Vector<File> files = new Vector<File>();
46
47 // We make a special case if they supply a single filename
48 // which is stdin. In this case we'll take data piped to us
49 // rather than trying to read the actual file. We'll also
50 // skip the existence check.
51
52 if (filenames.length == 1 && filenames[0].startsWith("stdin")) {
53 files.add(new File(filenames[0]));
54 }
55 else {
56 for (int f=0;f<filenames.length;f++) {
57 File file = new File(filenames[f]);
58
59 if (!file.exists() || ! file.canRead()) {
60 System.err.println("Skipping '"+filenames[f]+"' which didn't exist, or couldn't be read");
61 continue;
62 }
63
64 if (FastQCConfig.getInstance().nano && file.isDirectory()) {
65 File [] fast5files = file.listFiles();
66 for (int i=0;i<fast5files.length;i++) {
67 if (fast5files[i].getName().endsWith(".fast5")) {
68 files.add(fast5files[i]);
69 }
70 }
71
72 // In newer nanopore software instances the fast5 files are
73 // put into subdirectories of the main one specified so we
74 // also need to look into those as well.
75 for (int i=0;i<fast5files.length;i++) {
76 if (fast5files[i].isDirectory()) {
77 File [] subFast5files = fast5files[i].listFiles();
78
79 for (int j=0;j<subFast5files.length;i++) {
80 if (subFast5files[j].getName().endsWith(".fast5")) {
81 files.add(subFast5files[j]);
82 }
83 }
84
85 }
86 }
87
88 }
89 else {
90 files.add(file);
91 }
92 }
93 }
94
95
96 File [][] fileGroups;
97
98 // See if we need to group together files from a casava group
99 if (FastQCConfig.getInstance().casava) {
100 fileGroups = CasavaBasename.getCasavaGroups(files.toArray(new File[0]));
101 }
102 else if (FastQCConfig.getInstance().nano) {
103 fileGroups = NanoporeBasename.getNanoporeGroups(files.toArray(new File[0]));
104 }
105 else {
106 fileGroups = new File [files.size()][1];
107 for (int f=0;f<files.size();f++) {
108 fileGroups[f][0] = files.elementAt(f);
109 }
110 }
111
112
113 filesRemaining = new AtomicInteger(fileGroups.length);
114
115 boolean somethingFailed = false;
116
117 for (int i=0;i<fileGroups.length;i++) {
118
119 try {
120 processFile(fileGroups[i]);
121 }
122 catch (Exception e) {
123 System.err.println("Failed to process "+fileGroups[i][0]);
124 e.printStackTrace();
125 filesRemaining.decrementAndGet();
126 somethingFailed = true;
127 }
128 }
129
130 // We need to hold this class open as otherwise the main method
131 // exits when it's finished.
132 while (filesRemaining.intValue() > 0) {
133 try {
134 Thread.sleep(1000);
135 }
136 catch (InterruptedException e) {}
137 }
138 if (somethingFailed) {
139 System.exit(1);
140 }
141 System.exit(0);
142
143 }
144
145 public void processFile (File [] files) throws Exception {
146 for (int f=0;f<files.length;f++) {
147 if (!files[f].getName().startsWith("stdin") && !files[f].exists()) {
148 throw new IOException(files[f].getName()+" doesn't exist");
149 }
150 }
151 SequenceFile sequenceFile = SequenceFactory.getSequenceFile(files);
152
153 AnalysisRunner runner = new AnalysisRunner(sequenceFile);
154 runner.addAnalysisListener(this);
155
156 QCModule [] module_list = ModuleFactory.getStandardModuleList();
157
158 runner.startAnalysis(module_list);
159
160 }
161
162 public void analysisComplete(SequenceFile file, QCModule[] results) {
163 File reportFile;
164
165 if (showUpdates) System.out.println("Analysis complete for "+file.name());
166
167
168 if (FastQCConfig.getInstance().output_dir != null) {
169 String fileName = file.getFile().getName().replaceAll("stdin:","").replaceAll("\\.gz$","").replaceAll("\\.bz2$","").replaceAll("\\.txt$","").replaceAll("\\.fastq$", "").replaceAll("\\.fq$", "").replaceAll("\\.csfastq$", "").replaceAll("\\.sam$", "").replaceAll("\\.bam$", "")+"_fastqc.html";
170 reportFile = new File(FastQCConfig.getInstance().output_dir+"/"+fileName);
171 }
172 else {
173 reportFile = new File(file.getFile().getAbsolutePath().replaceAll("stdin:","").replaceAll("\\.gz$","").replaceAll("\\.bz2$","").replaceAll("\\.txt$","").replaceAll("\\.fastq$", "").replaceAll("\\.fq$", "").replaceAll("\\.csfastq$", "").replaceAll("\\.sam$", "").replaceAll("\\.bam$", "")+"_fastqc.html");
174 }
175
176 try {
177 new HTMLReportArchive(file, results, reportFile);
178 }
179 catch (Exception e) {
180 analysisExceptionReceived(file, e);
181 return;
182 }
183 filesRemaining.decrementAndGet();
184
185 }
186
187 public void analysisUpdated(SequenceFile file, int sequencesProcessed, int percentComplete) {
188
189 if (percentComplete % 5 == 0) {
190 if (percentComplete == 105) {
191 if (showUpdates) System.err.println("It seems our guess for the total number of records wasn't very good. Sorry about that.");
192 }
193 if (percentComplete > 100) {
194 if (showUpdates) System.err.println("Still going at "+percentComplete+"% complete for "+file.name());
195 }
196 else {
197 if (showUpdates) System.err.println("Approx "+percentComplete+"% complete for "+file.name());
198 }
199 }
200 }
201
202 public void analysisExceptionReceived(SequenceFile file, Exception e) {
203 System.err.println("Failed to process file "+file.name());
204 e.printStackTrace();
205 filesRemaining.decrementAndGet();
206 }
207
208 public void analysisStarted(SequenceFile file) {
209 if (showUpdates) System.err.println("Started analysis of "+file.name());
210
211 }
212
213 }
0 /**
1 * Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Dialogs;
20
21 import javax.swing.*;
22
23 import uk.ac.babraham.FastQC.FastQCApplication;
24
25 import java.awt.*;
26 import java.awt.event.*;
27
28 /**
29 * Shows the generic about dialog giving details of the current version
30 * and copyright assignments. This is just a thin shell around the
31 * SeqMonkTitlePanel which actually holds the relevant information and
32 * which is also used on the welcome screen.
33 */
34 public class AboutDialog extends JDialog {
35 private static final long serialVersionUID = 1L;
36
37 /**
38 * Instantiates a new about dialog.
39 *
40 * @param a The SeqMonk application.
41 */
42 public AboutDialog(FastQCApplication a) {
43 super(a);
44 setTitle("About FastQC...");
45 Container cont = getContentPane();
46 cont.setLayout(new BorderLayout());
47
48 add(new FastQCTitlePanel(),BorderLayout.CENTER);
49
50 JPanel buttonPanel = new JPanel();
51
52 JButton closeButton = new JButton("Close");
53 getRootPane().setDefaultButton(closeButton);
54 closeButton.addActionListener(new ActionListener() {
55 public void actionPerformed(ActionEvent arg0) {
56 setVisible(false);
57 dispose();
58 }
59 });
60 buttonPanel.add(closeButton);
61
62 cont.add(buttonPanel,BorderLayout.SOUTH);
63
64 setSize(650,230);
65 setLocationRelativeTo(a);
66 setResizable(false);
67 setVisible(true);
68 }
69
70 }
0 /**
1 * Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Dialogs;
20
21 import javax.swing.*;
22
23 import uk.ac.babraham.FastQC.FastQCApplication;
24
25 import java.awt.*;
26 import java.awt.event.*;
27
28 /**
29 * Shows the generic about dialog giving details of the current version
30 * and copyright assignments. This is just a thin shell around the
31 * SeqMonkTitlePanel which actually holds the relevant information and
32 * which is also used on the welcome screen.
33 */
34 public class AboutDialog extends JDialog {
35 private static final long serialVersionUID = 1L;
36
37 /**
38 * Instantiates a new about dialog.
39 *
40 * @param a The SeqMonk application.
41 */
42 public AboutDialog(FastQCApplication a) {
43 super(a);
44 setTitle("About FastQC...");
45 Container cont = getContentPane();
46 cont.setLayout(new BorderLayout());
47
48 add(new FastQCTitlePanel(),BorderLayout.CENTER);
49
50 JPanel buttonPanel = new JPanel();
51
52 JButton closeButton = new JButton("Close");
53 getRootPane().setDefaultButton(closeButton);
54 closeButton.addActionListener(new ActionListener() {
55 public void actionPerformed(ActionEvent arg0) {
56 setVisible(false);
57 dispose();
58 }
59 });
60 buttonPanel.add(closeButton);
61
62 cont.add(buttonPanel,BorderLayout.SOUTH);
63
64 setSize(650,230);
65 setLocationRelativeTo(a);
66 setResizable(false);
67 setVisible(true);
68 }
69
70 }
0 /**
1 * Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Dialogs;
20
21 import java.awt.BorderLayout;
22 import java.awt.Color;
23 import java.awt.Font;
24 import java.awt.Graphics;
25 import java.awt.Graphics2D;
26 import java.awt.GridBagConstraints;
27 import java.awt.GridBagLayout;
28 import java.awt.Insets;
29 import java.awt.RenderingHints;
30
31 import javax.swing.BorderFactory;
32 import javax.swing.ImageIcon;
33 import javax.swing.JLabel;
34 import javax.swing.JPanel;
35 import javax.swing.JTextField;
36
37 import uk.ac.babraham.FastQC.FastQCApplication;
38
39 /**
40 * The Class SeqMonkTitlePanel.
41 */
42
43 public class FastQCTitlePanel extends JPanel {
44 private static final long serialVersionUID = 1L;
45
46 /**
47 * Provides a small panel which gives details of the FastQC version
48 * and copyright. Used in both the welcome panel and the about dialog.
49 */
50 public FastQCTitlePanel () {
51 setLayout(new BorderLayout(5,1));
52
53 ImageIcon logo = new ImageIcon(ClassLoader.getSystemResource("uk/ac/babraham/FastQC/Resources/fastqc_icon_100.png"));
54 JPanel logoPanel = new JPanel();
55 logoPanel.add(new JLabel("",logo,JLabel.CENTER));
56 logoPanel.setBorder(BorderFactory.createEmptyBorder(3, 3, 3, 3));
57 add(logoPanel,BorderLayout.WEST);
58 JPanel c = new JPanel();
59 c.setLayout(new GridBagLayout());
60
61 GridBagConstraints constraints = new GridBagConstraints();
62 constraints.gridx=1;
63 constraints.gridy=1;
64 constraints.weightx = 1;
65 constraints.weighty=1;
66 constraints.insets = new Insets(3, 3, 0, 0);
67 constraints.fill = GridBagConstraints.NONE;
68
69 JLabel program = new SmoothJLabel("FastQC High Throughput Sequence QC Report",JLabel.CENTER);
70 program.setFont(new Font("Dialog",Font.BOLD,18));
71 program.setForeground(new Color(200,0,0));
72 c.add(program,constraints);
73
74 constraints.gridy++;
75 JLabel version = new SmoothJLabel("Version: "+FastQCApplication.VERSION, JLabel.CENTER);
76 version.setFont(new Font("Dialog",Font.BOLD,15));
77 version.setForeground(new Color(0,0,200));
78 c.add(version,constraints);
79
80 constraints.gridy++;
81 // Use a text field so they can copy this
82 JTextField website = new JTextField(" www.bioinformatics.babraham.ac.uk/projects/ ");
83 website.setFont(new Font("Dialog",Font.PLAIN,14));
84 website.setEditable(false);
85 website.setBorder(null);
86 website.setOpaque(false);
87 website.setHorizontalAlignment(JTextField.CENTER);
88 c.add(website,constraints);
89 constraints.gridy++;
90
91 JLabel copyright = new JLabel("\u00a9 Simon Andrews, Pierre Lindenbaum, Brian Howard, Phil Ewels 2011-15,", JLabel.CENTER);
92 copyright.setFont(new Font("Dialog",Font.PLAIN,14));
93 c.add(copyright,constraints);
94 constraints.gridy++;
95
96 JLabel copyright2 = new JLabel("Picard BAM/SAM reader \u00a9The Broad Institute, 2013", JLabel.CENTER);
97 copyright2.setFont(new Font("Dialog",Font.PLAIN,10));
98 c.add(copyright2,constraints);
99 constraints.gridy++;
100
101 JLabel copyright3 = new JLabel("BZip decompression \u00a9Matthew J. Francis, 2011", JLabel.CENTER);
102 copyright3.setFont(new Font("Dialog",Font.PLAIN,10));
103 c.add(copyright3,constraints);
104 constraints.gridy++;
105
106 JLabel copyright4 = new JLabel("Base64 encoding \u00a9Robert Harder, 2012", JLabel.CENTER);
107 copyright4.setFont(new Font("Dialog",Font.PLAIN,10));
108 c.add(copyright4,constraints);
109 constraints.gridy++;
110
111 JLabel copyright5 = new JLabel("Java HDF5 reader \u00a9ETH, CISD and SIS, 2007-14", JLabel.CENTER);
112 copyright5.setFont(new Font("Dialog",Font.PLAIN,10));
113 c.add(copyright5,constraints);
114
115 add(c,BorderLayout.CENTER);
116 }
117
118 /**
119 * A JLabel with anti-aliasing enabled. Takes the same constructor
120 * arguments as JLabel
121 */
122 private class SmoothJLabel extends JLabel {
123
124 /**
125 * Creates a new label
126 *
127 * @param text The text
128 * @param position The JLabel constant position for alignment
129 */
130 public SmoothJLabel (String text, int position) {
131 super(text,position);
132 }
133
134 /* (non-Javadoc)
135 * @see javax.swing.JComponent#paintComponent(java.awt.Graphics)
136 */
137 public void paintComponent (Graphics g) {
138 if (g instanceof Graphics2D) {
139 ((Graphics2D)g).setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
140 }
141 super.paintComponent(g);
142 }
143
144 }
145
146 }
0 /**
1 * Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Dialogs;
20
21 import java.awt.BorderLayout;
22 import java.awt.Color;
23 import java.awt.Font;
24 import java.awt.Graphics;
25 import java.awt.Graphics2D;
26 import java.awt.GridBagConstraints;
27 import java.awt.GridBagLayout;
28 import java.awt.Insets;
29 import java.awt.RenderingHints;
30
31 import javax.swing.BorderFactory;
32 import javax.swing.ImageIcon;
33 import javax.swing.JLabel;
34 import javax.swing.JPanel;
35 import javax.swing.JTextField;
36
37 import uk.ac.babraham.FastQC.FastQCApplication;
38
39 /**
40 * The Class SeqMonkTitlePanel.
41 */
42
43 public class FastQCTitlePanel extends JPanel {
44 private static final long serialVersionUID = 1L;
45
46 /**
47 * Provides a small panel which gives details of the FastQC version
48 * and copyright. Used in both the welcome panel and the about dialog.
49 */
50 public FastQCTitlePanel () {
51 setLayout(new BorderLayout(5,1));
52
53 ImageIcon logo = new ImageIcon(ClassLoader.getSystemResource("uk/ac/babraham/FastQC/Resources/fastqc_icon_100.png"));
54 JPanel logoPanel = new JPanel();
55 logoPanel.add(new JLabel("",logo,JLabel.CENTER));
56 logoPanel.setBorder(BorderFactory.createEmptyBorder(3, 3, 3, 3));
57 add(logoPanel,BorderLayout.WEST);
58 JPanel c = new JPanel();
59 c.setLayout(new GridBagLayout());
60
61 GridBagConstraints constraints = new GridBagConstraints();
62 constraints.gridx=1;
63 constraints.gridy=1;
64 constraints.weightx = 1;
65 constraints.weighty=1;
66 constraints.insets = new Insets(3, 3, 0, 0);
67 constraints.fill = GridBagConstraints.NONE;
68
69 JLabel program = new SmoothJLabel("FastQC High Throughput Sequence QC Report",JLabel.CENTER);
70 program.setFont(new Font("Dialog",Font.BOLD,18));
71 program.setForeground(new Color(200,0,0));
72 c.add(program,constraints);
73
74 constraints.gridy++;
75 JLabel version = new SmoothJLabel("Version: "+FastQCApplication.VERSION, JLabel.CENTER);
76 version.setFont(new Font("Dialog",Font.BOLD,15));
77 version.setForeground(new Color(0,0,200));
78 c.add(version,constraints);
79
80 constraints.gridy++;
81 // Use a text field so they can copy this
82 JTextField website = new JTextField(" www.bioinformatics.babraham.ac.uk/projects/ ");
83 website.setFont(new Font("Dialog",Font.PLAIN,14));
84 website.setEditable(false);
85 website.setBorder(null);
86 website.setOpaque(false);
87 website.setHorizontalAlignment(JTextField.CENTER);
88 c.add(website,constraints);
89 constraints.gridy++;
90
91 JLabel copyright = new JLabel("\u00a9 Simon Andrews, Pierre Lindenbaum, Brian Howard, Phil Ewels 2011-17,", JLabel.CENTER);
92 copyright.setFont(new Font("Dialog",Font.PLAIN,14));
93 c.add(copyright,constraints);
94 constraints.gridy++;
95
96 JLabel copyright2 = new JLabel("Picard BAM/SAM reader \u00a9The Broad Institute, 2013", JLabel.CENTER);
97 copyright2.setFont(new Font("Dialog",Font.PLAIN,10));
98 c.add(copyright2,constraints);
99 constraints.gridy++;
100
101 JLabel copyright3 = new JLabel("BZip decompression \u00a9Matthew J. Francis, 2011", JLabel.CENTER);
102 copyright3.setFont(new Font("Dialog",Font.PLAIN,10));
103 c.add(copyright3,constraints);
104 constraints.gridy++;
105
106 JLabel copyright4 = new JLabel("Base64 encoding \u00a9Robert Harder, 2012", JLabel.CENTER);
107 copyright4.setFont(new Font("Dialog",Font.PLAIN,10));
108 c.add(copyright4,constraints);
109 constraints.gridy++;
110
111 JLabel copyright5 = new JLabel("Java HDF5 reader \u00a9ETH, CISD and SIS, 2007-14", JLabel.CENTER);
112 copyright5.setFont(new Font("Dialog",Font.PLAIN,10));
113 c.add(copyright5,constraints);
114
115 add(c,BorderLayout.CENTER);
116 }
117
118 /**
119 * A JLabel with anti-aliasing enabled. Takes the same constructor
120 * arguments as JLabel
121 */
122 private class SmoothJLabel extends JLabel {
123
124 /**
125 * Creates a new label
126 *
127 * @param text The text
128 * @param position The JLabel constant position for alignment
129 */
130 public SmoothJLabel (String text, int position) {
131 super(text,position);
132 }
133
134 /* (non-Javadoc)
135 * @see javax.swing.JComponent#paintComponent(java.awt.Graphics)
136 */
137 public void paintComponent (Graphics g) {
138 if (g instanceof Graphics2D) {
139 ((Graphics2D)g).setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
140 }
141 super.paintComponent(g);
142 }
143
144 }
145
146 }
0 /**
1 * Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Dialogs;
20
21 import java.awt.GridBagConstraints;
22 import java.awt.GridBagLayout;
23 import java.awt.Insets;
24
25 import javax.swing.JLabel;
26 import javax.swing.JPanel;
27
28 @SuppressWarnings("serial")
29 public class WelcomePanel extends JPanel {
30 private static final long serialVersionUID = 1L;
31
32 public WelcomePanel () {
33 setLayout(new GridBagLayout());
34 GridBagConstraints gbc = new GridBagConstraints();
35
36 gbc.gridx=1;
37 gbc.gridy=1;
38 gbc.weightx=0.5;
39 gbc.weighty=0.99;
40
41 add(new JPanel(),gbc);
42 gbc.gridy++;
43 gbc.weighty=0.01;
44
45 gbc.insets = new Insets(10, 10, 10, 10);
46 gbc.fill = GridBagConstraints.NONE;
47
48 add(new FastQCTitlePanel(),gbc);
49
50 gbc.gridy++;
51 gbc.weighty=0.5;
52
53 add(new JLabel("Use File > Open to select the sequence file you want to check"),gbc);
54
55 gbc.gridy++;
56 gbc.weighty=0.99;
57 add(new JPanel(),gbc);
58
59 }
60 }
0 /**
1 * Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Dialogs;
20
21 import java.awt.GridBagConstraints;
22 import java.awt.GridBagLayout;
23 import java.awt.Insets;
24
25 import javax.swing.JLabel;
26 import javax.swing.JPanel;
27
28 @SuppressWarnings("serial")
29 public class WelcomePanel extends JPanel {
30 private static final long serialVersionUID = 1L;
31
32 public WelcomePanel () {
33 setLayout(new GridBagLayout());
34 GridBagConstraints gbc = new GridBagConstraints();
35
36 gbc.gridx=1;
37 gbc.gridy=1;
38 gbc.weightx=0.5;
39 gbc.weighty=0.99;
40
41 add(new JPanel(),gbc);
42 gbc.gridy++;
43 gbc.weighty=0.01;
44
45 gbc.insets = new Insets(10, 10, 10, 10);
46 gbc.fill = GridBagConstraints.NONE;
47
48 add(new FastQCTitlePanel(),gbc);
49
50 gbc.gridy++;
51 gbc.weighty=0.5;
52
53 add(new JLabel("Use File > Open to select the sequence file you want to check"),gbc);
54
55 gbc.gridy++;
56 gbc.weighty=0.99;
57 add(new JPanel(),gbc);
58
59 }
60 }
00 /**
1 * Copyright Copyright 2010-15 Simon Andrews
1 * Copyright Copyright 2010-17 Simon Andrews
22 *
33 * This file is part of FastQC.
44 *
5353
5454 public class FastQCApplication extends JFrame {
5555
56 public static final String VERSION = "0.11.5";
56 public static final String VERSION = "0.11.7";
5757
5858 private JTabbedPane fileTabs;
5959 private WelcomePanel welcomePanel;
234234 JOptionPane.showMessageDialog(this, "No FastQ files are open yet", "Can't save report", JOptionPane.ERROR_MESSAGE);
235235 return;
236236 }
237 chooser.setSelectedFile(new File(((ResultsPanel)fileTabs.getSelectedComponent()).sequenceFile().getFile().getName().replaceAll(".gz$","").replaceAll(".bz2$","").replaceAll(".txt$","").replaceAll(".fastq$", "").replaceAll(".fq$", "").replaceAll(".sam$", "").replaceAll(".bam$", "")+"_fastqc.html"));
237 chooser.setSelectedFile(new File(((ResultsPanel)fileTabs.getSelectedComponent()).sequenceFile().getFile().getName().replaceAll("stdin:","").replaceAll(".gz$","").replaceAll(".bz2$","").replaceAll(".txt$","").replaceAll(".fastq$", "").replaceAll(".fq$", "").replaceAll(".sam$", "").replaceAll(".bam$", "")+"_fastqc.html"));
238238 chooser.setMultiSelectionEnabled(false);
239239 chooser.setFileFilter(new FileFilter() {
240240
00 /**
1 * Copyright Copyright 2012-15 Simon Andrews
1 * Copyright Copyright 2012-17 Simon Andrews
22 *
33 * This file is part of FastQC.
44 *
4040 public File contaminant_file = null;
4141 public File adapter_file = null;
4242 public File limits_file = null;
43 public int minLength = 0;
4344
4445 private FastQCConfig () {
4546
5253 }
5354
5455 // Contaminant file
55 if (System.getProperty("fastqc.contmainant_file") != null) {
56 if (System.getProperty("fastqc.contaminant_file") != null) {
5657 contaminant_file = new File(System.getProperty("fastqc.contaminant_file"));
5758 if (!(contaminant_file.exists() && contaminant_file.canRead())) {
5859 throw new IllegalArgumentException("Contaminant file "+contaminant_file+" doesn't exist or can't be read");
8384 }
8485 }
8586
86 // Threads
87 // Kmer size
8788 if (System.getProperty("fastqc.kmer_size") != null) {
8889 kmer_size = Integer.parseInt(System.getProperty("fastqc.kmer_size"));
8990 }
91
92
93 // Min length
94 if (System.getProperty("fastqc.min_length") != null) {
95 minLength = Integer.parseInt(System.getProperty("fastqc.min_length"));
96 }
97
9098
9199 // Quiet
92100 if (System.getProperty("fastqc.quiet") != null && System.getProperty("fastqc.quiet").equals("true")) {
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC;
20
21 import java.awt.Toolkit;
22 import java.awt.event.ActionEvent;
23 import java.awt.event.ActionListener;
24 import java.awt.event.KeyEvent;
25 import java.io.File;
26 import java.io.UnsupportedEncodingException;
27 import java.net.URLDecoder;
28
29 import javax.swing.JMenu;
30 import javax.swing.JMenuBar;
31 import javax.swing.JMenuItem;
32 import javax.swing.JOptionPane;
33 import javax.swing.KeyStroke;
34
35 import uk.ac.babraham.FastQC.Dialogs.AboutDialog;
36 import uk.ac.babraham.FastQC.Help.HelpDialog;
37
38 public class FastQCMenuBar extends JMenuBar implements ActionListener {
39
40 private FastQCApplication application;
41
42 public FastQCMenuBar (FastQCApplication application) {
43 this.application = application;
44
45 JMenu fileMenu = new JMenu("File");
46 fileMenu.setMnemonic(KeyEvent.VK_F);
47
48 JMenuItem fileOpen = new JMenuItem("Open...");
49 fileOpen.setMnemonic(KeyEvent.VK_O);
50 fileOpen.setAccelerator(KeyStroke.getKeyStroke('O', Toolkit.getDefaultToolkit().getMenuShortcutKeyMask()));
51 fileOpen.setActionCommand("open");
52 fileOpen.addActionListener(this);
53 fileMenu.add(fileOpen);
54
55 fileMenu.addSeparator();
56
57 JMenuItem fileSave = new JMenuItem("Save report...");
58 fileSave.setMnemonic(KeyEvent.VK_S);
59 fileSave.setAccelerator(KeyStroke.getKeyStroke('S', Toolkit.getDefaultToolkit().getMenuShortcutKeyMask()));
60 fileSave.setActionCommand("save");
61 fileSave.addActionListener(this);
62 fileMenu.add(fileSave);
63
64 fileMenu.addSeparator();
65
66 JMenuItem fileClose = new JMenuItem("Close");
67 fileClose.setMnemonic(KeyEvent.VK_C);
68 fileClose.setAccelerator(KeyStroke.getKeyStroke('W', Toolkit.getDefaultToolkit().getMenuShortcutKeyMask()));
69 fileClose.setActionCommand("close");
70 fileClose.addActionListener(this);
71 fileMenu.add(fileClose);
72
73
74 JMenuItem fileCloseAll = new JMenuItem("Close All");
75 fileCloseAll.setMnemonic(KeyEvent.VK_A);
76 fileCloseAll.setActionCommand("close_all");
77 fileCloseAll.addActionListener(this);
78 fileMenu.add(fileCloseAll);
79
80
81 fileMenu.addSeparator();
82
83 JMenuItem fileExit = new JMenuItem("Exit");
84 fileExit.setMnemonic(KeyEvent.VK_X);
85 fileExit.setActionCommand("exit");
86 fileExit.addActionListener(this);
87 fileMenu.add(fileExit);
88
89 add(fileMenu);
90
91 JMenu helpMenu = new JMenu("Help");
92 helpMenu.setMnemonic(KeyEvent.VK_H);
93
94 JMenuItem helpContents = new JMenuItem("Contents...");
95 helpContents.setMnemonic(KeyEvent.VK_C);
96 helpContents.setActionCommand("help_contents");
97 helpContents.addActionListener(this);
98 helpMenu.add(helpContents);
99
100 helpMenu.addSeparator();
101
102 JMenuItem helpAbout = new JMenuItem("About FastQC");
103 helpAbout.setMnemonic(KeyEvent.VK_A);
104 helpAbout.setActionCommand("about");
105 helpAbout.addActionListener(this);
106
107 helpMenu.add(helpAbout);
108
109 add(helpMenu);
110
111 }
112
113 public void actionPerformed(ActionEvent e) {
114
115 String command = e.getActionCommand();
116
117 if (command.equals("exit")) {
118 System.exit(0);
119 }
120 else if (command.equals("open")) {
121 application.openFile();
122 }
123 else if (command.equals("save")) {
124 application.saveReport();
125 }
126 else if (command.equals("close")) {
127 application.close();
128 }
129 else if (command.equals("close_all")) {
130 application.closeAll();
131 }
132 else if (command.equals("help_contents")) {
133 try {
134 new HelpDialog(application,new File(URLDecoder.decode(ClassLoader.getSystemResource("Help").getFile(),"UTF-8")));
135 }
136 catch (UnsupportedEncodingException e1) {
137 e1.printStackTrace();
138 }
139 }
140 else if (command.equals("about")) {
141 new AboutDialog(application);
142 }
143 else {
144 JOptionPane.showMessageDialog(application, "Unknown menu command "+command, "Unknown command", JOptionPane.ERROR_MESSAGE);
145 }
146 }
147
148 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC;
20
21 import java.awt.Toolkit;
22 import java.awt.event.ActionEvent;
23 import java.awt.event.ActionListener;
24 import java.awt.event.KeyEvent;
25 import java.io.File;
26 import java.io.UnsupportedEncodingException;
27 import java.net.URLDecoder;
28
29 import javax.swing.JMenu;
30 import javax.swing.JMenuBar;
31 import javax.swing.JMenuItem;
32 import javax.swing.JOptionPane;
33 import javax.swing.KeyStroke;
34
35 import uk.ac.babraham.FastQC.Dialogs.AboutDialog;
36 import uk.ac.babraham.FastQC.Help.HelpDialog;
37
38 public class FastQCMenuBar extends JMenuBar implements ActionListener {
39
40 private FastQCApplication application;
41
42 public FastQCMenuBar (FastQCApplication application) {
43 this.application = application;
44
45 JMenu fileMenu = new JMenu("File");
46 fileMenu.setMnemonic(KeyEvent.VK_F);
47
48 JMenuItem fileOpen = new JMenuItem("Open...");
49 fileOpen.setMnemonic(KeyEvent.VK_O);
50 fileOpen.setAccelerator(KeyStroke.getKeyStroke('O', Toolkit.getDefaultToolkit().getMenuShortcutKeyMask()));
51 fileOpen.setActionCommand("open");
52 fileOpen.addActionListener(this);
53 fileMenu.add(fileOpen);
54
55 fileMenu.addSeparator();
56
57 JMenuItem fileSave = new JMenuItem("Save report...");
58 fileSave.setMnemonic(KeyEvent.VK_S);
59 fileSave.setAccelerator(KeyStroke.getKeyStroke('S', Toolkit.getDefaultToolkit().getMenuShortcutKeyMask()));
60 fileSave.setActionCommand("save");
61 fileSave.addActionListener(this);
62 fileMenu.add(fileSave);
63
64 fileMenu.addSeparator();
65
66 JMenuItem fileClose = new JMenuItem("Close");
67 fileClose.setMnemonic(KeyEvent.VK_C);
68 fileClose.setAccelerator(KeyStroke.getKeyStroke('W', Toolkit.getDefaultToolkit().getMenuShortcutKeyMask()));
69 fileClose.setActionCommand("close");
70 fileClose.addActionListener(this);
71 fileMenu.add(fileClose);
72
73
74 JMenuItem fileCloseAll = new JMenuItem("Close All");
75 fileCloseAll.setMnemonic(KeyEvent.VK_A);
76 fileCloseAll.setActionCommand("close_all");
77 fileCloseAll.addActionListener(this);
78 fileMenu.add(fileCloseAll);
79
80
81 fileMenu.addSeparator();
82
83 JMenuItem fileExit = new JMenuItem("Exit");
84 fileExit.setMnemonic(KeyEvent.VK_X);
85 fileExit.setActionCommand("exit");
86 fileExit.addActionListener(this);
87 fileMenu.add(fileExit);
88
89 add(fileMenu);
90
91 JMenu helpMenu = new JMenu("Help");
92 helpMenu.setMnemonic(KeyEvent.VK_H);
93
94 JMenuItem helpContents = new JMenuItem("Contents...");
95 helpContents.setMnemonic(KeyEvent.VK_C);
96 helpContents.setActionCommand("help_contents");
97 helpContents.addActionListener(this);
98 helpMenu.add(helpContents);
99
100 helpMenu.addSeparator();
101
102 JMenuItem helpAbout = new JMenuItem("About FastQC");
103 helpAbout.setMnemonic(KeyEvent.VK_A);
104 helpAbout.setActionCommand("about");
105 helpAbout.addActionListener(this);
106
107 helpMenu.add(helpAbout);
108
109 add(helpMenu);
110
111 }
112
113 public void actionPerformed(ActionEvent e) {
114
115 String command = e.getActionCommand();
116
117 if (command.equals("exit")) {
118 System.exit(0);
119 }
120 else if (command.equals("open")) {
121 application.openFile();
122 }
123 else if (command.equals("save")) {
124 application.saveReport();
125 }
126 else if (command.equals("close")) {
127 application.close();
128 }
129 else if (command.equals("close_all")) {
130 application.closeAll();
131 }
132 else if (command.equals("help_contents")) {
133 try {
134 new HelpDialog(application,new File(URLDecoder.decode(ClassLoader.getSystemResource("Help").getFile(),"UTF-8")));
135 }
136 catch (UnsupportedEncodingException e1) {
137 e1.printStackTrace();
138 }
139 }
140 else if (command.equals("about")) {
141 new AboutDialog(application);
142 }
143 else {
144 JOptionPane.showMessageDialog(application, "Unknown menu command "+command, "Unknown command", JOptionPane.ERROR_MESSAGE);
145 }
146 }
147
148 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which accepts every file and directory. It is
 * distinguished from the other filters only by its description.
 */
public class BAMFileFilter extends FileFilter {

	private static final String DESCRIPTION = "BAM/SAM Files (all entries)";

	/**
	 * @param f the candidate file, which is not inspected
	 * @return true in all cases
	 */
	@Override
	public boolean accept(File f) {
		return true;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return DESCRIPTION;
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which accepts every file and directory. It is
 * distinguished from the other filters only by its description.
 */
public class BAMFileFilter extends FileFilter {

	private static final String DESCRIPTION = "BAM/SAM Files (all entries)";

	/**
	 * @param f the candidate file, which is not inspected
	 * @return true in all cases
	 */
	@Override
	public boolean accept(File f) {
		return true;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return DESCRIPTION;
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which shows directories and any file whose name
 * ends with ".fastq.gz". The name comparison is case sensitive.
 */
public class CasavaFastQFileFilter extends FileFilter {

	private static final String SUFFIX = ".fastq.gz";

	/**
	 * @param f the candidate file
	 * @return true for a directory or a name ending in ".fastq.gz"
	 */
	@Override
	public boolean accept(File f) {
		return f.isDirectory() || f.getName().endsWith(SUFFIX);
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return "Casava FastQ Files";
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which shows directories and any file whose name
 * ends with ".fastq.gz". The name comparison is case sensitive.
 */
public class CasavaFastQFileFilter extends FileFilter {

	private static final String SUFFIX = ".fastq.gz";

	/**
	 * @param f the candidate file
	 * @return true for a directory or a name ending in ".fastq.gz"
	 */
	@Override
	public boolean accept(File f) {
		return f.isDirectory() || f.getName().endsWith(SUFFIX);
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return "Casava FastQ Files";
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which accepts every file and directory. It is
 * distinguished from the other filters only by its description.
 */
public class FastQFileFilter extends FileFilter {

	private static final String DESCRIPTION = "FastQ Files";

	/**
	 * @param f the candidate file, which is not inspected
	 * @return true in all cases
	 */
	@Override
	public boolean accept(File f) {
		return true;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return DESCRIPTION;
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which accepts every file and directory. It is
 * distinguished from the other filters only by its description.
 */
public class FastQFileFilter extends FileFilter {

	private static final String DESCRIPTION = "FastQ Files";

	/**
	 * @param f the candidate file, which is not inspected
	 * @return true in all cases
	 */
	@Override
	public boolean accept(File f) {
		return true;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return DESCRIPTION;
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which accepts every file and directory. It is
 * distinguished from the other filters only by its description.
 */
public class GobyFileFilter extends FileFilter {

	private static final String DESCRIPTION = "Goby Files (all entries)";

	/**
	 * @param f the candidate file, which is not inspected
	 * @return true in all cases
	 */
	@Override
	public boolean accept(File f) {
		return true;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return DESCRIPTION;
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which accepts every file and directory. It is
 * distinguished from the other filters only by its description.
 */
public class GobyFileFilter extends FileFilter {

	private static final String DESCRIPTION = "Goby Files (all entries)";

	/**
	 * @param f the candidate file, which is not inspected
	 * @return true in all cases
	 */
	@Override
	public boolean accept(File f) {
		return true;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return DESCRIPTION;
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which accepts every file and directory. It is
 * distinguished from the other filters only by its description.
 */
public class MappedBAMFileFilter extends FileFilter {

	private static final String DESCRIPTION = "BAM/SAM Files (only mapped entries)";

	/**
	 * @param f the candidate file, which is not inspected
	 * @return true in all cases
	 */
	@Override
	public boolean accept(File f) {
		return true;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return DESCRIPTION;
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which accepts every file and directory. It is
 * distinguished from the other filters only by its description.
 */
public class MappedBAMFileFilter extends FileFilter {

	private static final String DESCRIPTION = "BAM/SAM Files (only mapped entries)";

	/**
	 * @param f the candidate file, which is not inspected
	 * @return true in all cases
	 */
	@Override
	public boolean accept(File f) {
		return true;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return DESCRIPTION;
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which shows directories and any file whose
 * lower-cased name ends with one of a fixed list of suffixes.
 */
public class SequenceFileFilter extends FileFilter {

	// The recognised file name endings, all given in lower case
	private static final String [] SUFFIXES = new String [] {
		".txt.gz",
		".fastq.gz",
		".fq.gz",
		".fq",
		".txt.bz2",
		".fastq.bz2",
		".txt",
		".fastq",
		".bam",
		".sam",
		".compact-reads",
		".goby"
	};

	/**
	 * @param f the candidate file
	 * @return true for a directory, or for a file whose name ends with
	 * one of the recognised suffixes (ignoring case)
	 */
	@Override
	public boolean accept(File f) {
		if (f.isDirectory()) return true;

		String lowerName = f.getName().toLowerCase();

		for (String suffix : SUFFIXES) {
			if (lowerName.endsWith(suffix)) return true;
		}

		return false;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return "Sequence Files";
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.FileFilters;
20
21 import java.io.File;
22
23 import javax.swing.filechooser.FileFilter;
24
/**
 * A file chooser filter which shows directories and any file whose
 * lower-cased name ends with one of a fixed list of suffixes.
 */
public class SequenceFileFilter extends FileFilter {

	// The recognised file name endings, all given in lower case
	private static final String [] SUFFIXES = new String [] {
		".txt.gz",
		".fastq.gz",
		".fq.gz",
		".fq",
		".txt.bz2",
		".fastq.bz2",
		".txt",
		".fastq",
		".bam",
		".sam",
		".compact-reads",
		".goby"
	};

	/**
	 * @param f the candidate file
	 * @return true for a directory, or for a file whose name ends with
	 * one of the recognised suffixes (ignoring case)
	 */
	@Override
	public boolean accept(File f) {
		if (f.isDirectory()) return true;

		String lowerName = f.getName().toLowerCase();

		for (String suffix : SUFFIXES) {
			if (lowerName.endsWith(suffix)) return true;
		}

		return false;
	}

	/**
	 * @return the text shown for this filter in the file chooser
	 */
	@Override
	public String getDescription() {
		return "Sequence Files";
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Graphs;
20
21 import java.util.Vector;
22
23 import uk.ac.babraham.FastQC.FastQCConfig;
24
25 /**
26 * The base group class is a simple way to create a set of bins into
27 * which positions within a read can be put such that early positions
28 * get a group to themselves and later positions get averaged so that
29 * general trends can still be observed.
30 *
31 */
32 public class BaseGroup {
33
34 private int lowerCount;
35 private int upperCount;
36
37 public static BaseGroup [] makeBaseGroups (int maxLength) {
38
39 if (FastQCConfig.getInstance().nogroup) {
40 return(makeUngroupedGroups(maxLength));
41 }
42 else if (FastQCConfig.getInstance().expgroup) {
43 return(makeExponentialBaseGroups(maxLength));
44 }
45 else {
46 return(makeLinearBaseGroups(maxLength));
47 }
48 }
49
50 public static BaseGroup [] makeUngroupedGroups (int maxLength) {
51
52 int startingBase = 1;
53 int interval = 1;
54
55 Vector<BaseGroup> groups = new Vector<BaseGroup>();
56
57 while (startingBase <= maxLength) {
58
59 int endBase = startingBase+(interval-1);
60 if (endBase > maxLength) endBase = maxLength;
61
62 BaseGroup bg = new BaseGroup(startingBase, endBase);
63 groups.add(bg);
64
65 startingBase += interval;
66 }
67
68 return groups.toArray(new BaseGroup[0]);
69
70 }
71
72 public static BaseGroup [] makeExponentialBaseGroups (int maxLength) {
73
74 int startingBase = 1;
75 int interval = 1;
76
77 Vector<BaseGroup> groups = new Vector<BaseGroup>();
78
79 while (startingBase <= maxLength) {
80
81 int endBase = startingBase+(interval-1);
82 if (endBase > maxLength) endBase = maxLength;
83
84 BaseGroup bg = new BaseGroup(startingBase, endBase);
85 groups.add(bg);
86
87 startingBase += interval;
88
89 // See if we need to increase the interval
90 if (startingBase == 10 && maxLength > 75) {
91 interval = 5;
92 }
93 if (startingBase == 50 && maxLength > 200) {
94 interval = 10;
95 }
96 if (startingBase == 100 && maxLength > 300) {
97 interval = 50;
98 }
99 if (startingBase == 500 && maxLength > 1000) {
100 interval = 100;
101 }
102 if (startingBase == 1000 && maxLength > 2000) {
103 interval = 500;
104 }
105
106
107 }
108
109 return groups.toArray(new BaseGroup[0]);
110
111 }
112
113 private static int getLinearInterval (int length) {
114 // The the first 9bp as individual residues since odd stuff
115 // can happen there, then we find a grouping value which gives
116 // us a total set of groups below 75. We limit the intervals
117 // we try to sensible whole numbers.
118
119 int [] baseValues = new int [] {2,5,10};
120 int multiplier = 1;
121
122 while (true) {
123 for (int b=0;b<baseValues.length;b++) {
124 int interval = baseValues[b] * multiplier;
125 int groupCount = 9 + ((length-9)/interval);
126 if ((length-9) % interval != 0) {
127 groupCount += 1;
128 }
129
130 if (groupCount < 75) return interval;
131 }
132
133 multiplier *= 10;
134
135 if (multiplier == 10000000) {
136 throw new IllegalStateException("Couldn't find a sensible interval grouping for length '"+length+"'");
137 }
138 }
139
140 }
141
142 public static BaseGroup [] makeLinearBaseGroups (int maxLength) {
143
144 // For lengths below 75bp we just return everything.
145 if (maxLength <= 75) return makeUngroupedGroups(maxLength);
146
147 // We need to work out what interval we're going to use.
148
149 int interval = getLinearInterval(maxLength);
150
151
152 int startingBase = 1;
153
154 Vector<BaseGroup> groups = new Vector<BaseGroup>();
155
156 while (startingBase <= maxLength) {
157
158 int endBase = startingBase+(interval-1);
159
160 if (startingBase < 10) endBase = startingBase;
161
162 if (startingBase == 10 && interval > 10) {
163 endBase = interval-1;
164 }
165
166 if (endBase > maxLength) endBase = maxLength;
167
168 BaseGroup bg = new BaseGroup(startingBase, endBase);
169 groups.add(bg);
170
171 if (startingBase < 10) {
172 startingBase +=1;
173 }
174 else if (startingBase == 10 && interval > 10) {
175 startingBase = interval;
176 }
177 else {
178 startingBase += interval;
179 }
180
181 }
182
183 return groups.toArray(new BaseGroup[0]);
184
185 }
186
187
/**
 * Creates a group spanning the two supplied positions. Instances are only
 * made through the static factory methods of this class.
 *
 * @param lowerCount the first position in the group
 * @param upperCount the last position in the group
 */
private BaseGroup (int lowerCount, int upperCount) {
	this.upperCount = upperCount;
	this.lowerCount = lowerCount;
}
198
199 public int lowerCount () {
200 return lowerCount;
201 }
202
203 public int upperCount () {
204 return upperCount;
205 }
206
207 public boolean containsValue (int value) {
208 return value>=lowerCount && value<=upperCount;
209 }
210
211 public String toString () {
212 if (lowerCount == upperCount) {
213 return ""+lowerCount;
214 }
215 else {
216 return ""+lowerCount+"-"+upperCount;
217 }
218 }
219
220 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Graphs;
20
21 import java.util.Vector;
22
23 import uk.ac.babraham.FastQC.FastQCConfig;
24
25 /**
26 * The base group class is a simple way to create a set of bins into
27 * which positions within a read can be put such that early positions
28 * get a group to themselves and later positions get averaged so that
29 * general trends can still be observed.
30 *
31 */
32 public class BaseGroup {
33
34 private int lowerCount;
35 private int upperCount;
36
37 public static BaseGroup [] makeBaseGroups (int maxLength) {
38
39
40 // They might have set a fixed max length. If the observed
41 // length is longer than this then tough - they'll have to deal
42 // with it, but if not then we'll use the global value instead
43 // of theirs
44
45 if (FastQCConfig.getInstance().minLength > maxLength) {
46 maxLength = FastQCConfig.getInstance().minLength;
47 }
48
49 if (FastQCConfig.getInstance().nogroup) {
50 return(makeUngroupedGroups(maxLength));
51 }
52
53
54 if (FastQCConfig.getInstance().nogroup) {
55 return(makeUngroupedGroups(maxLength));
56 }
57 else if (FastQCConfig.getInstance().expgroup) {
58 return(makeExponentialBaseGroups(maxLength));
59 }
60 else {
61 return(makeLinearBaseGroups(maxLength));
62 }
63 }
64
65 public static BaseGroup [] makeUngroupedGroups (int maxLength) {
66
67 int startingBase = 1;
68 int interval = 1;
69
70 Vector<BaseGroup> groups = new Vector<BaseGroup>();
71
72 while (startingBase <= maxLength) {
73
74 int endBase = startingBase+(interval-1);
75 if (endBase > maxLength) endBase = maxLength;
76
77 BaseGroup bg = new BaseGroup(startingBase, endBase);
78 groups.add(bg);
79
80 startingBase += interval;
81 }
82
83 return groups.toArray(new BaseGroup[0]);
84
85 }
86
87 public static BaseGroup [] makeExponentialBaseGroups (int maxLength) {
88
89 int startingBase = 1;
90 int interval = 1;
91
92 Vector<BaseGroup> groups = new Vector<BaseGroup>();
93
94 while (startingBase <= maxLength) {
95
96 int endBase = startingBase+(interval-1);
97 if (endBase > maxLength) endBase = maxLength;
98
99 BaseGroup bg = new BaseGroup(startingBase, endBase);
100 groups.add(bg);
101
102 startingBase += interval;
103
104 // See if we need to increase the interval
105 if (startingBase == 10 && maxLength > 75) {
106 interval = 5;
107 }
108 if (startingBase == 50 && maxLength > 200) {
109 interval = 10;
110 }
111 if (startingBase == 100 && maxLength > 300) {
112 interval = 50;
113 }
114 if (startingBase == 500 && maxLength > 1000) {
115 interval = 100;
116 }
117 if (startingBase == 1000 && maxLength > 2000) {
118 interval = 500;
119 }
120
121
122 }
123
124 return groups.toArray(new BaseGroup[0]);
125
126 }
127
128 private static int getLinearInterval (int length) {
129 // The the first 9bp as individual residues since odd stuff
130 // can happen there, then we find a grouping value which gives
131 // us a total set of groups below 75. We limit the intervals
132 // we try to sensible whole numbers.
133
134 int [] baseValues = new int [] {2,5,10};
135 int multiplier = 1;
136
137 while (true) {
138 for (int b=0;b<baseValues.length;b++) {
139 int interval = baseValues[b] * multiplier;
140 int groupCount = 9 + ((length-9)/interval);
141 if ((length-9) % interval != 0) {
142 groupCount += 1;
143 }
144
145 if (groupCount < 75) return interval;
146 }
147
148 multiplier *= 10;
149
150 if (multiplier == 10000000) {
151 throw new IllegalStateException("Couldn't find a sensible interval grouping for length '"+length+"'");
152 }
153 }
154
155 }
156
157 public static BaseGroup [] makeLinearBaseGroups (int maxLength) {
158
159 // For lengths below 75bp we just return everything.
160 if (maxLength <= 75) return makeUngroupedGroups(maxLength);
161
162 // We need to work out what interval we're going to use.
163
164 int interval = getLinearInterval(maxLength);
165
166
167 int startingBase = 1;
168
169 Vector<BaseGroup> groups = new Vector<BaseGroup>();
170
171 while (startingBase <= maxLength) {
172
173 int endBase = startingBase+(interval-1);
174
175 if (startingBase < 10) endBase = startingBase;
176
177 if (startingBase == 10 && interval > 10) {
178 endBase = interval-1;
179 }
180
181 if (endBase > maxLength) endBase = maxLength;
182
183 BaseGroup bg = new BaseGroup(startingBase, endBase);
184 groups.add(bg);
185
186 if (startingBase < 10) {
187 startingBase +=1;
188 }
189 else if (startingBase == 10 && interval > 10) {
190 startingBase = interval;
191 }
192 else {
193 startingBase += interval;
194 }
195
196 }
197
198 return groups.toArray(new BaseGroup[0]);
199
200 }
201
202
203 /**
204 *
205 * @param lowerCount
206 * @param upperCount
207 */
208
209 private BaseGroup (int lowerCount, int upperCount) {
210 this.lowerCount = lowerCount;
211 this.upperCount = upperCount;
212 }
213
214 public int lowerCount () {
215 return lowerCount;
216 }
217
218 public int upperCount () {
219 return upperCount;
220 }
221
222 public boolean containsValue (int value) {
223 return value>=lowerCount && value<=upperCount;
224 }
225
226 public String toString () {
227 if (lowerCount == upperCount) {
228 return ""+lowerCount;
229 }
230 else {
231 return ""+lowerCount+"-"+upperCount;
232 }
233 }
234
235 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Graphs;
20
21 import java.awt.BasicStroke;
22 import java.awt.Color;
23 import java.awt.Dimension;
24 import java.awt.Graphics;
25 import java.awt.Graphics2D;
26 import java.awt.RenderingHints;
27
28 import javax.swing.JPanel;
29
/**
 * A Swing panel which draws one or more data series as lines over a shared
 * set of x categories, together with a labelled y axis, a title and a legend.
 */
public class LineGraph extends JPanel {

	private String [] xTitles;
	private String xLabel;
	private String [] xCategories;
	private double [][] data;
	private String graphTitle;
	private double minY;
	private double maxY;
	private double yInterval;

	private static final Color [] COLOURS = new Color[] {new Color(220,0,0), new Color(0,0,220), new Color(0,220,0), Color.DARK_GRAY, Color.MAGENTA, Color.ORANGE,Color.YELLOW,Color.CYAN,Color.PINK,Color.LIGHT_GRAY};

	/**
	 * Convenience constructor for numeric x categories; each number is
	 * converted to its string form and used as the category label.
	 *
	 * @param data one array of y values per series
	 * @param minY the value at the bottom of the y axis
	 * @param maxY the value at the top of the y axis
	 * @param xLabel the text drawn under the x axis
	 * @param xTitles the series names shown in the legend
	 * @param xCategories the numeric labels for each x position
	 * @param graphTitle the text drawn above the plot
	 */
	public LineGraph (double [] [] data, double minY, double maxY, String xLabel, String [] xTitles, int [] xCategories, String graphTitle) {
		this(data,minY,maxY,xLabel,xTitles,new String[0],graphTitle);
		this.xCategories = new String [xCategories.length];
		for (int i=0;i<xCategories.length;i++) {
			this.xCategories[i] = ""+xCategories[i];
		}

	}

	/**
	 * @param data one array of y values per series
	 * @param minY the value at the bottom of the y axis
	 * @param maxY the value at the top of the y axis
	 * @param xLabel the text drawn under the x axis
	 * @param xTitles the series names shown in the legend
	 * @param xCategories the labels for each x position
	 * @param graphTitle the text drawn above the plot
	 * @throws IllegalArgumentException if maxY is NaN or infinite
	 */
	public LineGraph (double [] [] data, double minY, double maxY, String xLabel, String [] xTitles, String [] xCategories, String graphTitle) {
		this.data = data;
		this.minY = minY;
		this.maxY = maxY;
		this.xTitles = xTitles;
		this.xLabel = xLabel;
		this.xCategories = xCategories;
		this.graphTitle = graphTitle;
		this.yInterval = findOptimalYInterval(maxY);
	}

	/**
	 * Finds the smallest interval of the form {1,2,2.5,5} x 10^n (n >= 0)
	 * which divides the maximum into no more than 10 steps.
	 *
	 * @param max the largest value on the axis
	 * @return the chosen interval, never less than 1
	 * @throws IllegalArgumentException if max is NaN or infinite
	 */
	private static double findOptimalYInterval(double max) {

		// Without this check a non-finite maximum would never satisfy the
		// test below in a meaningful way.
		if (Double.isNaN(max) || Double.isInfinite(max)) {
			throw new IllegalArgumentException("Can't choose a y interval for a maximum of "+max);
		}

		// The base is held as a double so that it can't wrap around to a
		// negative or zero value for very large maxima.
		double base = 1;
		double [] divisions = new double [] {1,2,2.5,5};

		while (true) {

			for (int d=0;d<divisions.length;d++) {
				double tester = base * divisions[d];
				if (max / tester <= 10) {
					return tester;
				}
			}

			base *=10;

		}

	}

	/**
	 * Finds the first multiple of the interval which is not below the minimum.
	 */
	private static double firstTickAtOrAbove(double min, double interval) {
		// Adding 0.0 turns a possible -0.0 into 0.0 so the label never reads "-0"
		return (Math.ceil(min / interval) * interval) + 0.0;
	}

	/**
	 * Formats an axis value, dropping a trailing ".0" where we don't need it.
	 */
	private static String formatTick(double value) {
		// The dot must be escaped, otherwise any character followed by a
		// final 0 would be removed (eg the "10" at the end of "1.0E10").
		return (""+value).replaceAll("\\.0$", "");
	}

	/**
	 * Converts a data value into a pixel row, leaving a 40px margin above
	 * and below the plot area.
	 */
	private static int scaleToPixel(double value, double min, double max, int height) {
		return (height-40) - (int)(((height-80)/(max-min))*(value-min));
	}

	@Override
	public Dimension getPreferredSize () {
		return new Dimension(800,600);
	}

	@Override
	public Dimension getMinimumSize () {
		return new Dimension(100,200);
	}

	/**
	 * Draws the whole graph: y labels, title, axes, x labels with
	 * alternating background stripes, grid lines, data lines and legend.
	 */
	@Override
	public void paint (Graphics g) {
		super.paint(g);

		g.setColor(Color.WHITE);
		g.fillRect(0, 0, getWidth(), getHeight());
		g.setColor(Color.BLACK);

		double yStart = firstTickAtOrAbove(minY, yInterval);

		// The plot area starts to the right of the widest y axis label
		int xOffset = 0;

		for (double i=yStart;i<=maxY;i+=yInterval) {
			String label = formatTick(i);
			int width = g.getFontMetrics().stringWidth(label);
			if (width > xOffset) {
				xOffset = width;
			}

			g.drawString(label, 2, getY(i)+(g.getFontMetrics().getAscent()/2));
		}

		// Give the x axis a bit of breathing space
		xOffset += 5;

		// Draw the graph title
		int titleWidth = g.getFontMetrics().stringWidth(graphTitle);
		g.drawString(graphTitle, (xOffset + ((getWidth()-(xOffset+10))/2)) - (titleWidth/2), 30);


		// Now draw the axes
		g.drawLine(xOffset, getHeight()-40, getWidth()-10,getHeight()-40);
		g.drawLine(xOffset, getHeight()-40, xOffset, 40);

		// Draw the xLabel under the xAxis
		g.drawString(xLabel, (getWidth()/2) - (g.getFontMetrics().stringWidth(xLabel)/2), getHeight()-5);


		// Work out how wide each x position is, never less than one pixel
		int baseWidth = (getWidth()-(xOffset+10))/data[0].length;
		if (baseWidth<1) baseWidth=1;

		// Draw faint boxes over alternating bases so you can see which is
		// which, and draw an x label wherever it won't overlap the last one.

		int lastXLabelEnd = 0;

		for (int i=0;i<data[0].length;i++) {
			if (i%2 != 0) {
				g.setColor(new Color(230, 230, 230));
				g.fillRect(xOffset+(baseWidth*i), 40, baseWidth, getHeight()-80);
			}
			g.setColor(Color.BLACK);
			String baseNumber = ""+xCategories[i];
			int baseNumberWidth = g.getFontMetrics().stringWidth(baseNumber);
			int baseNumberPosition = (baseWidth/2)+xOffset+(baseWidth*i)-(baseNumberWidth/2);

			if (baseNumberPosition > lastXLabelEnd) {
				g.drawString(baseNumber,baseNumberPosition, getHeight()-25);
				lastXLabelEnd = baseNumberPosition+baseNumberWidth+5;
			}
		}

		// Now draw horizontal lines across from the y axis

		g.setColor(new Color(180,180,180));
		for (double i=yStart;i<=maxY;i+=yInterval) {
			g.drawLine(xOffset, getY(i), getWidth()-10, getY(i));
		}
		g.setColor(Color.BLACK);

		// Now draw the datasets

		if (g instanceof Graphics2D) {
			((Graphics2D)g).setStroke(new BasicStroke(2));
			((Graphics2D)g).setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
		}

		for (int d=0;d<data.length;d++) {
			g.setColor(COLOURS[d % COLOURS.length]);

			int lastY = getY(data[d][0]);
			for (int i=1;i<data[d].length;i++) {
				int thisY = getY(data[d][i]);
				g.drawLine((baseWidth/2)+xOffset+(baseWidth*(i-1)), lastY, (baseWidth/2)+xOffset+(baseWidth*i), thisY);
				lastY = thisY;
			}

		}

		// Now draw the data legend

		if (g instanceof Graphics2D) {
			((Graphics2D)g).setStroke(new BasicStroke(1));
			((Graphics2D)g).setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF);
		}


		// First we need to find the widest label
		int widestLabel = 0;
		for (int t=0;t<xTitles.length;t++) {
			int width = g.getFontMetrics().stringWidth(xTitles[t]);
			if (width > widestLabel) widestLabel = width;
		}

		// Add 3px either side for a bit of space;
		widestLabel += 6;

		// First draw a box to put the legend in
		g.setColor(Color.WHITE);
		g.fillRect((getWidth()-10)-widestLabel, 40, widestLabel, 3+(20*xTitles.length));
		g.setColor(Color.LIGHT_GRAY);
		g.drawRect((getWidth()-10)-widestLabel, 40, widestLabel, 3+(20*xTitles.length));

		// Now draw the actual labels. The colours wrap around in the same
		// way as for the lines so the legend always matches the plot.
		for (int t=0;t<xTitles.length;t++) {
			g.setColor(COLOURS[t % COLOURS.length]);
			g.drawString(xTitles[t], ((getWidth()-10)-widestLabel)+3, 40+(20*(t+1)));
		}

	}

	/**
	 * Converts a data value into a pixel row within the current panel height.
	 */
	private int getY(double y) {
		return scaleToPixel(y, minY, maxY, getHeight());
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Graphs;
20
21 import java.awt.BasicStroke;
22 import java.awt.Color;
23 import java.awt.Dimension;
24 import java.awt.Graphics;
25 import java.awt.Graphics2D;
26 import java.awt.RenderingHints;
27
28 import javax.swing.JPanel;
29
/**
 * A Swing panel which draws one or more data series as lines over a shared
 * set of x categories, together with a labelled y axis, a title and a legend.
 */
public class LineGraph extends JPanel {

	private String [] xTitles;
	private String xLabel;
	private String [] xCategories;
	private double [][] data;
	private String graphTitle;
	private double minY;
	private double maxY;
	private double yInterval;

	private static final Color [] COLOURS = new Color[] {new Color(220,0,0), new Color(0,0,220), new Color(0,220,0), Color.DARK_GRAY, Color.MAGENTA, Color.ORANGE,Color.YELLOW,Color.CYAN,Color.PINK,Color.LIGHT_GRAY};

	/**
	 * Convenience constructor for numeric x categories; each number is
	 * converted to its string form and used as the category label.
	 *
	 * @param data one array of y values per series
	 * @param minY the value at the bottom of the y axis
	 * @param maxY the value at the top of the y axis
	 * @param xLabel the text drawn under the x axis
	 * @param xTitles the series names shown in the legend
	 * @param xCategories the numeric labels for each x position
	 * @param graphTitle the text drawn above the plot
	 */
	public LineGraph (double [] [] data, double minY, double maxY, String xLabel, String [] xTitles, int [] xCategories, String graphTitle) {
		this(data,minY,maxY,xLabel,xTitles,new String[0],graphTitle);
		this.xCategories = new String [xCategories.length];
		for (int i=0;i<xCategories.length;i++) {
			this.xCategories[i] = ""+xCategories[i];
		}

	}

	/**
	 * @param data one array of y values per series
	 * @param minY the value at the bottom of the y axis
	 * @param maxY the value at the top of the y axis
	 * @param xLabel the text drawn under the x axis
	 * @param xTitles the series names shown in the legend
	 * @param xCategories the labels for each x position
	 * @param graphTitle the text drawn above the plot
	 * @throws IllegalArgumentException if maxY is NaN or infinite
	 */
	public LineGraph (double [] [] data, double minY, double maxY, String xLabel, String [] xTitles, String [] xCategories, String graphTitle) {
		this.data = data;
		this.minY = minY;
		this.maxY = maxY;
		this.xTitles = xTitles;
		this.xLabel = xLabel;
		this.xCategories = xCategories;
		this.graphTitle = graphTitle;
		this.yInterval = findOptimalYInterval(maxY);
	}

	/**
	 * Finds the smallest interval of the form {1,2,2.5,5} x 10^n (n >= 0)
	 * which divides the maximum into no more than 10 steps.
	 *
	 * @param max the largest value on the axis
	 * @return the chosen interval, never less than 1
	 * @throws IllegalArgumentException if max is NaN or infinite
	 */
	private static double findOptimalYInterval(double max) {

		// Without this check a non-finite maximum would never satisfy the
		// test below in a meaningful way.
		if (Double.isNaN(max) || Double.isInfinite(max)) {
			throw new IllegalArgumentException("Can't choose a y interval for a maximum of "+max);
		}

		// The base is held as a double so that it can't wrap around to a
		// negative or zero value for very large maxima.
		double base = 1;
		double [] divisions = new double [] {1,2,2.5,5};

		while (true) {

			for (int d=0;d<divisions.length;d++) {
				double tester = base * divisions[d];
				if (max / tester <= 10) {
					return tester;
				}
			}

			base *=10;

		}

	}

	/**
	 * Finds the first multiple of the interval which is not below the minimum.
	 */
	private static double firstTickAtOrAbove(double min, double interval) {
		// Adding 0.0 turns a possible -0.0 into 0.0 so the label never reads "-0"
		return (Math.ceil(min / interval) * interval) + 0.0;
	}

	/**
	 * Formats an axis value, dropping a trailing ".0" where we don't need it.
	 */
	private static String formatTick(double value) {
		// The dot must be escaped, otherwise any character followed by a
		// final 0 would be removed (eg the "10" at the end of "1.0E10").
		return (""+value).replaceAll("\\.0$", "");
	}

	/**
	 * Converts a data value into a pixel row, leaving a 40px margin above
	 * and below the plot area.
	 */
	private static int scaleToPixel(double value, double min, double max, int height) {
		return (height-40) - (int)(((height-80)/(max-min))*(value-min));
	}

	@Override
	public Dimension getPreferredSize () {
		return new Dimension(800,600);
	}

	@Override
	public Dimension getMinimumSize () {
		return new Dimension(100,200);
	}

	/**
	 * Draws the whole graph: y labels, title, axes, x labels with
	 * alternating background stripes, grid lines, data lines and legend.
	 */
	@Override
	public void paint (Graphics g) {
		super.paint(g);

		g.setColor(Color.WHITE);
		g.fillRect(0, 0, getWidth(), getHeight());
		g.setColor(Color.BLACK);

		double yStart = firstTickAtOrAbove(minY, yInterval);

		// The plot area starts to the right of the widest y axis label
		int xOffset = 0;

		for (double i=yStart;i<=maxY;i+=yInterval) {
			String label = formatTick(i);
			int width = g.getFontMetrics().stringWidth(label);
			if (width > xOffset) {
				xOffset = width;
			}

			g.drawString(label, 2, getY(i)+(g.getFontMetrics().getAscent()/2));
		}

		// Give the x axis a bit of breathing space
		xOffset += 5;

		// Draw the graph title
		int titleWidth = g.getFontMetrics().stringWidth(graphTitle);
		g.drawString(graphTitle, (xOffset + ((getWidth()-(xOffset+10))/2)) - (titleWidth/2), 30);


		// Now draw the axes
		g.drawLine(xOffset, getHeight()-40, getWidth()-10,getHeight()-40);
		g.drawLine(xOffset, getHeight()-40, xOffset, 40);

		// Draw the xLabel under the xAxis
		g.drawString(xLabel, (getWidth()/2) - (g.getFontMetrics().stringWidth(xLabel)/2), getHeight()-5);


		// Work out how wide each x position is, never less than one pixel
		int baseWidth = (getWidth()-(xOffset+10))/data[0].length;
		if (baseWidth<1) baseWidth=1;

		// Draw faint boxes over alternating bases so you can see which is
		// which, and draw an x label wherever it won't overlap the last one.

		int lastXLabelEnd = 0;

		for (int i=0;i<data[0].length;i++) {
			if (i%2 != 0) {
				g.setColor(new Color(230, 230, 230));
				g.fillRect(xOffset+(baseWidth*i), 40, baseWidth, getHeight()-80);
			}
			g.setColor(Color.BLACK);
			String baseNumber = ""+xCategories[i];
			int baseNumberWidth = g.getFontMetrics().stringWidth(baseNumber);
			int baseNumberPosition = (baseWidth/2)+xOffset+(baseWidth*i)-(baseNumberWidth/2);

			if (baseNumberPosition > lastXLabelEnd) {
				g.drawString(baseNumber,baseNumberPosition, getHeight()-25);
				lastXLabelEnd = baseNumberPosition+baseNumberWidth+5;
			}
		}

		// Now draw horizontal lines across from the y axis

		g.setColor(new Color(180,180,180));
		for (double i=yStart;i<=maxY;i+=yInterval) {
			g.drawLine(xOffset, getY(i), getWidth()-10, getY(i));
		}
		g.setColor(Color.BLACK);

		// Now draw the datasets

		if (g instanceof Graphics2D) {
			((Graphics2D)g).setStroke(new BasicStroke(2));
			((Graphics2D)g).setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
		}

		for (int d=0;d<data.length;d++) {
			g.setColor(COLOURS[d % COLOURS.length]);

			int lastY = getY(data[d][0]);
			for (int i=1;i<data[d].length;i++) {
				int thisY = getY(data[d][i]);
				g.drawLine((baseWidth/2)+xOffset+(baseWidth*(i-1)), lastY, (baseWidth/2)+xOffset+(baseWidth*i), thisY);
				lastY = thisY;
			}

		}

		// Now draw the data legend

		if (g instanceof Graphics2D) {
			((Graphics2D)g).setStroke(new BasicStroke(1));
			((Graphics2D)g).setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF);
		}


		// First we need to find the widest label
		int widestLabel = 0;
		for (int t=0;t<xTitles.length;t++) {
			int width = g.getFontMetrics().stringWidth(xTitles[t]);
			if (width > widestLabel) widestLabel = width;
		}

		// Add 3px either side for a bit of space;
		widestLabel += 6;

		// First draw a box to put the legend in
		g.setColor(Color.WHITE);
		g.fillRect((getWidth()-10)-widestLabel, 40, widestLabel, 3+(20*xTitles.length));
		g.setColor(Color.LIGHT_GRAY);
		g.drawRect((getWidth()-10)-widestLabel, 40, widestLabel, 3+(20*xTitles.length));

		// Now draw the actual labels. The colours wrap around in the same
		// way as for the lines so the legend always matches the plot.
		for (int t=0;t<xTitles.length;t++) {
			g.setColor(COLOURS[t % COLOURS.length]);
			g.drawString(xTitles[t], ((getWidth()-10)-widestLabel)+3, 40+(20*(t+1)));
		}

	}

	/**
	 * Converts a data value into a pixel row within the current panel height.
	 */
	private int getY(double y) {
		return scaleToPixel(y, minY, maxY, getHeight());
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Graphs;
20
21 import java.awt.Color;
22 import java.awt.Graphics;
23
24 import javax.swing.JPanel;
25
/**
 * A Swing panel which draws a box and whisker plot for each x position,
 * over coloured background bands split at y values of 20 and 28, with a
 * line joining the mean values drawn on top.
 */
public class QualityBoxPlot extends JPanel {

	private double [] means;
	private double [] medians;
	private double [] lowest;
	private double [] highest;
	private double [] lowerQuartile;
	private double [] upperQuartile;
	private String [] xLabels;
	private String graphTitle;
	private double minY;
	private double maxY;
	private double yInterval;

	private static final Color GOOD = new Color(195,230,195);
	private static final Color BAD = new Color(230,220,195);
	private static final Color UGLY = new Color(230,195,195);

	private static final Color GOOD_DARK = new Color(175,230,175);
	private static final Color BAD_DARK = new Color(230,215,175);
	private static final Color UGLY_DARK = new Color(230,175,175);

	/**
	 * All of the per-position arrays are indexed by x position.
	 *
	 * @param means the mean value at each position
	 * @param medians the median value at each position
	 * @param lowest the end of the lower whisker at each position
	 * @param highest the end of the upper whisker at each position
	 * @param lowerQuartile the bottom of the box at each position
	 * @param upperQuartile the top of the box at each position
	 * @param minY the value at the bottom of the y axis
	 * @param maxY the value at the top of the y axis
	 * @param yInterval the spacing between y axis labels
	 * @param xLabels the label for each x position
	 * @param graphTitle the text drawn above the plot
	 */
	public QualityBoxPlot (double [] means, double [] medians, double [] lowest, double [] highest, double [] lowerQuartile, double [] upperQuartile, double minY, double maxY, double yInterval, String [] xLabels, String graphTitle) {

		this.means = means;
		this.medians = medians;
		this.lowest = lowest;
		this.highest = highest;
		this.lowerQuartile = lowerQuartile;
		this.upperQuartile = upperQuartile;
		this.minY = minY;
		this.maxY = maxY;
		this.yInterval = yInterval;
		this.xLabels = xLabels;
		this.graphTitle = graphTitle;
	}

	/**
	 * Finds the first multiple of the interval which is not below the minimum.
	 */
	private static double firstTickAtOrAbove(double min, double interval) {
		// Adding 0.0 turns a possible -0.0 into 0.0 so the label never reads "-0"
		return (Math.ceil(min / interval) * interval) + 0.0;
	}

	/**
	 * Formats an axis value, dropping a trailing ".0" where we don't need it.
	 */
	private static String formatTick(double value) {
		// The dot must be escaped, otherwise any character followed by a
		// final 0 would be removed (eg the "10" at the end of "1.0E10").
		return (""+value).replaceAll("\\.0$", "");
	}

	/**
	 * Draws the whole plot: y labels, title, background bands with x
	 * labels, axes, the box and whiskers for each position and the means.
	 */
	@Override
	public void paint (Graphics g) {
		super.paint(g);

		g.setColor(Color.WHITE);
		g.fillRect(0, 0, getWidth(), getHeight());
		g.setColor(Color.BLACK);

		double yStart = firstTickAtOrAbove(minY, yInterval);

		// The plot area starts to the right of the widest y axis label
		int xOffset = 0;

		for (double i=yStart;i<=maxY;i+=yInterval) {
			String label = formatTick(i);
			int width = g.getFontMetrics().stringWidth(label);
			if (width > xOffset) {
				xOffset = width;
			}

			g.drawString(label, 2, getY(i)+(g.getFontMetrics().getAscent()/2));
		}

		// Give the x axis a bit of breathing space
		xOffset += 5;

		g.setColor(Color.BLACK);

		// Draw the graph title
		int titleWidth = g.getFontMetrics().stringWidth(graphTitle);
		g.drawString(graphTitle, (xOffset + ((getWidth()-(xOffset+10))/2)) - (titleWidth/2), 30);

		// Work out the width of the x axis bins, never less than one pixel
		int baseWidth = (getWidth()-(xOffset+10))/means.length;
		if (baseWidth<1) baseWidth = 1;

		int lastXLabelEnd = 0;

		// Draw the background bands, using a darker shade for every other
		// position so you can see which is which.
		for (int i=0;i<means.length;i++) {
			boolean odd = i%2 != 0;

			// From the first tick up to 20
			g.setColor(odd ? UGLY : UGLY_DARK);
			g.fillRect(xOffset+(baseWidth*i), getY(20), baseWidth, getY(yStart)-getY(20));

			// From 20 up to 28
			g.setColor(odd ? BAD : BAD_DARK);
			g.fillRect(xOffset+(baseWidth*i), getY(28), baseWidth, getY(20)-getY(28));

			// From 28 up to the top of the axis
			g.setColor(odd ? GOOD : GOOD_DARK);
			g.fillRect(xOffset+(baseWidth*i), getY(maxY), baseWidth, getY(28)-getY(maxY));

			// Draw the x label wherever it won't overlap the last one drawn
			g.setColor(Color.BLACK);
			int baseNumberWidth = g.getFontMetrics().stringWidth(xLabels[i]);
			int labelStart = ((baseWidth/2)+xOffset+(baseWidth*i))-(baseNumberWidth/2);

			if (labelStart > lastXLabelEnd) {
				g.drawString(xLabels[i], labelStart, getHeight()-25);
				lastXLabelEnd = labelStart+baseNumberWidth+5;
			}
		}

		// Now draw the axes
		g.drawLine(xOffset, getHeight()-40, getWidth()-10,getHeight()-40);
		g.drawLine(xOffset, getHeight()-40, xOffset, 40);
		g.drawString("Position in read (bp)", (getWidth()/2) - (g.getFontMetrics().stringWidth("Position in read (bp)")/2), getHeight()-5);

		// Now draw the boxplots

		for (int i=0;i<medians.length;i++) {

			int boxBottomY = getY(lowerQuartile[i]);
			int boxTopY = getY(upperQuartile[i]);
			int lowerWhiskerY = getY(lowest[i]);
			int upperWhiskerY = getY(highest[i]);
			int medianY = getY(medians[i]);

			// Draw the main box
			g.setColor(new Color(240,240,0));
			g.fillRect(xOffset+(baseWidth*i)+2, boxTopY, baseWidth-4, boxBottomY-boxTopY);
			g.setColor(Color.BLACK);
			g.drawRect(xOffset+(baseWidth*i)+2, boxTopY, baseWidth-4, boxBottomY-boxTopY);

			// Draw the upper whisker
			g.drawLine(xOffset+(baseWidth*i)+(baseWidth/2), upperWhiskerY, xOffset+(baseWidth*i)+(baseWidth/2), boxTopY);
			g.drawLine(xOffset+(baseWidth*i)+2, upperWhiskerY, xOffset+(baseWidth*(i+1))-2, upperWhiskerY);

			// Draw the lower whisker
			g.drawLine(xOffset+(baseWidth*i)+(baseWidth/2), lowerWhiskerY, xOffset+(baseWidth*i)+(baseWidth/2), boxBottomY);
			g.drawLine(xOffset+(baseWidth*i)+2, lowerWhiskerY, xOffset+(baseWidth*(i+1))-2, lowerWhiskerY);

			// Draw the median line
			g.setColor(new Color(200,0,0));
			g.drawLine(xOffset+(baseWidth*i)+2, medianY, (xOffset+(baseWidth*(i+1)))-2,medianY);

		}

		// Now overlay the means
		g.setColor(new Color(0,0,200));
		int lastY = getY(means[0]);
		for (int i=1;i<means.length;i++) {
			int thisY = getY(means[i]);
			g.drawLine((baseWidth/2)+xOffset+(baseWidth*(i-1)), lastY, (baseWidth/2)+xOffset+(baseWidth*i), thisY);
			lastY = thisY;
		}

	}

	/**
	 * Converts a data value into a pixel row, leaving a 40px margin above
	 * and below the plot area.
	 *
	 * @param y the data value to convert
	 * @return the pixel row for that value at the current panel height
	 */
	public int getY(double y) {
		return (getHeight()-40) - (int)(((getHeight()-80)/(maxY-minY))*(y-minY));
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Graphs;
20
21 import java.awt.Color;
22 import java.awt.Graphics;
23
24 import javax.swing.JPanel;
25
26 public class QualityBoxPlot extends JPanel {
27
28 private double [] means;
29 private double [] medians;
30 private double [] lowest;
31 private double [] highest;
32 private double [] lowerQuartile;
33 private double [] upperQuartile;
34 private String [] xLabels;
35 private String graphTitle;
36 private double minY;
37 private double maxY;
38 private double yInterval;
39
40 private static final Color GOOD = new Color(195,230,195);
41 private static final Color BAD = new Color(230,220,195);
42 private static final Color UGLY = new Color(230,195,195);
43
44 private static final Color GOOD_DARK = new Color(175,230,175);
45 private static final Color BAD_DARK = new Color(230,215,175);
46 private static final Color UGLY_DARK = new Color(230,175,175);
47
/**
 * All of the per-position arrays are indexed by x position.
 *
 * @param means the mean value at each position
 * @param medians the median value at each position
 * @param lowest the end of the lower whisker at each position
 * @param highest the end of the upper whisker at each position
 * @param lowerQuartile the bottom of the box at each position
 * @param upperQuartile the top of the box at each position
 * @param minY the value at the bottom of the y axis
 * @param maxY the value at the top of the y axis
 * @param yInterval the spacing between y axis labels
 * @param xLabels the label for each x position
 * @param graphTitle the text drawn above the plot
 */
public QualityBoxPlot (double [] means, double [] medians, double [] lowest, double [] highest, double [] lowerQuartile, double [] upperQuartile, double minY, double maxY, double yInterval, String [] xLabels, String graphTitle) {

	// The data series
	this.means = means;
	this.medians = medians;
	this.lowest = lowest;
	this.highest = highest;
	this.lowerQuartile = lowerQuartile;
	this.upperQuartile = upperQuartile;

	// The axes and labelling. The interval is assigned once only.
	this.minY = minY;
	this.maxY = maxY;
	this.yInterval = yInterval;
	this.xLabels = xLabels;
	this.graphTitle = graphTitle;
}
63
64 public void paint (Graphics g) {
65 super.paint(g);
66
67 g.setColor(Color.WHITE);
68 g.fillRect(0, 0, getWidth(), getHeight());
69 g.setColor(Color.BLACK);
70
71 int lastY = 0;
72
73 double yStart;
74
75 if (minY % yInterval == 0) {
76 yStart = minY;
77 }
78 else {
79 yStart = yInterval * (((int)minY/yInterval)+1);
80 }
81
82 int xOffset = 0;
83
84 for (double i=yStart;i<=maxY;i+=yInterval) {
85 String label = ""+i;
86 label = label.replaceAll(".0$", "");
87 int width = g.getFontMetrics().stringWidth(label);
88 if (width > xOffset) {
89 xOffset = width;
90 }
91
92 g.drawString(label, 2, getY(i)+(g.getFontMetrics().getAscent()/2));
93 }
94
95 // Give the x axis a bit of breathing space
96 xOffset += 5;
97
98
99
100 g.setColor(Color.BLACK);
101
102
103 // Draw the graph title
104 int titleWidth = g.getFontMetrics().stringWidth(graphTitle);
105 g.drawString(graphTitle, (xOffset + ((getWidth()-(xOffset+10))/2)) - (titleWidth/2), 30);
106
107
108
109 // Work out the width of the x axis bins
110 int baseWidth = (getWidth()-(xOffset+10))/means.length;
111 if (baseWidth<1) baseWidth = 1;
112
113 // First draw faint boxes over alternating bases so you can see which is which
114
115 int lastXLabelEnd = 0;
116
117 for (int i=0;i<means.length;i++) { // Now draw some background colours which show good / bad quality
118 if (i%2 != 0) {
119 g.setColor(UGLY);
120 }
121 else {
122 g.setColor(UGLY_DARK);
123 }
124
125 g.fillRect(xOffset+(baseWidth*i), getY(20), baseWidth, getY(yStart)-getY(20));
126
127 if (i%2 != 0) {
128 g.setColor(BAD);
129 }
130 else {
131 g.setColor(BAD_DARK);
132 }
133
134 g.fillRect(xOffset+(baseWidth*i), getY(28), baseWidth, getY(20)-getY(28));
135
136 if (i%2 != 0) {
137 g.setColor(GOOD);
138 }
139 else {
140 g.setColor(GOOD_DARK);
141 }
142
143 g.fillRect(xOffset+(baseWidth*i), getY(maxY), baseWidth, getY(28)-getY(maxY));
144
145 g.setColor(Color.BLACK);
146 int baseNumberWidth = g.getFontMetrics().stringWidth(xLabels[i]);
147 int labelStart = ((baseWidth/2)+xOffset+(baseWidth*i))-(baseNumberWidth/2);
148
149 if (labelStart > lastXLabelEnd) {
150 g.drawString(xLabels[i], labelStart, getHeight()-25);
151 lastXLabelEnd = labelStart+g.getFontMetrics().stringWidth(xLabels[i])+5;
152 }
153 }
154
155 // Now draw the axes
156 g.drawLine(xOffset, getHeight()-40, getWidth()-10,getHeight()-40);
157 g.drawLine(xOffset, getHeight()-40, xOffset, 40);
158 g.drawString("Position in read (bp)", (getWidth()/2) - (g.getFontMetrics().stringWidth("Position in read (bp)")/2), getHeight()-5);
159
160 // Now draw the boxplots
161
162 for (int i=0;i<medians.length;i++) {
163
164 int boxBottomY = getY(lowerQuartile[i]);
165 int boxTopY = getY(upperQuartile[i]);
166 int lowerWhiskerY = getY(lowest[i]);
167 int upperWhiskerY = getY(highest[i]);
168 int medianY = getY(medians[i]);
169
170 // System.out.println("For base "+i+" values are BoxBottom="+lowerQuartile[i]+" boxTop="+upperQuartile[i]+" whiskerBottom="+lowest[i]+" whiskerTop="+highest[i]+" median="+medians[i]);
171 // System.out.println("For base "+i+" Yvalues are BoxBottom="+boxBottomY+" boxTop="+boxTopY+" whiskerBottom="+lowerWhiskerY+" whiskerTop="+upperWhiskerY+" median="+medianY);
172
173 // Draw the main box
174 g.setColor(new Color(240,240,0));
175 g.fillRect(xOffset+(baseWidth*i)+2, boxTopY, baseWidth-4, boxBottomY-boxTopY);
176 g.setColor(Color.BLACK);
177 g.drawRect(xOffset+(baseWidth*i)+2, boxTopY, baseWidth-4, boxBottomY-boxTopY);
178
179 // Draw the upper whisker
180 g.drawLine(xOffset+(baseWidth*i)+(baseWidth/2), upperWhiskerY, xOffset+(baseWidth*i)+(baseWidth/2), boxTopY);
181 g.drawLine(xOffset+(baseWidth*i)+2, upperWhiskerY, xOffset+(baseWidth*(i+1))-2, upperWhiskerY);
182
183 // Draw the lower whisker
184 g.drawLine(xOffset+(baseWidth*i)+(baseWidth/2), lowerWhiskerY, xOffset+(baseWidth*i)+(baseWidth/2), boxBottomY);
185 g.drawLine(xOffset+(baseWidth*i)+2, lowerWhiskerY, xOffset+(baseWidth*(i+1))-2, lowerWhiskerY);
186
187 // Draw the median line
188 g.setColor(new Color(200,0,0));
189 g.drawLine(xOffset+(baseWidth*i)+2, medianY, (xOffset+(baseWidth*(i+1)))-2,medianY);
190
191
192 }
193
194 // Now overlay the means
195 g.setColor(new Color(0,0,200));
196 lastY = getY(means[0]);
197 for (int i=1;i<means.length;i++) {
198 int thisY = getY(means[i]);
199 g.drawLine((baseWidth/2)+xOffset+(baseWidth*(i-1)), lastY, (baseWidth/2)+xOffset+(baseWidth*i), thisY);
200 lastY = thisY;
201 }
202
203 }
204
205 public int getY(double y) {
206 return (getHeight()-40) - (int)(((getHeight()-80)/(maxY-minY))*(y-minY));
207 }
208
209 }
0 /**
1 * Copyright Copyright 2013-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Graphs;
21
22 import java.awt.BasicStroke;
23 import java.awt.Color;
24 import java.awt.Graphics;
25 import java.awt.Graphics2D;
26 import java.awt.RenderingHints;
27
28 import javax.swing.JPanel;
29
30 import uk.ac.babraham.FastQC.Modules.ModuleConfig;
31 import uk.ac.babraham.FastQC.Utilities.HotColdColourGradient;
32
33 public class TileGraph extends JPanel {
34
35 private String [] xLabels;
36 private int [] tiles;
37 private double [][]tileBaseMeans;
38 private HotColdColourGradient gradient = new HotColdColourGradient();
39
40 public TileGraph (String [] xLabels, int [] tiles, double [][] tileBaseMeans) {
41 this.xLabels = xLabels;
42 this.tiles = tiles;
43 this.tileBaseMeans = tileBaseMeans;
44
45 }
46
47
48 private int getY(double y) {
49 return (getHeight()-40) - (int)(((getHeight()-80)/(double)(tiles.length))*y);
50 }
51
52
53 public void paint (Graphics g) {
54 super.paint(g);
55
56 g.setColor(Color.WHITE);
57 g.fillRect(0, 0, getWidth(), getHeight());
58 g.setColor(Color.BLACK);
59
60 int lastY = 0;
61
62 int xOffset = 0;
63
64 for (int i=0;i<tiles.length;i++) {
65 String label = ""+tiles[i];
66 int width = g.getFontMetrics().stringWidth(label);
67 if (width > xOffset) {
68 xOffset = width;
69 }
70
71 int thisY = getY(i);
72 if (i>0 && thisY+g.getFontMetrics().getAscent() > lastY) continue;
73
74 g.drawString(label, 2, getY(i));
75 lastY = thisY;
76 }
77
78 // Give the x axis a bit of breathing space
79 xOffset += 5;
80
81 // Draw the graph title
82 String graphTitle = "Quality per tile";
83 int titleWidth = g.getFontMetrics().stringWidth(graphTitle);
84 g.drawString(graphTitle, (xOffset + ((getWidth()-(xOffset+10))/2)) - (titleWidth/2), 30);
85
86
87 // Now draw the axes
88 g.drawLine(xOffset, getHeight()-40, getWidth()-10,getHeight()-40);
89 g.drawLine(xOffset, getHeight()-40, xOffset, 40);
90
91 // Draw the xLabel under the xAxis
92 String xLabel = "Position in read (bp)";
93 g.drawString(xLabel, (getWidth()/2) - (g.getFontMetrics().stringWidth(xLabel)/2), getHeight()-5);
94
95
96 // Now draw the data points
97 int baseWidth = (getWidth()-(xOffset+10))/xLabels.length;
98 if (baseWidth<1) baseWidth=1;
99
100 // System.out.println("Base Width is "+baseWidth);
101
102 // First draw faint boxes over alternating bases so you can see which is which
103
104 // Let's find the longest label, and then work out how often we can draw labels
105
106 int lastXLabelEnd = 0;
107 g.setColor(Color.BLACK);
108
109 for (int base=0;base<xLabels.length;base++) {
110
111 String baseNumber = ""+xLabels[base];
112 int baseNumberWidth = g.getFontMetrics().stringWidth(baseNumber);
113 int baseNumberPosition = (baseWidth/2)+xOffset+(baseWidth*base)-(baseNumberWidth/2);
114
115 if (baseNumberPosition > lastXLabelEnd) {
116 g.drawString(baseNumber,baseNumberPosition, getHeight()-25);
117 lastXLabelEnd = baseNumberPosition+baseNumberWidth+5;
118 }
119 }
120
121 // Now draw the datasets
122
123 if (g instanceof Graphics2D) {
124 ((Graphics2D)g).setStroke(new BasicStroke(2));
125 ((Graphics2D)g).setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
126 }
127
128 for (int tile=0;tile<tiles.length;tile++) {
129 for (int base=0;base<xLabels.length;base++) {
130
131 g.setColor(getColour(tile,base));
132
133 int x=xOffset+(baseWidth*base);
134 int y=getY(tile+1);
135 g.fillRect(x, y, baseWidth, getY(tile)-getY(tile+1));
136
137 }
138
139 }
140
141
142 }
143
144 private Color getColour(int tile, int base) {
145 return gradient.getColor(0-tileBaseMeans[tile][base], 0, ModuleConfig.getParam("tile", "error"));
146 }
147
148
149 }
0 /**
1 * Copyright Copyright 2013-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Graphs;
21
22 import java.awt.BasicStroke;
23 import java.awt.Color;
24 import java.awt.Graphics;
25 import java.awt.Graphics2D;
26 import java.awt.RenderingHints;
27
28 import javax.swing.JPanel;
29
30 import uk.ac.babraham.FastQC.Modules.ModuleConfig;
31 import uk.ac.babraham.FastQC.Utilities.HotColdColourGradient;
32
33 public class TileGraph extends JPanel {
34
35 private String [] xLabels;
36 private int [] tiles;
37 private double [][]tileBaseMeans;
38 private HotColdColourGradient gradient = new HotColdColourGradient();
39
40 public TileGraph (String [] xLabels, int [] tiles, double [][] tileBaseMeans) {
41 this.xLabels = xLabels;
42 this.tiles = tiles;
43 this.tileBaseMeans = tileBaseMeans;
44
45 }
46
47
48 private int getY(double y) {
49 return (getHeight()-40) - (int)(((getHeight()-80)/(double)(tiles.length))*y);
50 }
51
52
53 public void paint (Graphics g) {
54 super.paint(g);
55
56 g.setColor(Color.WHITE);
57 g.fillRect(0, 0, getWidth(), getHeight());
58 g.setColor(Color.BLACK);
59
60 int lastY = 0;
61
62 int xOffset = 0;
63
64 for (int i=0;i<tiles.length;i++) {
65 String label = ""+tiles[i];
66 int width = g.getFontMetrics().stringWidth(label);
67 if (width > xOffset) {
68 xOffset = width;
69 }
70
71 int thisY = getY(i);
72 if (i>0 && thisY+g.getFontMetrics().getAscent() > lastY) continue;
73
74 g.drawString(label, 2, getY(i));
75 lastY = thisY;
76 }
77
78 // Give the x axis a bit of breathing space
79 xOffset += 5;
80
81 // Draw the graph title
82 String graphTitle = "Quality per tile";
83 int titleWidth = g.getFontMetrics().stringWidth(graphTitle);
84 g.drawString(graphTitle, (xOffset + ((getWidth()-(xOffset+10))/2)) - (titleWidth/2), 30);
85
86
87 // Now draw the axes
88 g.drawLine(xOffset, getHeight()-40, getWidth()-10,getHeight()-40);
89 g.drawLine(xOffset, getHeight()-40, xOffset, 40);
90
91 // Draw the xLabel under the xAxis
92 String xLabel = "Position in read (bp)";
93 g.drawString(xLabel, (getWidth()/2) - (g.getFontMetrics().stringWidth(xLabel)/2), getHeight()-5);
94
95
96 // Now draw the data points
97 int baseWidth = (getWidth()-(xOffset+10))/xLabels.length;
98 if (baseWidth<1) baseWidth=1;
99
100 // System.out.println("Base Width is "+baseWidth);
101
102 // First draw faint boxes over alternating bases so you can see which is which
103
104 // Let's find the longest label, and then work out how often we can draw labels
105
106 int lastXLabelEnd = 0;
107 g.setColor(Color.BLACK);
108
109 for (int base=0;base<xLabels.length;base++) {
110
111 String baseNumber = ""+xLabels[base];
112 int baseNumberWidth = g.getFontMetrics().stringWidth(baseNumber);
113 int baseNumberPosition = (baseWidth/2)+xOffset+(baseWidth*base)-(baseNumberWidth/2);
114
115 if (baseNumberPosition > lastXLabelEnd) {
116 g.drawString(baseNumber,baseNumberPosition, getHeight()-25);
117 lastXLabelEnd = baseNumberPosition+baseNumberWidth+5;
118 }
119 }
120
121 // Now draw the datasets
122
123 if (g instanceof Graphics2D) {
124 ((Graphics2D)g).setStroke(new BasicStroke(2));
125 ((Graphics2D)g).setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
126 }
127
128 for (int tile=0;tile<tiles.length;tile++) {
129 for (int base=0;base<xLabels.length;base++) {
130
131 g.setColor(getColour(tile,base));
132
133 int x=xOffset+(baseWidth*base);
134 int y=getY(tile+1);
135 g.fillRect(x, y, baseWidth, getY(tile)-getY(tile+1));
136
137 }
138
139 }
140
141
142 }
143
144 private Color getColour(int tile, int base) {
145 return gradient.getColor(0-tileBaseMeans[tile][base], 0, ModuleConfig.getParam("tile", "error"));
146 }
147
148
149 }
0 /**
1 * Copyright 2009-15 Simon Andrews
2 *
3 * This file is part of SeqMonk.
4 *
5 * SeqMonk is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * SeqMonk is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SeqMonk; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Help;
20
21 import java.awt.BorderLayout;
22 import java.awt.event.ActionEvent;
23 import java.awt.event.ActionListener;
24 import java.io.IOException;
25
26 import javax.swing.BorderFactory;
27 import javax.swing.DefaultListModel;
28 import javax.swing.JButton;
29 import javax.swing.JList;
30 import javax.swing.JPanel;
31 import javax.swing.JScrollPane;
32 import javax.swing.JTextField;
33 import javax.swing.ListSelectionModel;
34 import javax.swing.event.ListSelectionEvent;
35 import javax.swing.event.ListSelectionListener;
36
37 /**
38 * The Class HelpSearchPanel.
39 */
40 public class HelpSearchPanel extends JPanel implements ActionListener, ListSelectionListener, Runnable {
41
42 /** The root. */
43 private HelpIndexRoot root;
44
45 /** The query field. */
46 private JTextField queryField;
47
48 /** The result list. */
49 private JList resultList;
50
51 /** The list model. */
52 private DefaultListModel listModel;
53
54 /** The search button. */
55 private JButton searchButton;
56
57 /** The dialog. */
58 private HelpDialog dialog;
59
60 /** The results scroll pane. */
61 private JScrollPane resultsScrollPane;
62
63 /**
64 * Instantiates a new help search panel.
65 *
66 * @param root the root
67 * @param dialog the dialog
68 */
69 public HelpSearchPanel (HelpIndexRoot root,HelpDialog dialog) {
70 this.root = root;
71 this.dialog = dialog;
72
73 setLayout(new BorderLayout());
74
75 JPanel queryPanel = new JPanel();
76 queryPanel.setLayout(new BorderLayout());
77 queryPanel.setBorder(BorderFactory.createEmptyBorder(2,2,2,2));
78 queryField = new JTextField();
79 queryField.setActionCommand("search");
80 queryField.addActionListener(this);
81 queryPanel.add(queryField,BorderLayout.CENTER);
82 searchButton = new JButton("Search");
83 searchButton.setActionCommand("search");
84 searchButton.addActionListener(this);
85 queryPanel.add(searchButton,BorderLayout.EAST);
86 add(queryPanel,BorderLayout.NORTH);
87
88 listModel = new DefaultListModel();
89 listModel.addElement("[No search results]");
90 resultList = new JList(listModel);
91 resultList.addListSelectionListener(this);
92 resultList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
93 resultsScrollPane = new JScrollPane(resultList);
94 add(resultsScrollPane,BorderLayout.CENTER);
95
96 }
97
98 /* (non-Javadoc)
99 * @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
100 */
101 public void actionPerformed(ActionEvent e) {
102 Thread t = new Thread(this);
103 t.start();
104 }
105
106 /* (non-Javadoc)
107 * @see javax.swing.event.ListSelectionListener#valueChanged(javax.swing.event.ListSelectionEvent)
108 */
109 public void valueChanged(ListSelectionEvent lse) {
110 Object o = resultList.getSelectedValue();
111 if (o != null && o instanceof HelpPage) {
112 dialog.DisplayPage((HelpPage)o);
113 }
114 }
115
116 /* (non-Javadoc)
117 * @see java.lang.Runnable#run()
118 */
119 public void run() {
120 searchButton.setEnabled(false);
121 listModel.removeAllElements();
122 if (queryField.getText().trim().length() > 0) {
123 HelpPage[] results;
124 try {
125 results = root.findPagesForTerm(queryField.getText().trim());
126 }
127 catch (IOException e) {
128 e.printStackTrace();
129 searchButton.setEnabled(true);
130 return;
131 }
132 if (results.length > 0) {
133 for (int r=0;r<results.length;r++) {
134 listModel.addElement(results[r]);
135 }
136 }
137 else {
138 listModel.addElement("[No search results]");
139 }
140 }
141
142 // This stupid rigmarole is because on OSX the updated list
143 // just won't show up for some reason. Removing the list and
144 // re-adding it forces it to always show up.
145 //
146 // It's not even enough to remake the scroll pane. You have
147 // to replace the entire JList. Aaargh!
148 remove(resultsScrollPane);
149 revalidate();
150 resultList = new JList(listModel);
151 resultList.addListSelectionListener(this);
152 resultsScrollPane = new JScrollPane(resultList);
153 add(resultsScrollPane,BorderLayout.CENTER);
154 revalidate();
155 repaint();
156
157 searchButton.setEnabled(true);
158 }
159 }
0 /**
1 * Copyright 2009-15 Simon Andrews
2 *
3 * This file is part of SeqMonk.
4 *
5 * SeqMonk is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * SeqMonk is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SeqMonk; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Help;
20
21 import java.awt.BorderLayout;
22 import java.awt.event.ActionEvent;
23 import java.awt.event.ActionListener;
24 import java.io.IOException;
25
26 import javax.swing.BorderFactory;
27 import javax.swing.DefaultListModel;
28 import javax.swing.JButton;
29 import javax.swing.JList;
30 import javax.swing.JPanel;
31 import javax.swing.JScrollPane;
32 import javax.swing.JTextField;
33 import javax.swing.ListSelectionModel;
34 import javax.swing.event.ListSelectionEvent;
35 import javax.swing.event.ListSelectionListener;
36
37 /**
38 * The Class HelpSearchPanel.
39 */
40 public class HelpSearchPanel extends JPanel implements ActionListener, ListSelectionListener, Runnable {
41
42 /** The root. */
43 private HelpIndexRoot root;
44
45 /** The query field. */
46 private JTextField queryField;
47
48 /** The result list. */
49 private JList resultList;
50
51 /** The list model. */
52 private DefaultListModel listModel;
53
54 /** The search button. */
55 private JButton searchButton;
56
57 /** The dialog. */
58 private HelpDialog dialog;
59
60 /** The results scroll pane. */
61 private JScrollPane resultsScrollPane;
62
63 /**
64 * Instantiates a new help search panel.
65 *
66 * @param root the root
67 * @param dialog the dialog
68 */
69 public HelpSearchPanel (HelpIndexRoot root,HelpDialog dialog) {
70 this.root = root;
71 this.dialog = dialog;
72
73 setLayout(new BorderLayout());
74
75 JPanel queryPanel = new JPanel();
76 queryPanel.setLayout(new BorderLayout());
77 queryPanel.setBorder(BorderFactory.createEmptyBorder(2,2,2,2));
78 queryField = new JTextField();
79 queryField.setActionCommand("search");
80 queryField.addActionListener(this);
81 queryPanel.add(queryField,BorderLayout.CENTER);
82 searchButton = new JButton("Search");
83 searchButton.setActionCommand("search");
84 searchButton.addActionListener(this);
85 queryPanel.add(searchButton,BorderLayout.EAST);
86 add(queryPanel,BorderLayout.NORTH);
87
88 listModel = new DefaultListModel();
89 listModel.addElement("[No search results]");
90 resultList = new JList(listModel);
91 resultList.addListSelectionListener(this);
92 resultList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
93 resultsScrollPane = new JScrollPane(resultList);
94 add(resultsScrollPane,BorderLayout.CENTER);
95
96 }
97
98 /* (non-Javadoc)
99 * @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
100 */
101 public void actionPerformed(ActionEvent e) {
102 Thread t = new Thread(this);
103 t.start();
104 }
105
106 /* (non-Javadoc)
107 * @see javax.swing.event.ListSelectionListener#valueChanged(javax.swing.event.ListSelectionEvent)
108 */
109 public void valueChanged(ListSelectionEvent lse) {
110 Object o = resultList.getSelectedValue();
111 if (o != null && o instanceof HelpPage) {
112 dialog.DisplayPage((HelpPage)o);
113 }
114 }
115
116 /* (non-Javadoc)
117 * @see java.lang.Runnable#run()
118 */
119 public void run() {
120 searchButton.setEnabled(false);
121 listModel.removeAllElements();
122 if (queryField.getText().trim().length() > 0) {
123 HelpPage[] results;
124 try {
125 results = root.findPagesForTerm(queryField.getText().trim());
126 }
127 catch (IOException e) {
128 e.printStackTrace();
129 searchButton.setEnabled(true);
130 return;
131 }
132 if (results.length > 0) {
133 for (int r=0;r<results.length;r++) {
134 listModel.addElement(results[r]);
135 }
136 }
137 else {
138 listModel.addElement("[No search results]");
139 }
140 }
141
142 // This stupid rigmarole is because on OSX the updated list
143 // just won't show up for some reason. Removing the list and
144 // re-adding it forces it to always show up.
145 //
146 // It's not even enough to remake the scroll pane. You have
147 // to replace the entire JList. Aaargh!
148 remove(resultsScrollPane);
149 revalidate();
150 resultList = new JList(listModel);
151 resultList.addListSelectionListener(this);
152 resultsScrollPane = new JScrollPane(resultList);
153 add(resultsScrollPane,BorderLayout.CENTER);
154 revalidate();
155 repaint();
156
157 searchButton.setEnabled(true);
158 }
159 }
00 /**
1 * Copyright Copyright 2010-15 Simon Andrews
1 * Copyright Copyright 2010-17 Simon Andrews
22 *
33 * This file is part of FastQC.
44 *
123123 // We can't display sensible results
124124 JPanel failPanel = new JPanel();
125125 failPanel.setLayout(new BorderLayout());
126 failPanel.add(new JLabel("Can't analyse adapters as read length is too short",JLabel.CENTER),BorderLayout.CENTER);
126 failPanel.add(new JLabel("Can't analyse adapters as read length is too short ("+longestAdapter+" vs "+longestSequence+")",JLabel.CENTER),BorderLayout.CENTER);
127127 return failPanel;
128128 }
129129
141141 calculated = false;
142142 ++totalCount;
143143
144 if (sequence.getSequence().length()-longestAdapter > longestSequence) {
145 longestSequence = sequence.getSequence().length()-longestAdapter;
144 // We need to be careful about making sure that a sequence is not only longer
145 // than we've seen before, but also that the last position we could find a hit
146 // is a positive position.
147
148 if (sequence.getSequence().length() > longestSequence && sequence.getSequence().length() - longestAdapter > 0) {
149 longestSequence = sequence.getSequence().length();
146150 for (int a=0;a<adapters.length;a++) {
147 adapters[a].expandLengthTo(longestSequence);
151 adapters[a].expandLengthTo(longestSequence-longestAdapter);
148152 }
149153 }
150154
154158
155159 int index = sequence.getSequence().indexOf(adapters[a].sequence());
156160 if (index >=0) {
157 for (int i=index;i<longestSequence;i++) {
161 for (int i=index;i<=longestSequence-longestAdapter;i++) {
158162 adapters[a].incrementCount(i);
159163 }
160164 }
255259 XMLStreamWriter xhtml = report.xhtmlStream();
256260
257261 xhtml.writeStartElement("p");
258 xhtml.writeCharacters("Can't analyse adapters as read length is too short");
262 xhtml.writeCharacters("Can't analyse adapters as read length is too short ("+longestAdapter+" vs "+longestSequence+")");
259263 xhtml.writeEndElement();
260264 }
261265
00 /**
1 * Copyright Copyright 2010-15 Simon Andrews
1 * Copyright Copyright 2010-17 Simon Andrews
22 *
33 * This file is part of FastQC.
44 *
8787
8888 if (name == null) name = sequence.file().name();
8989
90 name = name.replaceFirst("stdin:", "");
91
9092 // If this is a filtered sequence we simply count it and move on.
9193 if (sequence.isFiltered()) {
9294 filteredCount++;
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22 import java.text.DecimalFormat;
23 import java.util.HashMap;
24 import java.util.Iterator;
25
26 import javax.swing.JPanel;
27 import javax.xml.stream.XMLStreamException;
28
29 import uk.ac.babraham.FastQC.Graphs.LineGraph;
30 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
31 import uk.ac.babraham.FastQC.Sequence.Sequence;
32
33 public class DuplicationLevel extends AbstractQCModule {
34
35 private OverRepresentedSeqs overrepresentedModule;
36 private double [] deduplicatedPercentages = null;
37 private double [] totalPercentages = null;
38 private double maxCount = 100;
39 private double percentDifferentSeqs = 0;
40 private String [] labels;
41 private static final DecimalFormat df = new DecimalFormat("#.##");
42
43 protected DuplicationLevel (OverRepresentedSeqs overrepresentedModule) {
44 this.overrepresentedModule = overrepresentedModule;
45 }
46
47 public String description() {
48 return "Plots the number of sequences which are duplicated to different levels";
49 }
50
51 public boolean ignoreFilteredSequences() {
52 if (ModuleConfig.getParam("duplication", "ignore") > 0) {
53 return true;
54 }
55 return false;
56 }
57
58 public boolean ignoreInReport () {
59 if (ModuleConfig.getParam("duplication", "ignore") > 0) {
60 return true;
61 }
62 return false;
63 }
64
65 protected synchronized void calculateLevels () {
66
67 if (deduplicatedPercentages != null) return;
68
69 deduplicatedPercentages = new double[16];
70 totalPercentages = new double[16];
71
72 HashMap<Integer, Integer> collatedCounts = new HashMap<Integer, Integer>();
73
74 Iterator<String> it = overrepresentedModule.sequences.keySet().iterator();
75
76 while (it.hasNext()) {
77 int thisCount = overrepresentedModule.sequences.get(it.next());
78
79 if (collatedCounts.containsKey(thisCount)) {
80 collatedCounts.put(thisCount,collatedCounts.get(thisCount)+1);
81 }
82 else {
83 collatedCounts.put(thisCount,1);
84 }
85 }
86
87 // Now we can correct each of these
88
89 HashMap<Integer, Double> correctedCounts = new HashMap<Integer, Double>();
90
91 Iterator<Integer> itr = collatedCounts.keySet().iterator();
92
93 while (itr.hasNext()) {
94 int dupLevel = itr.next();
95 int count = collatedCounts.get(dupLevel);
96
97 correctedCounts.put(dupLevel,getCorrectedCount(overrepresentedModule.countAtUniqueLimit, overrepresentedModule.count, dupLevel, count));
98
99 // System.err.println("For dup level "+dupLevel+" raw count was "+count+" corrected count was "+correctedCounts.get(dupLevel));
100
101 }
102
103 // From the corrected counts we can now work out the raw and deduplicated proportions
104
105 double dedupTotal = 0;
106 double rawTotal = 0;
107
108 Iterator<Integer> itc = correctedCounts.keySet().iterator();
109
110 while (itc.hasNext()) {
111 int dupLevel = itc.next();
112 double count = correctedCounts.get(dupLevel);
113
114 dedupTotal += count;
115 rawTotal += count * dupLevel;
116
117 int dupSlot = dupLevel - 1;
118
119 if (dupSlot > 9999) dupSlot = 15;
120 else if (dupSlot > 4999) dupSlot = 14;
121 else if (dupSlot > 999) dupSlot = 13;
122 else if (dupSlot > 499) dupSlot = 12;
123 else if (dupSlot > 99) dupSlot = 11;
124 else if (dupSlot > 49) dupSlot = 10;
125 else if (dupSlot > 9) dupSlot = 9;
126
127
128 deduplicatedPercentages[dupSlot] += count;
129 totalPercentages[dupSlot] += count * dupLevel;
130
131 }
132
133 // System.err.println("True total = "+overrepresentedModule.count+" inferred total is "+rawTotal+" dedup total is "+dedupTotal);
134
135
136 labels = new String [16];
137 for (int i=0;i<deduplicatedPercentages.length;i++) {
138 if (i<9) labels[i] = ""+(i+1);
139 else if (i==9) labels[i]=">10";
140 else if (i==10) labels[i]=">50";
141 else if (i==11) labels[i]=">100";
142 else if (i==12) labels[i]=">500";
143 else if (i==13) labels[i]=">1k";
144 else if (i==14) labels[i]=">5k";
145 else if (i==15) labels[i]=">10k";
146
147
148 deduplicatedPercentages[i] /= dedupTotal;
149 totalPercentages[i] /= rawTotal;
150 deduplicatedPercentages[i] *= 100;
151 totalPercentages[i] *= 100;
152 }
153
154
155 percentDifferentSeqs = (dedupTotal/rawTotal)*100;
156
157 }
158
159 private static double getCorrectedCount (long countAtLimit, long totalCount, int duplicationLevel, int numberOfObservations) {
160
161 // System.err.println("Count at limit = "+countAtLimit+" total = "+totalCount+" Dup level = "+duplicationLevel+" no obs = "+numberOfObservations);
162
163 // See if we can bail out early
164 if (countAtLimit == totalCount) return numberOfObservations;
165
166 // If there aren't enough sequences left to hide another sequence with this count then
167 // we can also skip the calculation
168 if (totalCount - numberOfObservations < countAtLimit) return numberOfObservations;
169
170 // If not then we need to see what the likelihood is that we had another sequence
171 // with this number of observations which we would have missed.
172
173 // We'll start by working out the probability of NOT seeing a sequence with this duplication level
174 // within the first countAtLimit sequences of numberOfObservations. This is easier than calculating
175 // the probability of seeing it.
176
177 double pNotSeeingAtLimit = 1;
178
179 for (int i=0;i<countAtLimit;i++) {
180 pNotSeeingAtLimit *= ((totalCount-i)-duplicationLevel)/(double)(totalCount-i);
181 // System.err.println("At i="+i+" p is "+pNotSeeingAtLimit);
182 }
183
184 // Now we can invert this to get the chance of seeing a sequence with this count
185 double pSeeingAtLimit = 1 - pNotSeeingAtLimit;
186
187 // Now we can assume that the number we observed can be scaled up by this proportion
188 double trueCount = numberOfObservations/pSeeingAtLimit;
189
190 return trueCount;
191
192 }
193
194
195 public JPanel getResultsPanel() {
196 if (deduplicatedPercentages == null) calculateLevels();
197
198 return new LineGraph(new double [][] {deduplicatedPercentages,totalPercentages}, 0d, maxCount, "Sequence Duplication Level",new String [] {"% Deduplicated sequences","% Total sequences"}, labels, "Percent of seqs remaining if deduplicated "+df.format(percentDifferentSeqs)+"%");
199 }
200
201 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
202 if (deduplicatedPercentages == null) calculateLevels();
203
204 writeDefaultImage(report, "duplication_levels.png", "Duplication level graph", 800, 600);
205
206 StringBuffer sb = report.dataDocument();
207
208 sb.append("#Total Deduplicated Percentage\t");
209 sb.append(percentDifferentSeqs);
210 sb.append("\n");
211
212 sb.append("#Duplication Level\tPercentage of deduplicated\tPercentage of total\n");
213 for (int i=0;i<labels.length;i++) {
214 sb.append(labels[i]);
215 if (i == labels.length-1) {
216 sb.append("+");
217 }
218 sb.append("\t");
219 sb.append(deduplicatedPercentages[i]);
220 sb.append("\t");
221 sb.append(totalPercentages[i]);
222 sb.append("\n");
223 }
224
225 }
226
227 public String name() {
228 return "Sequence Duplication Levels";
229 }
230
231 public void processSequence(Sequence sequence) {
232 // We don't need to do anything since we use
233 // the data structure from the overrepresented sequences
234 // module.
235 }
236
237 public boolean raisesError() {
238 if (deduplicatedPercentages == null) calculateLevels();
239
240 // Anything over 50% duplicate gets us a error
241 if (percentDifferentSeqs < ModuleConfig.getParam("duplication", "error")) {
242 return true;
243 }
244
245 return false;
246 }
247
248 public boolean raisesWarning() {
249 if (deduplicatedPercentages == null) calculateLevels();
250
251 // Anything over 20% duplicate gets us a warning
252 if (percentDifferentSeqs < ModuleConfig.getParam("duplication", "warn")) {
253 return true;
254 }
255
256 return false;
257 }
258
259 public void reset() {
260 deduplicatedPercentages = null;
261 }
262
263 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22 import java.text.DecimalFormat;
23 import java.util.HashMap;
24 import java.util.Iterator;
25
26 import javax.swing.JPanel;
27 import javax.xml.stream.XMLStreamException;
28
29 import uk.ac.babraham.FastQC.Graphs.LineGraph;
30 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
31 import uk.ac.babraham.FastQC.Sequence.Sequence;
32
33 public class DuplicationLevel extends AbstractQCModule {
34
35 private OverRepresentedSeqs overrepresentedModule;
36 private double [] deduplicatedPercentages = null;
37 private double [] totalPercentages = null;
38 private double maxCount = 100;
39 private double percentDifferentSeqs = 0;
40 private String [] labels;
41 private static final DecimalFormat df = new DecimalFormat("#.##");
42
43 protected DuplicationLevel (OverRepresentedSeqs overrepresentedModule) {
44 this.overrepresentedModule = overrepresentedModule;
45 }
46
47 public String description() {
48 return "Plots the number of sequences which are duplicated to different levels";
49 }
50
51 public boolean ignoreFilteredSequences() {
52 if (ModuleConfig.getParam("duplication", "ignore") > 0) {
53 return true;
54 }
55 return false;
56 }
57
58 public boolean ignoreInReport () {
59 if (ModuleConfig.getParam("duplication", "ignore") > 0) {
60 return true;
61 }
62 return false;
63 }
64
65 protected synchronized void calculateLevels () {
66
67 if (deduplicatedPercentages != null) return;
68
69 deduplicatedPercentages = new double[16];
70 totalPercentages = new double[16];
71
72 HashMap<Integer, Integer> collatedCounts = new HashMap<Integer, Integer>();
73
74 Iterator<String> it = overrepresentedModule.sequences.keySet().iterator();
75
76 while (it.hasNext()) {
77 int thisCount = overrepresentedModule.sequences.get(it.next());
78
79 if (collatedCounts.containsKey(thisCount)) {
80 collatedCounts.put(thisCount,collatedCounts.get(thisCount)+1);
81 }
82 else {
83 collatedCounts.put(thisCount,1);
84 }
85 }
86
87 // Now we can correct each of these
88
89 HashMap<Integer, Double> correctedCounts = new HashMap<Integer, Double>();
90
91 Iterator<Integer> itr = collatedCounts.keySet().iterator();
92
93 while (itr.hasNext()) {
94 int dupLevel = itr.next();
95 int count = collatedCounts.get(dupLevel);
96
97 correctedCounts.put(dupLevel,getCorrectedCount(overrepresentedModule.countAtUniqueLimit, overrepresentedModule.count, dupLevel, count));
98
99 // System.err.println("For dup level "+dupLevel+" raw count was "+count+" corrected count was "+correctedCounts.get(dupLevel));
100
101 }
102
103 // From the corrected counts we can now work out the raw and deduplicated proportions
104
105 double dedupTotal = 0;
106 double rawTotal = 0;
107
108 Iterator<Integer> itc = correctedCounts.keySet().iterator();
109
110 while (itc.hasNext()) {
111 int dupLevel = itc.next();
112 double count = correctedCounts.get(dupLevel);
113
114 dedupTotal += count;
115 rawTotal += count * dupLevel;
116
117 int dupSlot = dupLevel - 1;
118
119 if (dupSlot > 9999) dupSlot = 15;
120 else if (dupSlot > 4999) dupSlot = 14;
121 else if (dupSlot > 999) dupSlot = 13;
122 else if (dupSlot > 499) dupSlot = 12;
123 else if (dupSlot > 99) dupSlot = 11;
124 else if (dupSlot > 49) dupSlot = 10;
125 else if (dupSlot > 9) dupSlot = 9;
126
127
128 deduplicatedPercentages[dupSlot] += count;
129 totalPercentages[dupSlot] += count * dupLevel;
130
131 }
132
133 // System.err.println("True total = "+overrepresentedModule.count+" inferred total is "+rawTotal+" dedup total is "+dedupTotal);
134
135
136 labels = new String [16];
137 for (int i=0;i<deduplicatedPercentages.length;i++) {
138 if (i<9) labels[i] = ""+(i+1);
139 else if (i==9) labels[i]=">10";
140 else if (i==10) labels[i]=">50";
141 else if (i==11) labels[i]=">100";
142 else if (i==12) labels[i]=">500";
143 else if (i==13) labels[i]=">1k";
144 else if (i==14) labels[i]=">5k";
145 else if (i==15) labels[i]=">10k";
146
147
148 deduplicatedPercentages[i] /= dedupTotal;
149 totalPercentages[i] /= rawTotal;
150 deduplicatedPercentages[i] *= 100;
151 totalPercentages[i] *= 100;
152 }
153
154
155 percentDifferentSeqs = (dedupTotal/rawTotal)*100;
156
157 }
158
159 private static double getCorrectedCount (long countAtLimit, long totalCount, int duplicationLevel, int numberOfObservations) {
160
161 // System.err.println("Count at limit = "+countAtLimit+" total = "+totalCount+" Dup level = "+duplicationLevel+" no obs = "+numberOfObservations);
162
163 // See if we can bail out early
164 if (countAtLimit == totalCount) return numberOfObservations;
165
166 // If there aren't enough sequences left to hide another sequence with this count then
167 // we can also skip the calculation
168 if (totalCount - numberOfObservations < countAtLimit) return numberOfObservations;
169
170 // If not then we need to see what the likelihood is that we had another sequence
171 // with this number of observations which we would have missed.
172
173 // We'll start by working out the probability of NOT seeing a sequence with this duplication level
174 // within the first countAtLimit sequences of numberOfObservations. This is easier than calculating
175 // the probability of seeing it.
176
177 double pNotSeeingAtLimit = 1;
178
179 for (int i=0;i<countAtLimit;i++) {
180 pNotSeeingAtLimit *= ((totalCount-i)-duplicationLevel)/(double)(totalCount-i);
181 // System.err.println("At i="+i+" p is "+pNotSeeingAtLimit);
182 }
183
184 // Now we can invert this to get the chance of seeing a sequence with this count
185 double pSeeingAtLimit = 1 - pNotSeeingAtLimit;
186
187 // Now we can assume that the number we observed can be scaled up by this proportion
188 double trueCount = numberOfObservations/pSeeingAtLimit;
189
190 return trueCount;
191
192 }
193
194
195 public JPanel getResultsPanel() {
196 if (deduplicatedPercentages == null) calculateLevels();
197
198 return new LineGraph(new double [][] {deduplicatedPercentages,totalPercentages}, 0d, maxCount, "Sequence Duplication Level",new String [] {"% Deduplicated sequences","% Total sequences"}, labels, "Percent of seqs remaining if deduplicated "+df.format(percentDifferentSeqs)+"%");
199 }
200
201 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
202 if (deduplicatedPercentages == null) calculateLevels();
203
204 writeDefaultImage(report, "duplication_levels.png", "Duplication level graph", 800, 600);
205
206 StringBuffer sb = report.dataDocument();
207
208 sb.append("#Total Deduplicated Percentage\t");
209 sb.append(percentDifferentSeqs);
210 sb.append("\n");
211
212 sb.append("#Duplication Level\tPercentage of deduplicated\tPercentage of total\n");
213 for (int i=0;i<labels.length;i++) {
214 sb.append(labels[i]);
215 if (i == labels.length-1) {
216 sb.append("+");
217 }
218 sb.append("\t");
219 sb.append(deduplicatedPercentages[i]);
220 sb.append("\t");
221 sb.append(totalPercentages[i]);
222 sb.append("\n");
223 }
224
225 }
226
227 public String name() {
228 return "Sequence Duplication Levels";
229 }
230
231 public void processSequence(Sequence sequence) {
232 // We don't need to do anything since we use
233 // the data structure from the overrepresented sequences
234 // module.
235 }
236
237 public boolean raisesError() {
238 if (deduplicatedPercentages == null) calculateLevels();
239
240 // Anything over 50% duplicate gets us a error
241 if (percentDifferentSeqs < ModuleConfig.getParam("duplication", "error")) {
242 return true;
243 }
244
245 return false;
246 }
247
248 public boolean raisesWarning() {
249 if (deduplicatedPercentages == null) calculateLevels();
250
251 // Anything over 20% duplicate gets us a warning
252 if (percentDifferentSeqs < ModuleConfig.getParam("duplication", "warn")) {
253 return true;
254 }
255
256 return false;
257 }
258
259 public void reset() {
260 deduplicatedPercentages = null;
261 }
262
263 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules.GCModel;
20
21 public class GCModel {
22
23 public int readLength;
24 public GCModelValue[][] models;
25
26 public GCModel (int readLength) {
27
28 int [] claimingCounts = new int [101];
29 this.readLength = readLength;
30 models = new GCModelValue[readLength+1][];
31
32 for (int pos=0;pos<=readLength;pos++) {
33 double lowCount = pos-0.5;
34 double highCount = pos+0.5;
35
36 if (lowCount < 0) lowCount = 0;
37 if (highCount < 0) highCount = 0;
38 if (highCount > readLength) highCount = readLength;
39 if (lowCount > readLength) lowCount = readLength;
40
41 int lowPercentage = (int)Math.round((lowCount*100) / readLength);
42 int highPercentage = (int)Math.round((highCount*100) / readLength);
43
44 for (int p=lowPercentage;p<=highPercentage;p++) {
45 claimingCounts[p]++;
46 }
47 }
48
49
50 // We now do a second pass to make up the model using the weightings
51 // we calculated previously.
52
53 for (int pos=0;pos<=readLength;pos++) {
54 double lowCount = pos-0.5;
55 double highCount = pos+0.5;
56
57 if (lowCount < 0) lowCount = 0;
58 if (highCount < 0) highCount = 0;
59 if (highCount > readLength) highCount = readLength;
60 if (lowCount > readLength) lowCount = readLength;
61
62 int lowPercentage = (int)Math.round((lowCount*100) / readLength);
63 int highPercentage = (int)Math.round((highCount*100) / readLength);
64
65 GCModelValue [] modelValues = new GCModelValue [(highPercentage-lowPercentage)+1];
66
67 for (int p=lowPercentage;p<=highPercentage;p++) {
68 modelValues[p-lowPercentage] = new GCModelValue(p, 1d/claimingCounts[p]);
69 }
70 models[pos] = modelValues;
71 }
72
73
74
75 }
76
77 public GCModelValue [] getModelValues (int gcCount) {
78 return models[gcCount];
79 }
80
81 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules.GCModel;
20
21 public class GCModel {
22
23 public int readLength;
24 public GCModelValue[][] models;
25
26 public GCModel (int readLength) {
27
28 int [] claimingCounts = new int [101];
29 this.readLength = readLength;
30 models = new GCModelValue[readLength+1][];
31
32 for (int pos=0;pos<=readLength;pos++) {
33 double lowCount = pos-0.5;
34 double highCount = pos+0.5;
35
36 if (lowCount < 0) lowCount = 0;
37 if (highCount < 0) highCount = 0;
38 if (highCount > readLength) highCount = readLength;
39 if (lowCount > readLength) lowCount = readLength;
40
41 int lowPercentage = (int)Math.round((lowCount*100) / readLength);
42 int highPercentage = (int)Math.round((highCount*100) / readLength);
43
44 for (int p=lowPercentage;p<=highPercentage;p++) {
45 claimingCounts[p]++;
46 }
47 }
48
49
50 // We now do a second pass to make up the model using the weightings
51 // we calculated previously.
52
53 for (int pos=0;pos<=readLength;pos++) {
54 double lowCount = pos-0.5;
55 double highCount = pos+0.5;
56
57 if (lowCount < 0) lowCount = 0;
58 if (highCount < 0) highCount = 0;
59 if (highCount > readLength) highCount = readLength;
60 if (lowCount > readLength) lowCount = readLength;
61
62 int lowPercentage = (int)Math.round((lowCount*100) / readLength);
63 int highPercentage = (int)Math.round((highCount*100) / readLength);
64
65 GCModelValue [] modelValues = new GCModelValue [(highPercentage-lowPercentage)+1];
66
67 for (int p=lowPercentage;p<=highPercentage;p++) {
68 modelValues[p-lowPercentage] = new GCModelValue(p, 1d/claimingCounts[p]);
69 }
70 models[pos] = modelValues;
71 }
72
73
74
75 }
76
77 public GCModelValue [] getModelValues (int gcCount) {
78 return models[gcCount];
79 }
80
81 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules.GCModel;
20
/**
 * Immutable pairing of a GC percentage with the increment which should
 * be applied to that percentage.
 */
public class GCModelValue {

	private final int percentage;
	private final double increment;

	/**
	 * @param percentage the GC percentage this value refers to
	 * @param increment the amount to add for that percentage
	 */
	public GCModelValue (int percentage,double increment) {
		this.increment = increment;
		this.percentage = percentage;
	}

	/**
	 * @return the GC percentage this value refers to
	 */
	public int percentage () {
		return this.percentage;
	}

	/**
	 * @return the amount to add for this percentage
	 */
	public double increment () {
		return this.increment;
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules.GCModel;
20
/**
 * Simple value holder which ties a GC percentage to the increment that
 * percentage should receive. Instances cannot be changed once created.
 */
public class GCModelValue {

	private final int percentage;
	private final double increment;

	/**
	 * @param percentage the GC percentage being described
	 * @param increment the increment to apply to that percentage
	 */
	public GCModelValue (int percentage,double increment) {
		this.percentage = percentage;
		this.increment = increment;
	}

	/**
	 * @return the GC percentage being described
	 */
	public int percentage () {
		return this.percentage;
	}

	/**
	 * @return the increment to apply to the percentage
	 */
	public double increment () {
		return this.increment;
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.awt.BorderLayout;
22 import java.io.IOException;
23 import java.util.Arrays;
24 import java.util.Hashtable;
25 import java.util.Iterator;
26 import java.util.Vector;
27
28 import javax.swing.JLabel;
29 import javax.swing.JPanel;
30 import javax.swing.JScrollPane;
31 import javax.swing.JSplitPane;
32 import javax.swing.JTable;
33 import javax.swing.table.AbstractTableModel;
34 import javax.swing.table.TableModel;
35 import javax.xml.stream.XMLStreamException;
36 import javax.xml.stream.XMLStreamWriter;
37
38 import org.apache.commons.math3.distribution.BinomialDistribution;
39
40
41 import uk.ac.babraham.FastQC.FastQCConfig;
42 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
43 import uk.ac.babraham.FastQC.Graphs.LineGraph;
44 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
45 import uk.ac.babraham.FastQC.Sequence.Sequence;
46
47 public class KmerContent extends AbstractQCModule {
48
49 private Hashtable<String, Kmer> kmers = new Hashtable<String, Kmer>((int)Math.pow(4, MAX_KMER_SIZE));
50
51 private int longestSequence = 0;
52
53 /* 2D array, first dimension is the position in the sequence, second is Kmer length */
54 private long [][] totalKmerCounts = new long [0][0];
55
56 private long skipCount = 0;
57
58 private static int MIN_KMER_SIZE = 7;
59 private static int MAX_KMER_SIZE = 7;
60
61 public boolean calculated = false;
62
63 // This is the full set of Kmers to be reported
64 private Kmer [] enrichedKmers = null;
65
66 // This is the data for the Kmers which are going to be placed on the graph
67 private double [][] enrichments = null;
68
69 // For the graph we also need to know the scale we need to use on the axes.
70 private double minGraphValue = 0;
71 private double maxGraphValue = 0;
72
73
74 private String [] xCategories = new String[0];
75 private String [] xLabels = new String[0];
76
77 BaseGroup [] groups;
/**
 * Creates the module. If a Kmer size has been set in the FastQC
 * configuration then it replaces both the minimum and maximum Kmer
 * size, otherwise the existing values are left alone.
 */
public KmerContent () {
	if (FastQCConfig.getInstance().kmer_size == null) {
		return;
	}

	int configuredSize = FastQCConfig.getInstance().kmer_size;
	MIN_KMER_SIZE = configuredSize;
	MAX_KMER_SIZE = configuredSize;
}
85
86 public boolean ignoreFilteredSequences() {
87 return true;
88 }
89
90 public boolean ignoreInReport () {
91 if (ModuleConfig.getParam("kmer", "ignore") > 0) {
92 return true;
93 }
94 return false;
95 }
96
97 public JPanel getResultsPanel() {
98
99 if (!calculated) calculateEnrichment();
100 JPanel returnPanel = new JPanel();
101 returnPanel.setLayout(new BorderLayout());
102 returnPanel.add(new JLabel("Overrepresented Kmers",JLabel.CENTER),BorderLayout.NORTH);
103
104 JSplitPane splitPanel = new JSplitPane(JSplitPane.VERTICAL_SPLIT);
105
106 if (enrichedKmers.length > 0) {
107 TableModel model = new ResultsTable(enrichedKmers);
108 splitPanel.setBottomComponent(new JScrollPane(new JTable(model)));
109 splitPanel.setTopComponent(new LineGraph(enrichments, minGraphValue, maxGraphValue, "Position in read (bp)", xLabels, xCategories, "Log2 Obs/Exp"));
110 returnPanel.add(splitPanel,BorderLayout.CENTER);
111 }
112 else {
113 returnPanel.add(new JLabel("There are no overrepresented Kmers",JLabel.CENTER),BorderLayout.CENTER);
114 }
115
116 return returnPanel;
117 }
118
119 /**
120 * This method simply keeps a count of the number of Kmers of a given size
121 * seen at each position within the run. We can use this later on to calculate
122 * the enrichment of the Kmers we actually count.
123 *
124 * We take in the Kmer sequence even though this isn't used in the total counts
125 * we do this because we don't want to count Kmers with Ns in them, but we do
126 * need to ensure that the data structure is expanded to the right size, and if
127 * we have libraries where later positions are Ns in all sequences then our
128 * data structure ends up too short and we crash.
129 *
130 * @param position Position within the read. 0 indexed
131 * @param kmerLength Actual length of the Kmer analysed
132 */
133 private void addKmerCount (int position,int kmerLength, String kmer) {
134
135
136 if (position >= totalKmerCounts.length) {
137 // We need to expand the array
138 long [][] newCounts = new long[position+1][];
139 for (int i=0;i<totalKmerCounts.length;i++) {
140 newCounts[i] = totalKmerCounts[i];
141 }
142 for (int i=totalKmerCounts.length;i<newCounts.length;i++) {
143 newCounts[i] = new long[MAX_KMER_SIZE];
144 }
145
146 totalKmerCounts = newCounts;
147 }
148
149 if (kmer.indexOf("N") >=0) return;
150
151 ++totalKmerCounts[position][kmerLength-1];
152
153 }
154
155 private synchronized void calculateEnrichment () {
156
157 /*
158 * For each Kmer we work out whether there is a statistically
159 * significant deviation in its coverage at any given position
160 * compared to its average coverage over all positions.
161 */
162
163
164 // We'll be grouping together positions later so make up the groups now
165 groups = BaseGroup.makeBaseGroups((longestSequence-MIN_KMER_SIZE)+1);
166
167 Vector<Kmer>unevenKmers = new Vector<Kmer>();
168
169 Iterator<Kmer> rawKmers = kmers.values().iterator();
170
171 while (rawKmers.hasNext()) {
172 Kmer k = rawKmers.next();
173 char [] chars = k.sequence().toCharArray();
174
175
176 long totalKmerCount = 0;
177
178
179 // This gets us the total number of Kmers of this type in the whole
180 // dataset.
181 for (int i=0;i<totalKmerCounts.length;i++) {
182 totalKmerCount += totalKmerCounts[i][k.sequence().length()-1];
183 }
184
185 // This is the expected proportion of all Kmers which have this
186 // specific Kmer sequence. We no longer make any attempt to judge
187 // overall enrichment or depletion of this sequence since once you
188 // get to longer lengths the distribution isn't flat anyway
189
190 float expectedProportion = k.count/(float)totalKmerCount;
191
192 // We now want to go through each of the positions looking for whether
193 // this Kmer was seen an unexpected number of times compared to what we
194 // expected from the global values
195
196
197 float [] obsExpPositions = new float[groups.length];
198 float [] binomialPValues = new float[groups.length];
199
200 long [] positionCounts = k.getPositions();
201
202 for (int g=0;g<groups.length;g++) {
203 // This is a summation of the number of Kmers of this length which
204 // fall into this base group
205 long totalGroupCount = 0;
206
207 // This is a summation of the number of hit Kmers which fall within
208 // this base group.
209 long totalGroupHits = 0;
210 for (int p=groups[g].lowerCount()-1;p<groups[g].upperCount() && p < positionCounts.length ;p++) {
211 totalGroupCount += totalKmerCounts[p][chars.length-1];
212 totalGroupHits += positionCounts[p];
213 }
214
215 float predicted = expectedProportion * totalGroupCount;
216 // obsExpPositions[g] = (float)(Math.log(totalGroupHits/predicted)/Math.log(2));
217 obsExpPositions[g] = (float)(totalGroupHits/predicted);
218
219 // Now we can run a binomial test to see if there is a significant
220 // deviation from what we expect given the number of observations we've
221 // made
222
223 BinomialDistribution bd = new BinomialDistribution((int)totalGroupCount, expectedProportion);
224 if (totalGroupHits > predicted) {
225 binomialPValues[g] = (float)((1 - bd.cumulativeProbability((int)totalGroupHits)) * Math.pow(4,chars.length));
226 }
227 else {
228 binomialPValues[g] = 1;
229 }
230
231 }
232
233 k.setObsExpPositions(obsExpPositions);
234
235
236 // To keep this we need a p-value below 0.01 and an obs/exp above 5 (actual values are log2 transformed)
237 float lowestPValue = 1;
238 for (int i=0;i<binomialPValues.length;i++) {
239 // if (binomialPValues[i] < 0.01 && obsExpPositions[i] > (Math.log(5)/Math.log(2))) {
240 if (binomialPValues[i] < 0.01 && obsExpPositions[i] > 5) {
241 if (binomialPValues[i]<lowestPValue) {
242 lowestPValue = binomialPValues[i];
243 }
244 }
245 }
246
247 if (lowestPValue < 0.01) {
248 k.setLowestPValue(lowestPValue);
249 unevenKmers.add(k);
250 }
251
252
253 }
254
255 Kmer [] finalKMers = unevenKmers.toArray(new Kmer[0]);
256
257 // We sort by the highest degree of enrichment over the average
258 Arrays.sort(finalKMers);
259
260 // So we don't end up with stupidly long lists of Kmers in the
261 // report we'll only report the top 20
262 if (finalKMers.length > 20) {
263 Kmer [] shortenedKmers = new Kmer [20];
264 for (int i=0;i<shortenedKmers.length;i++) {
265 shortenedKmers[i] = finalKMers[i];
266 }
267
268 finalKMers = shortenedKmers;
269 }
270
271 // Now we take the enrichment positions for the top 6 hits and
272 // record these so we can plot them on a line graph
273 enrichments = new double [Math.min(6, finalKMers.length)][];
274 xLabels = new String[enrichments.length];
275
276 xCategories = new String [groups.length];
277
278 for (int i=0;i<xCategories.length;i++) {
279 xCategories[i] = groups[i].toString();
280 }
281
282 for (int k=0;k<enrichments.length;k++) {
283 enrichments[k] = new double[groups.length];
284
285 float [] obsExpPos = finalKMers[k].getObsExpPositions();
286
287 for (int g=0;g<groups.length;g++) {
288 enrichments[k][g] = obsExpPos[g];
289 if (obsExpPos[g] > maxGraphValue) maxGraphValue = obsExpPos[g];
290 if (obsExpPos[g] < minGraphValue) minGraphValue = obsExpPos[g];
291 }
292
293 xLabels[k] = finalKMers[k].sequence();
294
295 }
296
297 minGraphValue = 0;
298
299 // System.err.println("Max value="+maxGraphValue+" min value="+minGraphValue);
300
301 this.enrichedKmers = finalKMers;
302
303 // Delete the initial data structure so we don't suck up more memory
304 // than we have to.
305 kmers.clear();
306
307 calculated = true;
308 }
309
310
311 public void processSequence(Sequence sequence) {
312 calculated = false;
313
314 /*
315 * The processing done by this module is quite intensive so to speed things
316 * up we don't look at every sequence. Instead we take only 2% of the
317 * submitted sequences and extrapolate from these to the full set in the file.
318 */
319 ++skipCount;
320 if (skipCount % 50 != 0) return;
321
322 /*
323 * This module uses horrible amounts of memory if allowed to store the full
324 * Kmer content for all positions in really long reads (pacbio reads were the
325 * ones which really broke this). We'll therefore limit our read lengths to
326 * 500bp since specific Kmer positions beyond that are not likely to be useful
327 */
328
329 String seq;
330
331 if (sequence.getSequence().length() > 500) {
332 seq = sequence.getSequence().substring(0, 500);
333 }
334 else {
335 seq = sequence.getSequence();
336 }
337
338 if (seq.length() > longestSequence) {
339 longestSequence = seq.length();
340 }
341
342 // Now we go through all of the Kmers to count these
343 for (int kmerSize=MIN_KMER_SIZE;kmerSize<=MAX_KMER_SIZE;kmerSize++) {
344 for (int i=0;i<=seq.length()-kmerSize;i++) {
345
346 String kmer = seq.substring(i, i+kmerSize);
347
348 if (kmer.length() != kmerSize) {
349 throw new IllegalStateException("String length "+kmer.length()+" wasn't the same as the kmer length "+kmerSize);
350 }
351
352 // Add to the counts before skipping Kmers containing Ns (see
353 // explanation in addKmerCount for the reasoning).
354 addKmerCount(i, kmerSize, kmer);
355
356 // Skip Kmers containing N
357 if (kmer.indexOf("N") >=0) continue;
358
359 if (kmers.containsKey(kmer)) {
360 kmers.get(kmer).incrementCount(i);
361 }
362 else {
363 kmers.put(new String(kmer), new Kmer(kmer,i,(seq.length()-kmerSize)+1));
364 }
365
366 }
367 }
368 }
369
370 public void reset () {
371 calculated = false;
372 totalKmerCounts = new long[0][0];
373 longestSequence = 0;
374 skipCount = 0;
375 enrichedKmers = null;
376 kmers.clear();
377 }
378
379 public String description() {
380 return "Identifies short sequences which have uneven representation";
381 }
382
383 public String name() {
384 return "Kmer Content";
385 }
386
387 public boolean raisesError() {
388 if (!calculated) calculateEnrichment();
389
390 // We raise an error if the most enriched kmer is seen more than 100 times
391 // more frequently than we expect.
392
393 if (enrichedKmers.length > 0 && 0-Math.log10(enrichedKmers[0].pValue()) > ModuleConfig.getParam("kmer", "error")) return true;
394 return false;
395 }
396
397 public boolean raisesWarning() {
398 if (!calculated) calculateEnrichment();
399
400 // We raise a warning if there are any enriched kmers
401 if (enrichedKmers.length > 0 && 0-Math.log10(enrichedKmers[0].pValue()) > ModuleConfig.getParam("kmer", "warn")) return true;
402 return false;
403 }
404
405 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
406 if (!calculated) calculateEnrichment();
407
408 if (enrichedKmers.length > 0) {
409 writeSpecificImage(report, new LineGraph(enrichments, minGraphValue, maxGraphValue, "Position in read (bp)", xLabels, xCategories, "Log2 Obs/Exp"),"kmer_profiles.png", "Kmer graph", Math.max(800, groups.length*15), 600);
410 }
411
412
413 ResultsTable table = new ResultsTable(enrichedKmers);
414
415 XMLStreamWriter xhtml = report.xhtmlStream();
416
417 if (enrichedKmers.length == 0)
418 {
419 xhtml.writeStartElement("p");
420 xhtml.writeCharacters("No overrepresented Kmers");
421 xhtml.writeEndElement();
422 }
423
424 else
425 {
426 super.writeTable(report, table);
427 }
428 }
429
430 private class Kmer implements Comparable<Kmer>{
431
432 private String sequence;
433 private long count = 0;
434 private float lowestPValue = 0;
435 private float [] obsExpPositions = null;
436 private long [] positions = new long[0];
437
438 public Kmer (String sequence, int position, int seqLength) {
439
440 // Do this slightly convoluted dance to try to avoid
441 // keeping the whole original sequence in memory
442 char [] chars = sequence.toCharArray();
443 this.sequence = new String(chars);
444 count = 1;
445 positions = new long[seqLength];
446 ++positions[position];
447 }
448
449 public void incrementCount (int position) {
450 ++count;
451
452 if (position >= positions.length) {
453 long [] newPositions = new long[position+1];
454 for (int i=0;i<positions.length;i++) {
455 newPositions[i] = positions[i];
456 }
457 positions = newPositions;
458 }
459
460 ++positions[position];
461
462 }
463
464 public long [] getPositions () {
465 return positions;
466 }
467
468 public String sequence () {
469 return sequence;
470 }
471
472 public long count () {
473 return count;
474 }
475
476 public void setLowestPValue (float p) {
477 this.lowestPValue = p;
478 }
479
480 public void setObsExpPositions (float [] oePositions) {
481 this.obsExpPositions = oePositions;
482 }
483
484 public float [] getObsExpPositions () {
485 return obsExpPositions;
486 }
487
488 public float pValue () {
489 return lowestPValue;
490 }
491
492 public float maxObsExp () {
493 float max = 0;
494 for (int i=0;i<obsExpPositions.length;i++) {
495 if (obsExpPositions[i]>max) max = obsExpPositions[i];
496 }
497 return max;
498 }
499
500 public int maxPosition () {
501 float max = 0;
502 int position = 0;
503 for (int i=0;i<obsExpPositions.length;i++) {
504 if (obsExpPositions[i]>max) {
505 max = obsExpPositions[i];
506 position = i+1;
507 }
508 }
509
510 if (position == 0) {
511 System.err.println("No value > 0 for "+sequence);
512 position = 1;
513 }
514
515 return position;
516 }
517
518 public int compareTo(Kmer o) {
519 return Float.compare(o.maxObsExp(), maxObsExp());
520 }
521 }
522
523
524 private class ResultsTable extends AbstractTableModel
525 {
526 private static final long serialVersionUID = 1L;
527 private Kmer [] kmers;
528
529 public ResultsTable (Kmer [] kmers) {
530 this.kmers = kmers;
531 }
532
533
534 // Sequence - Count - Obs/Exp
535 public int getColumnCount() {
536 return 5;
537 }
538
539 public int getRowCount() {
540 return kmers.length;
541 }
542
543 public Object getValueAt(int rowIndex, int columnIndex) {
544 switch (columnIndex) {
545 case 0: return kmers[rowIndex].sequence();
546 case 1: return kmers[rowIndex].count()*5;
547 case 2: return kmers[rowIndex].pValue();
548 case 3: return kmers[rowIndex].maxObsExp();
549 case 4: return groups[kmers[rowIndex].maxPosition()-1].toString();
550
551 }
552 return null;
553 }
554
555 public String getColumnName (int columnIndex) {
556 switch (columnIndex) {
557 case 0: return "Sequence";
558 case 1: return "Count";
559 case 2: return "PValue";
560 case 3: return "Obs/Exp Max";
561 case 4: return "Max Obs/Exp Position";
562 }
563 return null;
564 }
565
566 public Class<?> getColumnClass (int columnIndex) {
567 switch (columnIndex) {
568 case 0: return String.class;
569 case 1: return Integer.class;
570 case 2: return Float.class;
571 case 3: return Float.class;
572 case 4: return String.class;
573 }
574 return null;
575
576 }
577 }
578
579 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.awt.BorderLayout;
22 import java.io.IOException;
23 import java.util.Arrays;
24 import java.util.Hashtable;
25 import java.util.Iterator;
26 import java.util.Vector;
27
28 import javax.swing.JLabel;
29 import javax.swing.JPanel;
30 import javax.swing.JScrollPane;
31 import javax.swing.JSplitPane;
32 import javax.swing.JTable;
33 import javax.swing.table.AbstractTableModel;
34 import javax.swing.table.TableModel;
35 import javax.xml.stream.XMLStreamException;
36 import javax.xml.stream.XMLStreamWriter;
37
38 import org.apache.commons.math3.distribution.BinomialDistribution;
39
40
41 import uk.ac.babraham.FastQC.FastQCConfig;
42 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
43 import uk.ac.babraham.FastQC.Graphs.LineGraph;
44 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
45 import uk.ac.babraham.FastQC.Sequence.Sequence;
46
47 public class KmerContent extends AbstractQCModule {
48
49 private Hashtable<String, Kmer> kmers = new Hashtable<String, Kmer>((int)Math.pow(4, MAX_KMER_SIZE));
50
51 private int longestSequence = 0;
52
53 /* 2D array, first dimension is the position in the sequence, second is Kmer length */
54 private long [][] totalKmerCounts = new long [0][0];
55
56 private long skipCount = 0;
57
58 private static int MIN_KMER_SIZE = 7;
59 private static int MAX_KMER_SIZE = 7;
60
61 public boolean calculated = false;
62
63 // This is the full set of Kmers to be reported
64 private Kmer [] enrichedKmers = null;
65
66 // This is the data for the Kmers which are going to be placed on the graph
67 private double [][] enrichments = null;
68
69 // For the graph we also need to know the scale we need to use on the axes.
70 private double minGraphValue = 0;
71 private double maxGraphValue = 0;
72
73
74 private String [] xCategories = new String[0];
75 private String [] xLabels = new String[0];
76
77 BaseGroup [] groups;
78 public KmerContent () {
79 if (FastQCConfig.getInstance().kmer_size != null) {
80 int kmerSize = FastQCConfig.getInstance().kmer_size;
81 MIN_KMER_SIZE = kmerSize;
82 MAX_KMER_SIZE = kmerSize;
83 }
84 }
85
86 public boolean ignoreFilteredSequences() {
87 return true;
88 }
89
90 public boolean ignoreInReport () {
91 if (ModuleConfig.getParam("kmer", "ignore") > 0) {
92 return true;
93 }
94 return false;
95 }
96
97 public JPanel getResultsPanel() {
98
99 if (!calculated) calculateEnrichment();
100 JPanel returnPanel = new JPanel();
101 returnPanel.setLayout(new BorderLayout());
102 returnPanel.add(new JLabel("Overrepresented Kmers",JLabel.CENTER),BorderLayout.NORTH);
103
104 JSplitPane splitPanel = new JSplitPane(JSplitPane.VERTICAL_SPLIT);
105
106 if (enrichedKmers.length > 0) {
107 TableModel model = new ResultsTable(enrichedKmers);
108 splitPanel.setBottomComponent(new JScrollPane(new JTable(model)));
109 splitPanel.setTopComponent(new LineGraph(enrichments, minGraphValue, maxGraphValue, "Position in read (bp)", xLabels, xCategories, "Log2 Obs/Exp"));
110 returnPanel.add(splitPanel,BorderLayout.CENTER);
111 }
112 else {
113 returnPanel.add(new JLabel("There are no overrepresented Kmers",JLabel.CENTER),BorderLayout.CENTER);
114 }
115
116 return returnPanel;
117 }
118
119 /**
120 * This method simply keeps a count of the number of Kmers of a given size
121 * seen at each position within the run. We can use this later on to calculate
122 * the enrichment of the Kmers we actually count.
123 *
124 * We take in the Kmer sequence even though this isn't used in the total counts
125 * we do this because we don't want to count Kmers with Ns in them, but we do
126 * need to ensure that the data structure is expanded to the right size, and if
127 * we have libraries where later positions are Ns in all sequences then our
128 * data structure ends up too short and we crash.
129 *
130 * @param position Position within the read. 0 indexed
131 * @param kmerLength Actual length of the Kmer analysed
132 */
133 private void addKmerCount (int position,int kmerLength, String kmer) {
134
135
136 if (position >= totalKmerCounts.length) {
137 // We need to expand the array
138 long [][] newCounts = new long[position+1][];
139 for (int i=0;i<totalKmerCounts.length;i++) {
140 newCounts[i] = totalKmerCounts[i];
141 }
142 for (int i=totalKmerCounts.length;i<newCounts.length;i++) {
143 newCounts[i] = new long[MAX_KMER_SIZE];
144 }
145
146 totalKmerCounts = newCounts;
147 }
148
149 if (kmer.indexOf("N") >=0) return;
150
151 ++totalKmerCounts[position][kmerLength-1];
152
153 }
154
155 private synchronized void calculateEnrichment () {
156
157 /*
158 * For each Kmer we work out whether there is a statistically
159 * significant deviation in its coverage at any given position
160 * compared to its average coverage over all positions.
161 */
162
163
164 // We'll be grouping together positions later so make up the groups now
165 groups = BaseGroup.makeBaseGroups((longestSequence-MIN_KMER_SIZE)+1);
166
167 Vector<Kmer>unevenKmers = new Vector<Kmer>();
168
169 Iterator<Kmer> rawKmers = kmers.values().iterator();
170
171 while (rawKmers.hasNext()) {
172 Kmer k = rawKmers.next();
173 char [] chars = k.sequence().toCharArray();
174
175
176 long totalKmerCount = 0;
177
178
179 // This gets us the total number of Kmers of this type in the whole
180 // dataset.
181 for (int i=0;i<totalKmerCounts.length;i++) {
182 totalKmerCount += totalKmerCounts[i][k.sequence().length()-1];
183 }
184
185 // This is the expected proportion of all Kmers which have this
186 // specific Kmer sequence. We no longer make any attempt to judge
187 // overall enrichment or depletion of this sequence since once you
188 // get to longer lengths the distribution isn't flat anyway
189
190 float expectedProportion = k.count/(float)totalKmerCount;
191
192 // We now want to go through each of the positions looking for whether
193 // this Kmer was seen an unexpected number of times compared to what we
194 // expected from the global values
195
196
197 float [] obsExpPositions = new float[groups.length];
198 float [] binomialPValues = new float[groups.length];
199
200 long [] positionCounts = k.getPositions();
201
202 for (int g=0;g<groups.length;g++) {
203 // This is a summation of the number of Kmers of this length which
204 // fall into this base group
205 long totalGroupCount = 0;
206
207 // This is a summation of the number of hit Kmers which fall within
208 // this base group.
209 long totalGroupHits = 0;
210 for (int p=groups[g].lowerCount()-1;p<groups[g].upperCount() && p < positionCounts.length ;p++) {
211 totalGroupCount += totalKmerCounts[p][chars.length-1];
212 totalGroupHits += positionCounts[p];
213 }
214
215 float predicted = expectedProportion * totalGroupCount;
216 // obsExpPositions[g] = (float)(Math.log(totalGroupHits/predicted)/Math.log(2));
217 obsExpPositions[g] = (float)(totalGroupHits/predicted);
218
219 // Now we can run a binomial test to see if there is a significant
220 // deviation from what we expect given the number of observations we've
221 // made
222
223 BinomialDistribution bd = new BinomialDistribution((int)totalGroupCount, expectedProportion);
224 if (totalGroupHits > predicted) {
225 binomialPValues[g] = (float)((1 - bd.cumulativeProbability((int)totalGroupHits)) * Math.pow(4,chars.length));
226 }
227 else {
228 binomialPValues[g] = 1;
229 }
230
231 }
232
233 k.setObsExpPositions(obsExpPositions);
234
235
236 // To keep this we need a p-value below 0.01 and an obs/exp above 5 (actual values are log2 transformed)
237 float lowestPValue = 1;
238 for (int i=0;i<binomialPValues.length;i++) {
239 // if (binomialPValues[i] < 0.01 && obsExpPositions[i] > (Math.log(5)/Math.log(2))) {
240 if (binomialPValues[i] < 0.01 && obsExpPositions[i] > 5) {
241 if (binomialPValues[i]<lowestPValue) {
242 lowestPValue = binomialPValues[i];
243 }
244 }
245 }
246
247 if (lowestPValue < 0.01) {
248 k.setLowestPValue(lowestPValue);
249 unevenKmers.add(k);
250 }
251
252
253 }
254
255 Kmer [] finalKMers = unevenKmers.toArray(new Kmer[0]);
256
257 // We sort by the highest degree of enrichment over the average
258 Arrays.sort(finalKMers);
259
260 // So we don't end up with stupidly long lists of Kmers in the
261 // report we'll only report the top 20
262 if (finalKMers.length > 20) {
263 Kmer [] shortenedKmers = new Kmer [20];
264 for (int i=0;i<shortenedKmers.length;i++) {
265 shortenedKmers[i] = finalKMers[i];
266 }
267
268 finalKMers = shortenedKmers;
269 }
270
271 // Now we take the enrichment positions for the top 6 hits and
272 // record these so we can plot them on a line graph
273 enrichments = new double [Math.min(6, finalKMers.length)][];
274 xLabels = new String[enrichments.length];
275
276 xCategories = new String [groups.length];
277
278 for (int i=0;i<xCategories.length;i++) {
279 xCategories[i] = groups[i].toString();
280 }
281
282 for (int k=0;k<enrichments.length;k++) {
283 enrichments[k] = new double[groups.length];
284
285 float [] obsExpPos = finalKMers[k].getObsExpPositions();
286
287 for (int g=0;g<groups.length;g++) {
288 enrichments[k][g] = obsExpPos[g];
289 if (obsExpPos[g] > maxGraphValue) maxGraphValue = obsExpPos[g];
290 if (obsExpPos[g] < minGraphValue) minGraphValue = obsExpPos[g];
291 }
292
293 xLabels[k] = finalKMers[k].sequence();
294
295 }
296
297 minGraphValue = 0;
298
299 // System.err.println("Max value="+maxGraphValue+" min value="+minGraphValue);
300
301 this.enrichedKmers = finalKMers;
302
303 // Delete the initial data structure so we don't suck up more memory
304 // than we have to.
305 kmers.clear();
306
307 calculated = true;
308 }
309
310
311 public void processSequence(Sequence sequence) {
312 calculated = false;
313
314 /*
315 * The processing done by this module is quite intensive so to speed things
316 * up we don't look at every sequence. Instead we take only 2% of the
317 * submitted sequences and extrapolate from these to the full set in the file.
318 */
319 ++skipCount;
320 if (skipCount % 50 != 0) return;
321
322 /*
323 * This module uses horrible amounts of memory if allowed to store the full
324 * Kmer content for all positions in really long reads (pacbio reads were the
325 * ones which really broke this). We'll therefore limit our read lengths to
326 * 500bp since specific Kmer positions beyond that are not likely to be useful
327 */
328
329 String seq;
330
331 if (sequence.getSequence().length() > 500) {
332 seq = sequence.getSequence().substring(0, 500);
333 }
334 else {
335 seq = sequence.getSequence();
336 }
337
338 if (seq.length() > longestSequence) {
339 longestSequence = seq.length();
340 }
341
342 // Now we go through all of the Kmers to count these
343 for (int kmerSize=MIN_KMER_SIZE;kmerSize<=MAX_KMER_SIZE;kmerSize++) {
344 for (int i=0;i<=seq.length()-kmerSize;i++) {
345
346 String kmer = seq.substring(i, i+kmerSize);
347
348 if (kmer.length() != kmerSize) {
349 throw new IllegalStateException("String length "+kmer.length()+" wasn't the same as the kmer length "+kmerSize);
350 }
351
352 // Add to the counts before skipping Kmers containing Ns (see
353 // explanation in addKmerCount for the reasoning).
354 addKmerCount(i, kmerSize, kmer);
355
356 // Skip Kmers containing N
357 if (kmer.indexOf("N") >=0) continue;
358
359 if (kmers.containsKey(kmer)) {
360 kmers.get(kmer).incrementCount(i);
361 }
362 else {
363 kmers.put(new String(kmer), new Kmer(kmer,i,(seq.length()-kmerSize)+1));
364 }
365
366 }
367 }
368 }
369
370 public void reset () {
371 calculated = false;
372 totalKmerCounts = new long[0][0];
373 longestSequence = 0;
374 skipCount = 0;
375 enrichedKmers = null;
376 kmers.clear();
377 }
378
379 public String description() {
380 return "Identifies short sequences which have uneven representation";
381 }
382
383 public String name() {
384 return "Kmer Content";
385 }
386
387 public boolean raisesError() {
388 if (!calculated) calculateEnrichment();
389
390 // We raise an error if the most enriched kmer is seen more than 100 times
391 // more frequently than we expect.
392
393 if (enrichedKmers.length > 0 && 0-Math.log10(enrichedKmers[0].pValue()) > ModuleConfig.getParam("kmer", "error")) return true;
394 return false;
395 }
396
397 public boolean raisesWarning() {
398 if (!calculated) calculateEnrichment();
399
400 // We raise a warning if there are any enriched kmers
401 if (enrichedKmers.length > 0 && 0-Math.log10(enrichedKmers[0].pValue()) > ModuleConfig.getParam("kmer", "warn")) return true;
402 return false;
403 }
404
405 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
406 if (!calculated) calculateEnrichment();
407
408 if (enrichedKmers.length > 0) {
409 writeSpecificImage(report, new LineGraph(enrichments, minGraphValue, maxGraphValue, "Position in read (bp)", xLabels, xCategories, "Log2 Obs/Exp"),"kmer_profiles.png", "Kmer graph", Math.max(800, groups.length*15), 600);
410 }
411
412
413 ResultsTable table = new ResultsTable(enrichedKmers);
414
415 XMLStreamWriter xhtml = report.xhtmlStream();
416
417 if (enrichedKmers.length == 0)
418 {
419 xhtml.writeStartElement("p");
420 xhtml.writeCharacters("No overrepresented Kmers");
421 xhtml.writeEndElement();
422 }
423
424 else
425 {
426 super.writeTable(report, table);
427 }
428 }
429
430 private class Kmer implements Comparable<Kmer>{
431
432 private String sequence;
433 private long count = 0;
434 private float lowestPValue = 0;
435 private float [] obsExpPositions = null;
436 private long [] positions = new long[0];
437
438 public Kmer (String sequence, int position, int seqLength) {
439
440 // Do this slightly convoluted dance to try to avoid
441 // keeping the whole original sequence in memory
442 char [] chars = sequence.toCharArray();
443 this.sequence = new String(chars);
444 count = 1;
445 positions = new long[seqLength];
446 ++positions[position];
447 }
448
449 public void incrementCount (int position) {
450 ++count;
451
452 if (position >= positions.length) {
453 long [] newPositions = new long[position+1];
454 for (int i=0;i<positions.length;i++) {
455 newPositions[i] = positions[i];
456 }
457 positions = newPositions;
458 }
459
460 ++positions[position];
461
462 }
463
464 public long [] getPositions () {
465 return positions;
466 }
467
468 public String sequence () {
469 return sequence;
470 }
471
472 public long count () {
473 return count;
474 }
475
476 public void setLowestPValue (float p) {
477 this.lowestPValue = p;
478 }
479
480 public void setObsExpPositions (float [] oePositions) {
481 this.obsExpPositions = oePositions;
482 }
483
484 public float [] getObsExpPositions () {
485 return obsExpPositions;
486 }
487
488 public float pValue () {
489 return lowestPValue;
490 }
491
492 public float maxObsExp () {
493 float max = 0;
494 for (int i=0;i<obsExpPositions.length;i++) {
495 if (obsExpPositions[i]>max) max = obsExpPositions[i];
496 }
497 return max;
498 }
499
500 public int maxPosition () {
501 float max = 0;
502 int position = 0;
503 for (int i=0;i<obsExpPositions.length;i++) {
504 if (obsExpPositions[i]>max) {
505 max = obsExpPositions[i];
506 position = i+1;
507 }
508 }
509
510 if (position == 0) {
511 System.err.println("No value > 0 for "+sequence);
512 position = 1;
513 }
514
515 return position;
516 }
517
518 public int compareTo(Kmer o) {
519 return Float.compare(o.maxObsExp(), maxObsExp());
520 }
521 }
522
523
524 private class ResultsTable extends AbstractTableModel
525 {
526 private static final long serialVersionUID = 1L;
527 private Kmer [] kmers;
528
529 public ResultsTable (Kmer [] kmers) {
530 this.kmers = kmers;
531 }
532
533
534 // Sequence - Count - Obs/Exp
535 public int getColumnCount() {
536 return 5;
537 }
538
539 public int getRowCount() {
540 return kmers.length;
541 }
542
543 public Object getValueAt(int rowIndex, int columnIndex) {
544 switch (columnIndex) {
545 case 0: return kmers[rowIndex].sequence();
546 case 1: return kmers[rowIndex].count()*5;
547 case 2: return kmers[rowIndex].pValue();
548 case 3: return kmers[rowIndex].maxObsExp();
549 case 4: return groups[kmers[rowIndex].maxPosition()-1].toString();
550
551 }
552 return null;
553 }
554
555 public String getColumnName (int columnIndex) {
556 switch (columnIndex) {
557 case 0: return "Sequence";
558 case 1: return "Count";
559 case 2: return "PValue";
560 case 3: return "Obs/Exp Max";
561 case 4: return "Max Obs/Exp Position";
562 }
563 return null;
564 }
565
566 public Class<?> getColumnClass (int columnIndex) {
567 switch (columnIndex) {
568 case 0: return String.class;
569 case 1: return Integer.class;
570 case 2: return Float.class;
571 case 3: return Float.class;
572 case 4: return String.class;
573 }
574 return null;
575
576 }
577 }
578
579 }
0 /**
1 * Copyright Copyright 2013-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.BufferedReader;
22 import java.io.FileNotFoundException;
23 import java.io.FileReader;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.util.HashMap;
27
28 import uk.ac.babraham.FastQC.FastQCConfig;
29 import uk.ac.babraham.FastQC.Sequence.Contaminant.ContaminentFinder;
30
31 public class ModuleConfig {
32
33 private static HashMap<String, Double>parameters = readParams();
34
35
36 private static HashMap<String, Double> readParams () {
37
38 HashMap<String, Double>params = new HashMap<String, Double>();
39
40 // Set the defaults to use if we don't have any overrides
41 params.put("duplication:warn",70d);
42 params.put("duplication:error", 50d);
43 params.put("kmer:warn", 2d);
44 params.put("kmer:error",5d);
45 params.put("n_content:warn",5d);
46 params.put("n_content:error",20d);
47 params.put("overrepresented:warn", 0.1);
48 params.put("overrepresented:error",1d);
49 params.put("quality_base_lower:warn",10d);
50 params.put("quality_base_lower:error", 5d);
51 params.put("quality_base_median:warn", 25d);
52 params.put("quality_base_median:error", 20d);
53 params.put("sequence:warn", 10d);
54 params.put("sequence:error",20d);
55 params.put("gc_sequence:warn",15d);
56 params.put("gc_sequence:error",30d);
57 params.put("quality_sequence:warn", 20d);
58 params.put("quality_sequence:error",27d);
59 params.put("tile:warn", 5d);
60 params.put("tile:error",10d);
61 params.put("sequence_length:warn",1d);
62 params.put("sequence_length:error", 1d);
63 params.put("adapter:warn", 5d);
64 params.put("adapter:error", 10d);
65
66 params.put("duplication:ignore",0d);
67 params.put("kmer:ignore",0d);
68 params.put("n_content:ignore",0d);
69 params.put("overrepresented:ignore",0d);
70 params.put("quality_base:ignore",0d);
71 params.put("sequence:ignore",0d);
72 params.put("gc_quality:ignore",0d);
73 params.put("quality_sequence:ignore",0d);
74 params.put("tile:ignore",0d);
75 params.put("sequence_length:ignore",0d);
76 params.put("adapter:ignore",0d);
77
78 // Now read the config file to see if there are updated values for any of these.
79
80 BufferedReader br;
81
82 try {
83 if (FastQCConfig.getInstance().limits_file == null) {
84 InputStream rsrc=ContaminentFinder.class.getResourceAsStream("/Configuration/limits.txt");
85 if (rsrc==null) throw new FileNotFoundException("cannot find Configuration/limits.txt");
86 br =new BufferedReader(new InputStreamReader(rsrc));
87 }
88 else {
89 br=new BufferedReader(new FileReader(FastQCConfig.getInstance().limits_file));
90 }
91
92 String line;
93 while ((line = br.readLine()) != null) {
94
95 if (line.startsWith("#")) continue;
96
97 if (line.trim().length() == 0) continue;
98
99 String [] sections = line.split("\\s+");
100 if (sections.length != 3) {
101 System.err.println("Config line '"+line+"' didn't contain the 3 required sections");
102 }
103
104 if (!(sections[1].equals("warn") || sections[1].equals("error") || sections[1].equals("ignore"))) {
105 System.err.println("Second config field must be error, warn or ignore, not '"+sections[1]+"'");
106 continue;
107 }
108
109 double value;
110 try {
111 value = Double.parseDouble(sections[2]);
112 }
113 catch (NumberFormatException nfe) {
114 System.err.println("Value "+sections[2]+" didn't look like a number");
115 continue;
116 }
117
118 String key = sections[0]+":"+sections[1];
119 params.put(key, value);
120
121
122 }
123
124 }
125 catch (Exception e) {
126 e.printStackTrace();
127 }
128
129
130
131
132 return params;
133
134 }
135
136 public static Double getParam(String module, String level) {
137
138 if (!(level.equals("warn") || level.equals("error") || level.equals("ignore"))) {
139 throw new IllegalArgumentException("Level must be warn, error or ignore");
140 }
141
142 String key = module+":"+level;
143
144 if (! parameters.containsKey(key)) {
145 throw new IllegalArgumentException("No key called "+key+" in the config data");
146 }
147
148 return parameters.get(key);
149
150 }
151
152
153 }
0 /**
1 * Copyright Copyright 2013-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.BufferedReader;
22 import java.io.FileNotFoundException;
23 import java.io.FileReader;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.util.HashMap;
27
28 import uk.ac.babraham.FastQC.FastQCConfig;
29 import uk.ac.babraham.FastQC.Sequence.Contaminant.ContaminentFinder;
30
31 public class ModuleConfig {
32
33 private static HashMap<String, Double>parameters = readParams();
34
35
36 private static HashMap<String, Double> readParams () {
37
38 HashMap<String, Double>params = new HashMap<String, Double>();
39
40 // Set the defaults to use if we don't have any overrides
41 params.put("duplication:warn",70d);
42 params.put("duplication:error", 50d);
43 params.put("kmer:warn", 2d);
44 params.put("kmer:error",5d);
45 params.put("n_content:warn",5d);
46 params.put("n_content:error",20d);
47 params.put("overrepresented:warn", 0.1);
48 params.put("overrepresented:error",1d);
49 params.put("quality_base_lower:warn",10d);
50 params.put("quality_base_lower:error", 5d);
51 params.put("quality_base_median:warn", 25d);
52 params.put("quality_base_median:error", 20d);
53 params.put("sequence:warn", 10d);
54 params.put("sequence:error",20d);
55 params.put("gc_sequence:warn",15d);
56 params.put("gc_sequence:error",30d);
57 params.put("quality_sequence:warn", 20d);
58 params.put("quality_sequence:error",27d);
59 params.put("tile:warn", 5d);
60 params.put("tile:error",10d);
61 params.put("sequence_length:warn",1d);
62 params.put("sequence_length:error", 1d);
63 params.put("adapter:warn", 5d);
64 params.put("adapter:error", 10d);
65
66 params.put("duplication:ignore",0d);
67 params.put("kmer:ignore",0d);
68 params.put("n_content:ignore",0d);
69 params.put("overrepresented:ignore",0d);
70 params.put("quality_base:ignore",0d);
71 params.put("sequence:ignore",0d);
72 params.put("gc_quality:ignore",0d);
73 params.put("quality_sequence:ignore",0d);
74 params.put("tile:ignore",0d);
75 params.put("sequence_length:ignore",0d);
76 params.put("adapter:ignore",0d);
77
78 // Now read the config file to see if there are updated values for any of these.
79
80 BufferedReader br;
81
82 try {
83 if (FastQCConfig.getInstance().limits_file == null) {
84 InputStream rsrc=ContaminentFinder.class.getResourceAsStream("/Configuration/limits.txt");
85 if (rsrc==null) throw new FileNotFoundException("cannot find Configuration/limits.txt");
86 br =new BufferedReader(new InputStreamReader(rsrc));
87 }
88 else {
89 br=new BufferedReader(new FileReader(FastQCConfig.getInstance().limits_file));
90 }
91
92 String line;
93 while ((line = br.readLine()) != null) {
94
95 if (line.startsWith("#")) continue;
96
97 if (line.trim().length() == 0) continue;
98
99 String [] sections = line.split("\\s+");
100 if (sections.length != 3) {
101 System.err.println("Config line '"+line+"' didn't contain the 3 required sections");
102 }
103
104 if (!(sections[1].equals("warn") || sections[1].equals("error") || sections[1].equals("ignore"))) {
105 System.err.println("Second config field must be error, warn or ignore, not '"+sections[1]+"'");
106 continue;
107 }
108
109 double value;
110 try {
111 value = Double.parseDouble(sections[2]);
112 }
113 catch (NumberFormatException nfe) {
114 System.err.println("Value "+sections[2]+" didn't look like a number");
115 continue;
116 }
117
118 String key = sections[0]+":"+sections[1];
119 params.put(key, value);
120
121
122 }
123
124 }
125 catch (Exception e) {
126 e.printStackTrace();
127 }
128
129
130
131
132 return params;
133
134 }
135
136 public static Double getParam(String module, String level) {
137
138 if (!(level.equals("warn") || level.equals("error") || level.equals("ignore"))) {
139 throw new IllegalArgumentException("Level must be warn, error or ignore");
140 }
141
142 String key = module+":"+level;
143
144 if (! parameters.containsKey(key)) {
145 throw new IllegalArgumentException("No key called "+key+" in the config data");
146 }
147
148 return parameters.get(key);
149
150 }
151
152
153 }
0 /**
1 * Copyright Copyright 2014-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 public class ModuleFactory {
22
23 public static QCModule [] getStandardModuleList () {
24
25 OverRepresentedSeqs os = new OverRepresentedSeqs();
26
27 QCModule [] module_list = new QCModule [] {
28 new BasicStats(),
29 new PerBaseQualityScores(),
30 new PerTileQualityScores(),
31 new PerSequenceQualityScores(),
32 new PerBaseSequenceContent(),
33 new PerSequenceGCContent(),
34 new NContent(),
35 new SequenceLengthDistribution(),
36 os.duplicationLevelModule(),
37 os,
38 new AdapterContent(),
39 new KmerContent(),
40 };
41
42 return (module_list);
43 }
44
45 }
0 /**
1 * Copyright Copyright 2014-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 public class ModuleFactory {
22
23 public static QCModule [] getStandardModuleList () {
24
25 OverRepresentedSeqs os = new OverRepresentedSeqs();
26
27 QCModule [] module_list = new QCModule [] {
28 new BasicStats(),
29 new PerBaseQualityScores(),
30 new PerTileQualityScores(),
31 new PerSequenceQualityScores(),
32 new PerBaseSequenceContent(),
33 new PerSequenceGCContent(),
34 new NContent(),
35 new SequenceLengthDistribution(),
36 os.duplicationLevelModule(),
37 os,
38 new AdapterContent(),
39 new KmerContent(),
40 };
41
42 return (module_list);
43 }
44
45 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
27 import uk.ac.babraham.FastQC.Graphs.LineGraph;
28 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
29 import uk.ac.babraham.FastQC.Sequence.Sequence;
30
31 public class NContent extends AbstractQCModule {
32
33 public long [] nCounts = new long [0];
34 public long [] notNCounts = new long [0];
35 public boolean calculated = false;
36 public double [] percentages = null;
37 public String [] xCategories = new String[0];
38
39 public JPanel getResultsPanel() {
40
41 if (!calculated) getPercentages();
42 return new LineGraph(new double [][] {percentages}, 0d, 100d, "Position in read (bp)",new String [] {"%N"}, xCategories, "N content across all bases");
43 }
44
45 public boolean ignoreFilteredSequences() {
46 return true;
47 }
48
49 public boolean ignoreInReport () {
50 if (ModuleConfig.getParam("n_content", "ignore") > 0) {
51 return true;
52 }
53 return false;
54 }
55
56 private synchronized void getPercentages () {
57
58 BaseGroup [] groups = BaseGroup.makeBaseGroups(nCounts.length);
59
60 xCategories = new String[groups.length];
61
62 percentages = new double [groups.length];
63
64 long total;
65 long nCount;
66
67 for (int i=0;i<groups.length;i++) {
68
69 xCategories[i] = groups[i].toString();
70
71 nCount = 0;
72 total = 0;
73
74 for (int bp=groups[i].lowerCount()-1;bp<groups[i].upperCount();bp++) {
75 nCount += nCounts[bp];
76 total += nCounts[bp];
77 total += notNCounts[bp];
78 }
79
80 percentages[i] = 100*(nCount/(double)total);
81 }
82
83 calculated = true;
84
85 }
86
87 public void processSequence(Sequence sequence) {
88 calculated = false;
89 char [] seq = sequence.getSequence().toCharArray();
90 if (nCounts.length < seq.length) {
91 // We need to expand the size of the data structures
92
93 long [] nCountsNew = new long [seq.length];
94 long [] notNCountsNew = new long [seq.length];
95
96 for (int i=0;i<nCounts.length;i++) {
97 nCountsNew[i] = nCounts[i];
98 notNCountsNew[i] = notNCounts[i];
99 }
100
101 nCounts = nCountsNew;
102 notNCounts = notNCountsNew;
103 }
104
105 for (int i=0;i<seq.length;i++) {
106 if (seq[i] == 'N') {
107 ++nCounts[i];
108 }
109 else {
110 ++notNCounts[i];
111 }
112 }
113
114 }
115
116 public void reset () {
117 nCounts = new long[0];
118 notNCounts = new long[0];
119 }
120
121 public String description() {
122 return "Shows the percentage of bases at each position which are not being called";
123 }
124
125 public String name() {
126 return "Per base N content";
127 }
128
129 public boolean raisesError() {
130 if (!calculated) getPercentages();
131 for (int i=0;i<percentages.length;i++) {
132 if (percentages[i] > ModuleConfig.getParam("n_content", "error")) {
133 return true;
134 }
135 }
136 return false;
137 }
138
139 public boolean raisesWarning() {
140 if (!calculated) getPercentages();
141 for (int i=0;i<percentages.length;i++) {
142 if (percentages[i] > ModuleConfig.getParam("n_content", "warn")) {
143 return true;
144 }
145 }
146 return false;
147 }
148
149 public void makeReport(HTMLReportArchive report) throws XMLStreamException,IOException {
150 if (!calculated) getPercentages();
151
152 writeDefaultImage(report, "per_base_n_content.png", "N content graph", Math.max(800, percentages.length*15), 600);
153
154 StringBuffer sb = report.dataDocument();
155 sb.append("#Base\tN-Count\n");
156 for (int i=0;i<xCategories.length;i++) {
157 sb.append(xCategories[i]);
158 sb.append("\t");
159 sb.append(percentages[i]);
160 sb.append("\n");
161 }
162 }
163
164 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
27 import uk.ac.babraham.FastQC.Graphs.LineGraph;
28 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
29 import uk.ac.babraham.FastQC.Sequence.Sequence;
30
31 public class NContent extends AbstractQCModule {
32
33 public long [] nCounts = new long [0];
34 public long [] notNCounts = new long [0];
35 public boolean calculated = false;
36 public double [] percentages = null;
37 public String [] xCategories = new String[0];
38
39 public JPanel getResultsPanel() {
40
41 if (!calculated) getPercentages();
42 return new LineGraph(new double [][] {percentages}, 0d, 100d, "Position in read (bp)",new String [] {"%N"}, xCategories, "N content across all bases");
43 }
44
45 public boolean ignoreFilteredSequences() {
46 return true;
47 }
48
49 public boolean ignoreInReport () {
50 if (ModuleConfig.getParam("n_content", "ignore") > 0) {
51 return true;
52 }
53 return false;
54 }
55
56 private synchronized void getPercentages () {
57
58 BaseGroup [] groups = BaseGroup.makeBaseGroups(nCounts.length);
59
60 xCategories = new String[groups.length];
61
62 percentages = new double [groups.length];
63
64 long total;
65 long nCount;
66
67 for (int i=0;i<groups.length;i++) {
68
69 xCategories[i] = groups[i].toString();
70
71 nCount = 0;
72 total = 0;
73
74 for (int bp=groups[i].lowerCount()-1;bp<groups[i].upperCount();bp++) {
75 nCount += nCounts[bp];
76 total += nCounts[bp];
77 total += notNCounts[bp];
78 }
79
80 percentages[i] = 100*(nCount/(double)total);
81 }
82
83 calculated = true;
84
85 }
86
87 public void processSequence(Sequence sequence) {
88 calculated = false;
89 char [] seq = sequence.getSequence().toCharArray();
90 if (nCounts.length < seq.length) {
91 // We need to expand the size of the data structures
92
93 long [] nCountsNew = new long [seq.length];
94 long [] notNCountsNew = new long [seq.length];
95
96 for (int i=0;i<nCounts.length;i++) {
97 nCountsNew[i] = nCounts[i];
98 notNCountsNew[i] = notNCounts[i];
99 }
100
101 nCounts = nCountsNew;
102 notNCounts = notNCountsNew;
103 }
104
105 for (int i=0;i<seq.length;i++) {
106 if (seq[i] == 'N') {
107 ++nCounts[i];
108 }
109 else {
110 ++notNCounts[i];
111 }
112 }
113
114 }
115
116 public void reset () {
117 nCounts = new long[0];
118 notNCounts = new long[0];
119 }
120
121 public String description() {
122 return "Shows the percentage of bases at each position which are not being called";
123 }
124
125 public String name() {
126 return "Per base N content";
127 }
128
129 public boolean raisesError() {
130 if (!calculated) getPercentages();
131 for (int i=0;i<percentages.length;i++) {
132 if (percentages[i] > ModuleConfig.getParam("n_content", "error")) {
133 return true;
134 }
135 }
136 return false;
137 }
138
139 public boolean raisesWarning() {
140 if (!calculated) getPercentages();
141 for (int i=0;i<percentages.length;i++) {
142 if (percentages[i] > ModuleConfig.getParam("n_content", "warn")) {
143 return true;
144 }
145 }
146 return false;
147 }
148
149 public void makeReport(HTMLReportArchive report) throws XMLStreamException,IOException {
150 if (!calculated) getPercentages();
151
152 writeDefaultImage(report, "per_base_n_content.png", "N content graph", Math.max(800, percentages.length*15), 600);
153
154 StringBuffer sb = report.dataDocument();
155 sb.append("#Base\tN-Count\n");
156 for (int i=0;i<xCategories.length;i++) {
157 sb.append(xCategories[i]);
158 sb.append("\t");
159 sb.append(percentages[i]);
160 sb.append("\n");
161 }
162 }
163
164 }
00 /**
1 * Copyright Copyright 2010-15 Simon Andrews
1 * Copyright Copyright 2010-17 Simon Andrews
22 *
33 * This file is part of FastQC.
44 *
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
27 import uk.ac.babraham.FastQC.Graphs.QualityBoxPlot;
28 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
29 import uk.ac.babraham.FastQC.Sequence.Sequence;
30 import uk.ac.babraham.FastQC.Sequence.QualityEncoding.PhredEncoding;
31 import uk.ac.babraham.FastQC.Utilities.QualityCount;
32
33 public class PerBaseQualityScores extends AbstractQCModule {
34
35 public QualityCount [] qualityCounts = new QualityCount[0];
36 double [] means = null;
37 double [] medians = null;
38 double [] lowerQuartile = null;
39 double [] upperQuartile = null;
40 double [] lowest = null;
41 double [] highest = null;
42 String [] xLabels;
43 int low = 0;
44 int high = 0;
45 PhredEncoding encodingScheme;
46 private boolean calculated = false;
47
48 public JPanel getResultsPanel() {
49
50 if (!calculated) getPercentages();
51
52 return new QualityBoxPlot(means,medians,lowest,highest,lowerQuartile,upperQuartile, low, high, 2d, xLabels, "Quality scores across all bases ("+encodingScheme+" encoding)");
53 }
54
55 public boolean ignoreFilteredSequences() {
56 return true;
57 }
58
59 public boolean ignoreInReport () {
60 // We don't show this if there is no quality data.
61 if (ModuleConfig.getParam("quality_base", "ignore") > 0 || qualityCounts.length == 0) {
62 return true;
63 }
64 return false;
65 }
66
67 private synchronized void getPercentages () {
68
69 char [] range = calculateOffsets();
70 encodingScheme = PhredEncoding.getFastQEncodingOffset(range[0]);
71 low = 0;
72 high = range[1] - encodingScheme.offset();
73 if (high < 35) {
74 high = 35;
75 }
76
77 BaseGroup [] groups = BaseGroup.makeBaseGroups(qualityCounts.length);
78
79 means = new double[groups.length];
80 medians = new double[groups.length];
81 lowest = new double[groups.length];
82 highest = new double[groups.length];
83 lowerQuartile = new double[groups.length];
84 upperQuartile = new double[groups.length];
85 xLabels = new String[groups.length];
86
87 for (int i=0;i<groups.length;i++) {
88 xLabels[i] = groups[i].toString();
89 int minBase = groups[i].lowerCount();
90 int maxBase = groups[i].upperCount();
91 lowest[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 10);
92 highest[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 90);
93 means[i] = getMean(minBase,maxBase,encodingScheme.offset());
94 medians[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 50);
95 lowerQuartile[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 25);
96 upperQuartile[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 75);
97 }
98
99 calculated = true;
100
101 }
102
103 private char [] calculateOffsets () {
104 // Works out from the set of chars what is the most
105 // likely encoding scale for this file.
106
107 char minChar = 0;
108 char maxChar = 0;
109
110 for (int q=0;q<qualityCounts.length;q++) {
111 if (q == 0) {
112 minChar = qualityCounts[q].getMinChar();
113 maxChar = qualityCounts[q].getMaxChar();
114 }
115 else {
116 if (qualityCounts[q].getMinChar() < minChar) {
117 minChar = qualityCounts[q].getMinChar();
118 }
119 if (qualityCounts[q].getMaxChar() > maxChar) {
120 maxChar = qualityCounts[q].getMaxChar();
121 }
122 }
123 }
124
125 return new char[] {minChar,maxChar};
126 }
127
128 public void processSequence(Sequence sequence) {
129
130 calculated = false;
131 char [] qual = sequence.getQualityString().toCharArray();
132 if (qualityCounts.length < qual.length) {
133
134 QualityCount [] qualityCountsNew = new QualityCount[qual.length];
135
136 for (int i=0;i<qualityCounts.length;i++) {
137 qualityCountsNew[i] = qualityCounts[i];
138 }
139 for (int i=qualityCounts.length;i<qualityCountsNew.length;i++) {
140 qualityCountsNew[i] = new QualityCount();
141 }
142 qualityCounts = qualityCountsNew;
143
144 }
145
146 for (int i=0;i<qual.length;i++) {
147 qualityCounts[i].addValue(qual[i]);
148 }
149
150 }
151
152 public void reset () {
153 qualityCounts = new QualityCount[0];
154 }
155
156 public String description() {
157 return "Shows the Quality scores of all bases at a given position in a sequencing run";
158 }
159
160 public String name() {
161 return "Per base sequence quality";
162 }
163
164 public boolean raisesError() {
165 if (!calculated) getPercentages();
166
167 for (int i=0;i<lowerQuartile.length;i++) {
168 if (Double.isNaN(lowerQuartile[i])) {
169 // There wasn't enough data for this group to make an assessment
170 continue;
171 }
172 if (lowerQuartile[i] < ModuleConfig.getParam("quality_base_lower", "error") || medians[i] < ModuleConfig.getParam("quality_base_median", "error")) {
173 return true;
174 }
175 }
176 return false;
177 }
178
179 public boolean raisesWarning() {
180 if (!calculated) getPercentages();
181
182 for (int i=0;i<lowerQuartile.length;i++) {
183 if (Double.isNaN(lowerQuartile[i])) {
184 // There wasn't enough data for this group to make an assessment
185 continue;
186 }
187 if (lowerQuartile[i] < ModuleConfig.getParam("quality_base_lower", "warn") || medians[i] < ModuleConfig.getParam("quality_base_median", "warn")) {
188 return true;
189 }
190 }
191 return false;
192 }
193
194 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException
195 {
196 if (!calculated) getPercentages();
197
198 writeDefaultImage(report, "per_base_quality.png", "Per base quality graph", Math.max(800, means.length*15), 600);
199
200 StringBuffer sb = report.dataDocument();
201 sb.append("#Base\tMean\tMedian\tLower Quartile\tUpper Quartile\t10th Percentile\t90th Percentile\n");
202 for (int i=0;i<means.length;i++) {
203 sb.append(xLabels[i]);
204 sb.append("\t");
205
206 sb.append(means[i]);
207 sb.append("\t");
208
209 sb.append(medians[i]);
210 sb.append("\t");
211
212 sb.append(lowerQuartile[i]);
213 sb.append("\t");
214
215 sb.append(upperQuartile[i]);
216 sb.append("\t");
217
218 sb.append(lowest[i]);
219 sb.append("\t");
220
221 sb.append(highest[i]);
222
223 sb.append("\n");
224 }
225 }
226
227 private double getPercentile (int minbp, int maxbp, int offset, int percentile) {
228 int count = 0;
229 double total = 0;
230
231 for (int i=minbp-1;i<maxbp;i++) {
232 if (qualityCounts[i].getTotalCount() > 100) {
233 count++;
234 total += qualityCounts[i].getPercentile(offset, percentile);
235 }
236 }
237
238 if (count > 0) {
239 return total/count;
240 }
241 return Double.NaN;
242
243 }
244
245 private double getMean (int minbp, int maxbp, int offset) {
246 int count = 0;
247 double total = 0;
248
249 for (int i=minbp-1;i<maxbp;i++) {
250 if (qualityCounts[i].getTotalCount() > 0) {
251 count++;
252 total += qualityCounts[i].getMean(offset);
253 }
254 }
255
256 if (count > 0) {
257 return total/count;
258 }
259 return 0;
260
261 }
262
263 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
27 import uk.ac.babraham.FastQC.Graphs.QualityBoxPlot;
28 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
29 import uk.ac.babraham.FastQC.Sequence.Sequence;
30 import uk.ac.babraham.FastQC.Sequence.QualityEncoding.PhredEncoding;
31 import uk.ac.babraham.FastQC.Utilities.QualityCount;
32
33 public class PerBaseQualityScores extends AbstractQCModule {
34
35 public QualityCount [] qualityCounts = new QualityCount[0];
36 double [] means = null;
37 double [] medians = null;
38 double [] lowerQuartile = null;
39 double [] upperQuartile = null;
40 double [] lowest = null;
41 double [] highest = null;
42 String [] xLabels;
43 int low = 0;
44 int high = 0;
45 PhredEncoding encodingScheme;
46 private boolean calculated = false;
47
48 public JPanel getResultsPanel() {
49
50 if (!calculated) getPercentages();
51
52 return new QualityBoxPlot(means,medians,lowest,highest,lowerQuartile,upperQuartile, low, high, 2d, xLabels, "Quality scores across all bases ("+encodingScheme+" encoding)");
53 }
54
55 public boolean ignoreFilteredSequences() {
56 return true;
57 }
58
59 public boolean ignoreInReport () {
60 // We don't show this if there is no quality data.
61 if (ModuleConfig.getParam("quality_base", "ignore") > 0 || qualityCounts.length == 0) {
62 return true;
63 }
64 return false;
65 }
66
67 private synchronized void getPercentages () {
68
69 char [] range = calculateOffsets();
70 encodingScheme = PhredEncoding.getFastQEncodingOffset(range[0]);
71 low = 0;
72 high = range[1] - encodingScheme.offset();
73 if (high < 35) {
74 high = 35;
75 }
76
77 BaseGroup [] groups = BaseGroup.makeBaseGroups(qualityCounts.length);
78
79 means = new double[groups.length];
80 medians = new double[groups.length];
81 lowest = new double[groups.length];
82 highest = new double[groups.length];
83 lowerQuartile = new double[groups.length];
84 upperQuartile = new double[groups.length];
85 xLabels = new String[groups.length];
86
87 for (int i=0;i<groups.length;i++) {
88 xLabels[i] = groups[i].toString();
89 int minBase = groups[i].lowerCount();
90 int maxBase = groups[i].upperCount();
91 lowest[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 10);
92 highest[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 90);
93 means[i] = getMean(minBase,maxBase,encodingScheme.offset());
94 medians[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 50);
95 lowerQuartile[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 25);
96 upperQuartile[i] = getPercentile(minBase, maxBase, encodingScheme.offset(), 75);
97 }
98
99 calculated = true;
100
101 }
102
103 private char [] calculateOffsets () {
104 // Works out from the set of chars what is the most
105 // likely encoding scale for this file.
106
107 char minChar = 0;
108 char maxChar = 0;
109
110 for (int q=0;q<qualityCounts.length;q++) {
111 if (q == 0) {
112 minChar = qualityCounts[q].getMinChar();
113 maxChar = qualityCounts[q].getMaxChar();
114 }
115 else {
116 if (qualityCounts[q].getMinChar() < minChar) {
117 minChar = qualityCounts[q].getMinChar();
118 }
119 if (qualityCounts[q].getMaxChar() > maxChar) {
120 maxChar = qualityCounts[q].getMaxChar();
121 }
122 }
123 }
124
125 return new char[] {minChar,maxChar};
126 }
127
128 public void processSequence(Sequence sequence) {
129
130 calculated = false;
131 char [] qual = sequence.getQualityString().toCharArray();
132 if (qualityCounts.length < qual.length) {
133
134 QualityCount [] qualityCountsNew = new QualityCount[qual.length];
135
136 for (int i=0;i<qualityCounts.length;i++) {
137 qualityCountsNew[i] = qualityCounts[i];
138 }
139 for (int i=qualityCounts.length;i<qualityCountsNew.length;i++) {
140 qualityCountsNew[i] = new QualityCount();
141 }
142 qualityCounts = qualityCountsNew;
143
144 }
145
146 for (int i=0;i<qual.length;i++) {
147 qualityCounts[i].addValue(qual[i]);
148 }
149
150 }
151
152 public void reset () {
153 qualityCounts = new QualityCount[0];
154 }
155
156 public String description() {
157 return "Shows the Quality scores of all bases at a given position in a sequencing run";
158 }
159
160 public String name() {
161 return "Per base sequence quality";
162 }
163
164 public boolean raisesError() {
165 if (!calculated) getPercentages();
166
167 for (int i=0;i<lowerQuartile.length;i++) {
168 if (Double.isNaN(lowerQuartile[i])) {
169 // There wasn't enough data for this group to make an assessment
170 continue;
171 }
172 if (lowerQuartile[i] < ModuleConfig.getParam("quality_base_lower", "error") || medians[i] < ModuleConfig.getParam("quality_base_median", "error")) {
173 return true;
174 }
175 }
176 return false;
177 }
178
179 public boolean raisesWarning() {
180 if (!calculated) getPercentages();
181
182 for (int i=0;i<lowerQuartile.length;i++) {
183 if (Double.isNaN(lowerQuartile[i])) {
184 // There wasn't enough data for this group to make an assessment
185 continue;
186 }
187 if (lowerQuartile[i] < ModuleConfig.getParam("quality_base_lower", "warn") || medians[i] < ModuleConfig.getParam("quality_base_median", "warn")) {
188 return true;
189 }
190 }
191 return false;
192 }
193
194 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException
195 {
196 if (!calculated) getPercentages();
197
198 writeDefaultImage(report, "per_base_quality.png", "Per base quality graph", Math.max(800, means.length*15), 600);
199
200 StringBuffer sb = report.dataDocument();
201 sb.append("#Base\tMean\tMedian\tLower Quartile\tUpper Quartile\t10th Percentile\t90th Percentile\n");
202 for (int i=0;i<means.length;i++) {
203 sb.append(xLabels[i]);
204 sb.append("\t");
205
206 sb.append(means[i]);
207 sb.append("\t");
208
209 sb.append(medians[i]);
210 sb.append("\t");
211
212 sb.append(lowerQuartile[i]);
213 sb.append("\t");
214
215 sb.append(upperQuartile[i]);
216 sb.append("\t");
217
218 sb.append(lowest[i]);
219 sb.append("\t");
220
221 sb.append(highest[i]);
222
223 sb.append("\n");
224 }
225 }
226
227 private double getPercentile (int minbp, int maxbp, int offset, int percentile) {
228 int count = 0;
229 double total = 0;
230
231 for (int i=minbp-1;i<maxbp;i++) {
232 if (qualityCounts[i].getTotalCount() > 100) {
233 count++;
234 total += qualityCounts[i].getPercentile(offset, percentile);
235 }
236 }
237
238 if (count > 0) {
239 return total/count;
240 }
241 return Double.NaN;
242
243 }
244
245 private double getMean (int minbp, int maxbp, int offset) {
246 int count = 0;
247 double total = 0;
248
249 for (int i=minbp-1;i<maxbp;i++) {
250 if (qualityCounts[i].getTotalCount() > 0) {
251 count++;
252 total += qualityCounts[i].getMean(offset);
253 }
254 }
255
256 if (count > 0) {
257 return total/count;
258 }
259 return 0;
260
261 }
262
263 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
27 import uk.ac.babraham.FastQC.Graphs.LineGraph;
28 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
29 import uk.ac.babraham.FastQC.Sequence.Sequence;
30
31 public class PerBaseSequenceContent extends AbstractQCModule {
32
33 public long [] gCounts = new long [0];
34 public long [] aCounts = new long [0];
35 public long [] cCounts = new long [0];
36 public long [] tCounts = new long [0];
37 private double [][] percentages = null;
38 private String [] xCategories = new String[0];
39 private boolean calculated = false;
40
41
42 public JPanel getResultsPanel() {
43
44 if (!calculated) getPercentages();
45
46 return new LineGraph(percentages, 0d, 100d, "Position in read (bp)", new String [] {"%T","%C","%A","%G"}, xCategories, "Sequence content across all bases");
47 }
48
49 public boolean ignoreFilteredSequences() {
50 return true;
51 }
52
53 public boolean ignoreInReport () {
54 if (ModuleConfig.getParam("sequence", "ignore") > 0) {
55 return true;
56 }
57 return false;
58 }
59
60 private synchronized void getPercentages () {
61
62 BaseGroup [] groups = BaseGroup.makeBaseGroups(gCounts.length);
63
64 xCategories = new String[groups.length];
65
66
67 double [] gPercent = new double[groups.length];
68 double [] aPercent = new double[groups.length];
69 double [] tPercent = new double[groups.length];
70 double [] cPercent = new double[groups.length];
71
72 long total;
73 long gCount;
74 long aCount;
75 long tCount;
76 long cCount;
77
78 for (int i=0;i<groups.length;i++) {
79
80 xCategories[i] = groups[i].toString();
81
82 gCount = 0;
83 aCount = 0;
84 tCount = 0;
85 cCount = 0;
86 total = 0;
87
88 for (int bp=groups[i].lowerCount()-1;bp<groups[i].upperCount();bp++) {
89
90 total += gCounts[bp];
91 total += cCounts[bp];
92 total += aCounts[bp];
93 total += tCounts[bp];
94
95 aCount += aCounts[bp];
96 tCount += tCounts[bp];
97 cCount += cCounts[bp];
98 gCount += gCounts[bp];
99 }
100
101 gPercent[i] = (gCount/(double)total)*100;
102 aPercent[i] = (aCount/(double)total)*100;
103 tPercent[i] = (tCount/(double)total)*100;
104 cPercent[i] = (cCount/(double)total)*100;
105
106 }
107
108 percentages = new double [][] {tPercent,cPercent,aPercent,gPercent};
109
110 calculated = true;
111 }
112
113 public void processSequence(Sequence sequence) {
114 calculated = false;
115 char [] seq = sequence.getSequence().toCharArray();
116 if (gCounts.length < seq.length) {
117
118 long [] gCountsNew = new long [seq.length];
119 long [] aCountsNew = new long [seq.length];
120 long [] cCountsNew = new long [seq.length];
121 long [] tCountsNew = new long [seq.length];
122
123 for (int i=0;i<gCounts.length;i++) {
124 gCountsNew[i] = gCounts[i];
125 aCountsNew[i] = aCounts[i];
126 tCountsNew[i] = tCounts[i];
127 cCountsNew[i] = cCounts[i];
128 }
129
130 gCounts = gCountsNew;
131 aCounts = aCountsNew;
132 tCounts = tCountsNew;
133 cCounts = cCountsNew;
134 }
135
136 for (int i=0;i<seq.length;i++) {
137 if (seq[i] == 'G') {
138 ++gCounts[i];
139 }
140 else if (seq[i] == 'A') {
141 ++aCounts[i];
142 }
143 else if (seq[i] == 'T') {
144 ++tCounts[i];
145 }
146 else if (seq[i] == 'C') {
147 ++cCounts[i];
148 }
149 }
150
151 }
152
153 public void reset () {
154 gCounts = new long[0];
155 aCounts = new long[0];
156 tCounts = new long[0];
157 cCounts = new long[0];
158 }
159
160 public String description() {
161 return "Shows the relative amounts of each base at each position in a sequencing run";
162 }
163
164 public String name() {
165 return "Per base sequence content";
166 }
167
168 public boolean raisesError() {
169 if (!calculated) getPercentages();
170
171 // Percentages come in the order TCAG
172 for (int i=0;i<percentages[0].length;i++) {
173
174 double gcDiff = Math.abs(percentages[1][i]-percentages[3][i]);
175 double atDiff = Math.abs(percentages[0][i]-percentages[2][i]);
176
177 if (gcDiff > ModuleConfig.getParam("sequence", "error") || atDiff > ModuleConfig.getParam("sequence", "error")) return true;
178
179 }
180 return false;
181 }
182
183 public boolean raisesWarning() {
184
185 if (!calculated) getPercentages();
186
187 // Percentages come in the order TCAG
188 for (int i=0;i<percentages[0].length;i++) {
189
190 double gcDiff = Math.abs(percentages[1][i]-percentages[3][i]);
191 double atDiff = Math.abs(percentages[0][i]-percentages[2][i]);
192
193 if (gcDiff > ModuleConfig.getParam("sequence", "warn") || atDiff > ModuleConfig.getParam("sequence", "warn")) return true;
194
195 }
196 return false;
197 }
198
199 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
200
201 if (!calculated) getPercentages();
202
203 writeDefaultImage(report, "per_base_sequence_content.png", "Per base sequence content", Math.max(800, xCategories.length*15), 600);
204
205 StringBuffer sb = report.dataDocument();
206 sb.append("#Base\tG\tA\tT\tC\n");
207 for (int i=0;i<xCategories.length;i++) {
208 sb.append(xCategories[i]);
209 sb.append("\t");
210 sb.append(percentages[3][i]);
211 sb.append("\t");
212 sb.append(percentages[2][i]);
213 sb.append("\t");
214 sb.append(percentages[0][i]);
215 sb.append("\t");
216 sb.append(percentages[1][i]);
217 sb.append("\n");
218 }
219
220 }
221
222 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
27 import uk.ac.babraham.FastQC.Graphs.LineGraph;
28 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
29 import uk.ac.babraham.FastQC.Sequence.Sequence;
30
31 public class PerBaseSequenceContent extends AbstractQCModule {
32
33 public long [] gCounts = new long [0];
34 public long [] aCounts = new long [0];
35 public long [] cCounts = new long [0];
36 public long [] tCounts = new long [0];
37 private double [][] percentages = null;
38 private String [] xCategories = new String[0];
39 private boolean calculated = false;
40
41
42 public JPanel getResultsPanel() {
43
44 if (!calculated) getPercentages();
45
46 return new LineGraph(percentages, 0d, 100d, "Position in read (bp)", new String [] {"%T","%C","%A","%G"}, xCategories, "Sequence content across all bases");
47 }
48
49 public boolean ignoreFilteredSequences() {
50 return true;
51 }
52
53 public boolean ignoreInReport () {
54 if (ModuleConfig.getParam("sequence", "ignore") > 0) {
55 return true;
56 }
57 return false;
58 }
59
60 private synchronized void getPercentages () {
61
62 BaseGroup [] groups = BaseGroup.makeBaseGroups(gCounts.length);
63
64 xCategories = new String[groups.length];
65
66
67 double [] gPercent = new double[groups.length];
68 double [] aPercent = new double[groups.length];
69 double [] tPercent = new double[groups.length];
70 double [] cPercent = new double[groups.length];
71
72 long total;
73 long gCount;
74 long aCount;
75 long tCount;
76 long cCount;
77
78 for (int i=0;i<groups.length;i++) {
79
80 xCategories[i] = groups[i].toString();
81
82 gCount = 0;
83 aCount = 0;
84 tCount = 0;
85 cCount = 0;
86 total = 0;
87
88 for (int bp=groups[i].lowerCount()-1;bp<groups[i].upperCount();bp++) {
89
90 total += gCounts[bp];
91 total += cCounts[bp];
92 total += aCounts[bp];
93 total += tCounts[bp];
94
95 aCount += aCounts[bp];
96 tCount += tCounts[bp];
97 cCount += cCounts[bp];
98 gCount += gCounts[bp];
99 }
100
101 gPercent[i] = (gCount/(double)total)*100;
102 aPercent[i] = (aCount/(double)total)*100;
103 tPercent[i] = (tCount/(double)total)*100;
104 cPercent[i] = (cCount/(double)total)*100;
105
106 }
107
108 percentages = new double [][] {tPercent,cPercent,aPercent,gPercent};
109
110 calculated = true;
111 }
112
113 public void processSequence(Sequence sequence) {
114 calculated = false;
115 char [] seq = sequence.getSequence().toCharArray();
116 if (gCounts.length < seq.length) {
117
118 long [] gCountsNew = new long [seq.length];
119 long [] aCountsNew = new long [seq.length];
120 long [] cCountsNew = new long [seq.length];
121 long [] tCountsNew = new long [seq.length];
122
123 for (int i=0;i<gCounts.length;i++) {
124 gCountsNew[i] = gCounts[i];
125 aCountsNew[i] = aCounts[i];
126 tCountsNew[i] = tCounts[i];
127 cCountsNew[i] = cCounts[i];
128 }
129
130 gCounts = gCountsNew;
131 aCounts = aCountsNew;
132 tCounts = tCountsNew;
133 cCounts = cCountsNew;
134 }
135
136 for (int i=0;i<seq.length;i++) {
137 if (seq[i] == 'G') {
138 ++gCounts[i];
139 }
140 else if (seq[i] == 'A') {
141 ++aCounts[i];
142 }
143 else if (seq[i] == 'T') {
144 ++tCounts[i];
145 }
146 else if (seq[i] == 'C') {
147 ++cCounts[i];
148 }
149 }
150
151 }
152
153 public void reset () {
154 gCounts = new long[0];
155 aCounts = new long[0];
156 tCounts = new long[0];
157 cCounts = new long[0];
158 }
159
160 public String description() {
161 return "Shows the relative amounts of each base at each position in a sequencing run";
162 }
163
164 public String name() {
165 return "Per base sequence content";
166 }
167
168 public boolean raisesError() {
169 if (!calculated) getPercentages();
170
171 // Percentages come in the order TCAG
172 for (int i=0;i<percentages[0].length;i++) {
173
174 double gcDiff = Math.abs(percentages[1][i]-percentages[3][i]);
175 double atDiff = Math.abs(percentages[0][i]-percentages[2][i]);
176
177 if (gcDiff > ModuleConfig.getParam("sequence", "error") || atDiff > ModuleConfig.getParam("sequence", "error")) return true;
178
179 }
180 return false;
181 }
182
183 public boolean raisesWarning() {
184
185 if (!calculated) getPercentages();
186
187 // Percentages come in the order TCAG
188 for (int i=0;i<percentages[0].length;i++) {
189
190 double gcDiff = Math.abs(percentages[1][i]-percentages[3][i]);
191 double atDiff = Math.abs(percentages[0][i]-percentages[2][i]);
192
193 if (gcDiff > ModuleConfig.getParam("sequence", "warn") || atDiff > ModuleConfig.getParam("sequence", "warn")) return true;
194
195 }
196 return false;
197 }
198
199 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
200
201 if (!calculated) getPercentages();
202
203 writeDefaultImage(report, "per_base_sequence_content.png", "Per base sequence content", Math.max(800, xCategories.length*15), 600);
204
205 StringBuffer sb = report.dataDocument();
206 sb.append("#Base\tG\tA\tT\tC\n");
207 for (int i=0;i<xCategories.length;i++) {
208 sb.append(xCategories[i]);
209 sb.append("\t");
210 sb.append(percentages[3][i]);
211 sb.append("\t");
212 sb.append(percentages[2][i]);
213 sb.append("\t");
214 sb.append(percentages[0][i]);
215 sb.append("\t");
216 sb.append(percentages[1][i]);
217 sb.append("\n");
218 }
219
220 }
221
222 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.LineGraph;
27 import uk.ac.babraham.FastQC.Modules.GCModel.GCModel;
28 import uk.ac.babraham.FastQC.Modules.GCModel.GCModelValue;
29 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
30 import uk.ac.babraham.FastQC.Sequence.Sequence;
31 import uk.ac.babraham.FastQC.Statistics.NormalDistribution;
32
33 public class PerSequenceGCContent extends AbstractQCModule {
34
35 private double [] gcDistribution = new double[101];
36 private double [] theoreticalDistribution = new double[101];
37 private int [] xCategories = new int[0];
38 private double max = 0;
39 private double deviationPercent;
40 private boolean calculated = false;
41
42 private GCModel [] cachedModels = new GCModel [200];
43
44 public JPanel getResultsPanel() {
45
46 if (!calculated) calculateDistribution();
47
48 return new LineGraph(new double [][] {gcDistribution,theoreticalDistribution}, 0d, max, "Mean GC content (%)", new String [] {"GC count per read","Theoretical Distribution"}, xCategories, "GC distribution over all sequences");
49 }
50
51 public boolean ignoreFilteredSequences() {
52 return true;
53 }
54
55 public boolean ignoreInReport () {
56 if (ModuleConfig.getParam("gc_sequence", "ignore") > 0) {
57 return true;
58 }
59 return false;
60 }
61
62
63 private synchronized void calculateDistribution () {
64 max = 0;
65 xCategories = new int[gcDistribution.length];
66 double totalCount = 0;
67
68
69 // We use the mode to calculate the theoretical distribution
70 // so that we cope better with skewed distributions.
71 int firstMode = 0;
72 double modeCount = 0;
73
74 for (int i=0;i<gcDistribution.length;i++) {
75 xCategories[i] = i;
76 totalCount += gcDistribution[i];
77
78 if (gcDistribution[i] > modeCount) {
79 modeCount = gcDistribution[i];
80 firstMode = i;
81 }
82 if (gcDistribution[i] > max) max = gcDistribution[i];
83 }
84
85 // The mode might not be a very good measure of the centre
86 // of the distribution either due to duplicated vales or
87 // several very similar values next to each other. We therefore
88 // average over adjacent points which stay above 95% of the modal
89 // value
90
91 double mode = 0;
92 int modeDuplicates = 0;
93
94 boolean fellOffTop = true;
95
96 for (int i=firstMode;i<gcDistribution.length;i++) {
97 if (gcDistribution[i] > gcDistribution[firstMode] - (gcDistribution[firstMode]/10)) {
98 mode += i;
99 modeDuplicates++;
100 }
101 else {
102 fellOffTop = false;
103 break;
104 }
105 }
106
107 boolean fellOffBottom = true;
108
109 for (int i=firstMode-1;i>=0;i--) {
110 if (gcDistribution[i] > gcDistribution[firstMode] - (gcDistribution[firstMode]/10)) {
111 mode += i;
112 modeDuplicates++;
113 }
114 else {
115 fellOffBottom = false;
116 break;
117 }
118 }
119
120 if (fellOffBottom || fellOffTop) {
121 // If the distribution is so skewed that 95% of the mode
122 // is off the 0-100% scale then we keep the mode as the
123 // centre of the model
124 mode = firstMode;
125 }
126 else {
127 mode /= modeDuplicates;
128 }
129
130
131
132 // We can now work out a theoretical distribution
133 double stdev = 0;
134
135 for (int i=0;i<gcDistribution.length;i++) {
136 stdev += Math.pow((i-mode),2) * gcDistribution[i];
137 }
138
139 stdev /= totalCount-1;
140
141 stdev = Math.sqrt(stdev);
142
143 NormalDistribution nd = new NormalDistribution(mode, stdev);
144
145 deviationPercent = 0;
146
147 for (int i=0;i<theoreticalDistribution.length;i++) {
148 double probability = nd.getZScoreForValue(i);
149 theoreticalDistribution[i] = probability*totalCount;
150
151 if (theoreticalDistribution[i] > max) {
152 max = theoreticalDistribution[i];
153 }
154
155 deviationPercent += Math.abs(theoreticalDistribution[i]-gcDistribution[i]);
156 }
157
158 deviationPercent /= totalCount;
159 deviationPercent *= 100;
160
161 // System.out.println("Percentage deviation from normality is "+deviationPercent);
162
163
164 calculated = true;
165 }
166
167 public void processSequence(Sequence sequence) {
168
169 // Because we keep a model around for every possible sequence length we
170 // encounter we need to reduce the number of models. We can do this by
171 // rounding off the sequence once we get above a certain size
172
173 char [] seq = truncateSequence(sequence);
174
175 if (seq.length == 0) return; // Ignore empty sequences
176
177
178 int thisSeqGCCount = 0;
179 for (int i=0;i<seq.length;i++) {
180 if (seq[i] == 'G' || seq[i] == 'C') {
181 ++thisSeqGCCount;
182 }
183 }
184
185 if (seq.length >= cachedModels.length) {
186 // We need to extend the length of cached models
187
188 GCModel [] longerModels = new GCModel[seq.length+1];
189 for (int i=0;i<cachedModels.length;i++) {
190 longerModels[i] = cachedModels[i];
191 }
192
193 cachedModels = longerModels;
194 }
195
196 if (cachedModels[seq.length] == null) {
197 cachedModels[seq.length] = new GCModel(seq.length);
198 }
199
200 GCModelValue [] values = cachedModels[seq.length].getModelValues(thisSeqGCCount);
201
202 for (int i=0;i<values.length;i++) {
203 gcDistribution[values[i].percentage()] += values[i].increment();
204 }
205
206 }
207
208 private char [] truncateSequence (Sequence sequence) {
209
210 String seq = sequence.getSequence();
211
212 // TODO: We should return a random chunk of sequence, rather
213 // than the start.
214
215 if (seq.length() > 1000) {
216 int length = (seq.length()/1000)*1000;
217 return seq.substring(0, length).toCharArray();
218 }
219 if (seq.length() > 100) {
220 int length = (seq.length()/100)*100;
221 return seq.substring(0, length).toCharArray();
222 }
223
224 return seq.toCharArray();
225
226 }
227
228 public void reset () {
229 gcDistribution = new double[101];
230 }
231
232 public String description() {
233 return "Shows the distribution of GC contents for whole sequences";
234 }
235
236 public String name() {
237 return "Per sequence GC content";
238 }
239
240 public boolean raisesError() {
241 if (!calculated) calculateDistribution();
242
243 return deviationPercent > ModuleConfig.getParam("gc_sequence", "error");
244 }
245
246 public boolean raisesWarning() {
247 if (!calculated) calculateDistribution();
248
249 return deviationPercent > ModuleConfig.getParam("gc_sequence", "warn");
250 }
251
252 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
253
254 writeDefaultImage(report, "per_sequence_gc_content.png", "Per sequence GC content graph", 800, 600);
255
256 StringBuffer sb = report.dataDocument();
257 sb.append("#GC Content\tCount\n");
258 for (int i=0;i<xCategories.length;i++) {
259 sb.append(xCategories[i]);
260 sb.append("\t");
261 sb.append(gcDistribution[i]);
262 sb.append("\n");
263 }
264 }
265 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.LineGraph;
27 import uk.ac.babraham.FastQC.Modules.GCModel.GCModel;
28 import uk.ac.babraham.FastQC.Modules.GCModel.GCModelValue;
29 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
30 import uk.ac.babraham.FastQC.Sequence.Sequence;
31 import uk.ac.babraham.FastQC.Statistics.NormalDistribution;
32
33 public class PerSequenceGCContent extends AbstractQCModule {
34
35 private double [] gcDistribution = new double[101];
36 private double [] theoreticalDistribution = new double[101];
37 private int [] xCategories = new int[0];
38 private double max = 0;
39 private double deviationPercent;
40 private boolean calculated = false;
41
42 private GCModel [] cachedModels = new GCModel [200];
43
44 public JPanel getResultsPanel() {
45
46 if (!calculated) calculateDistribution();
47
48 return new LineGraph(new double [][] {gcDistribution,theoreticalDistribution}, 0d, max, "Mean GC content (%)", new String [] {"GC count per read","Theoretical Distribution"}, xCategories, "GC distribution over all sequences");
49 }
50
51 public boolean ignoreFilteredSequences() {
52 return true;
53 }
54
55 public boolean ignoreInReport () {
56 if (ModuleConfig.getParam("gc_sequence", "ignore") > 0) {
57 return true;
58 }
59 return false;
60 }
61
62
63 private synchronized void calculateDistribution () {
64 max = 0;
65 xCategories = new int[gcDistribution.length];
66 double totalCount = 0;
67
68
69 // We use the mode to calculate the theoretical distribution
70 // so that we cope better with skewed distributions.
71 int firstMode = 0;
72 double modeCount = 0;
73
74 for (int i=0;i<gcDistribution.length;i++) {
75 xCategories[i] = i;
76 totalCount += gcDistribution[i];
77
78 if (gcDistribution[i] > modeCount) {
79 modeCount = gcDistribution[i];
80 firstMode = i;
81 }
82 if (gcDistribution[i] > max) max = gcDistribution[i];
83 }
84
85 // The mode might not be a very good measure of the centre
86 // of the distribution either due to duplicated vales or
87 // several very similar values next to each other. We therefore
88 // average over adjacent points which stay above 95% of the modal
89 // value
90
91 double mode = 0;
92 int modeDuplicates = 0;
93
94 boolean fellOffTop = true;
95
96 for (int i=firstMode;i<gcDistribution.length;i++) {
97 if (gcDistribution[i] > gcDistribution[firstMode] - (gcDistribution[firstMode]/10)) {
98 mode += i;
99 modeDuplicates++;
100 }
101 else {
102 fellOffTop = false;
103 break;
104 }
105 }
106
107 boolean fellOffBottom = true;
108
109 for (int i=firstMode-1;i>=0;i--) {
110 if (gcDistribution[i] > gcDistribution[firstMode] - (gcDistribution[firstMode]/10)) {
111 mode += i;
112 modeDuplicates++;
113 }
114 else {
115 fellOffBottom = false;
116 break;
117 }
118 }
119
120 if (fellOffBottom || fellOffTop) {
121 // If the distribution is so skewed that 95% of the mode
122 // is off the 0-100% scale then we keep the mode as the
123 // centre of the model
124 mode = firstMode;
125 }
126 else {
127 mode /= modeDuplicates;
128 }
129
130
131
132 // We can now work out a theoretical distribution
133 double stdev = 0;
134
135 for (int i=0;i<gcDistribution.length;i++) {
136 stdev += Math.pow((i-mode),2) * gcDistribution[i];
137 }
138
139 stdev /= totalCount-1;
140
141 stdev = Math.sqrt(stdev);
142
143 NormalDistribution nd = new NormalDistribution(mode, stdev);
144
145 deviationPercent = 0;
146
147 for (int i=0;i<theoreticalDistribution.length;i++) {
148 double probability = nd.getZScoreForValue(i);
149 theoreticalDistribution[i] = probability*totalCount;
150
151 if (theoreticalDistribution[i] > max) {
152 max = theoreticalDistribution[i];
153 }
154
155 deviationPercent += Math.abs(theoreticalDistribution[i]-gcDistribution[i]);
156 }
157
158 deviationPercent /= totalCount;
159 deviationPercent *= 100;
160
161 // System.out.println("Percentage deviation from normality is "+deviationPercent);
162
163
164 calculated = true;
165 }
166
167 public void processSequence(Sequence sequence) {
168
169 // Because we keep a model around for every possible sequence length we
170 // encounter we need to reduce the number of models. We can do this by
171 // rounding off the sequence once we get above a certain size
172
173 char [] seq = truncateSequence(sequence);
174
175 if (seq.length == 0) return; // Ignore empty sequences
176
177
178 int thisSeqGCCount = 0;
179 for (int i=0;i<seq.length;i++) {
180 if (seq[i] == 'G' || seq[i] == 'C') {
181 ++thisSeqGCCount;
182 }
183 }
184
185 if (seq.length >= cachedModels.length) {
186 // We need to extend the length of cached models
187
188 GCModel [] longerModels = new GCModel[seq.length+1];
189 for (int i=0;i<cachedModels.length;i++) {
190 longerModels[i] = cachedModels[i];
191 }
192
193 cachedModels = longerModels;
194 }
195
196 if (cachedModels[seq.length] == null) {
197 cachedModels[seq.length] = new GCModel(seq.length);
198 }
199
200 GCModelValue [] values = cachedModels[seq.length].getModelValues(thisSeqGCCount);
201
202 for (int i=0;i<values.length;i++) {
203 gcDistribution[values[i].percentage()] += values[i].increment();
204 }
205
206 }
207
208 private char [] truncateSequence (Sequence sequence) {
209
210 String seq = sequence.getSequence();
211
212 // TODO: We should return a random chunk of sequence, rather
213 // than the start.
214
215 if (seq.length() > 1000) {
216 int length = (seq.length()/1000)*1000;
217 return seq.substring(0, length).toCharArray();
218 }
219 if (seq.length() > 100) {
220 int length = (seq.length()/100)*100;
221 return seq.substring(0, length).toCharArray();
222 }
223
224 return seq.toCharArray();
225
226 }
227
228 public void reset () {
229 gcDistribution = new double[101];
230 }
231
232 public String description() {
233 return "Shows the distribution of GC contents for whole sequences";
234 }
235
236 public String name() {
237 return "Per sequence GC content";
238 }
239
240 public boolean raisesError() {
241 if (!calculated) calculateDistribution();
242
243 return deviationPercent > ModuleConfig.getParam("gc_sequence", "error");
244 }
245
246 public boolean raisesWarning() {
247 if (!calculated) calculateDistribution();
248
249 return deviationPercent > ModuleConfig.getParam("gc_sequence", "warn");
250 }
251
252 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
253
254 writeDefaultImage(report, "per_sequence_gc_content.png", "Per sequence GC content graph", 800, 600);
255
256 StringBuffer sb = report.dataDocument();
257 sb.append("#GC Content\tCount\n");
258 for (int i=0;i<xCategories.length;i++) {
259 sb.append(xCategories[i]);
260 sb.append("\t");
261 sb.append(gcDistribution[i]);
262 sb.append("\n");
263 }
264 }
265 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22 import java.util.Arrays;
23 import java.util.HashMap;
24
25 import javax.swing.JPanel;
26 import javax.xml.stream.XMLStreamException;
27
28 import uk.ac.babraham.FastQC.Graphs.LineGraph;
29 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
30 import uk.ac.babraham.FastQC.Sequence.Sequence;
31 import uk.ac.babraham.FastQC.Sequence.QualityEncoding.PhredEncoding;
32
33 public class PerSequenceQualityScores extends AbstractQCModule {
34
35 private HashMap<Integer, Long> averageScoreCounts = new HashMap<Integer, Long>();
36 private double [] qualityDistribution = null;
37 private int [] xCategories = new int[0];
38 private char lowestChar = 126;
39 private int maxCount = 0;
40 private int mostFrequentScore;
41 private boolean calculated = false;
42
43 public JPanel getResultsPanel() {
44
45 if (!calculated) calculateDistribution();
46
47 return new LineGraph(new double [][] {qualityDistribution}, 0d, maxCount, "Mean Sequence Quality (Phred Score)",new String [] {"Average Quality per read"}, xCategories, "Quality score distribution over all sequences");
48 }
49
50 public boolean ignoreInReport () {
51 // We don't show this if they didn't have any quality data.
52 if (ModuleConfig.getParam("quality_sequence", "ignore") > 0 || averageScoreCounts.size() == 0) {
53 return true;
54 }
55 return false;
56 }
57
58 private synchronized void calculateDistribution () {
59
60 PhredEncoding encoding = PhredEncoding.getFastQEncodingOffset(lowestChar);
61
62 Integer [] rawScores = averageScoreCounts.keySet().toArray(new Integer [0]);
63 Arrays.sort(rawScores);
64
65 // We'll run from the lowest to the highest
66 qualityDistribution = new double [1+(rawScores[rawScores.length-1]-rawScores[0])] ;
67
68 xCategories = new int[qualityDistribution.length];
69
70 for (int i=0;i<qualityDistribution.length;i++) {
71 xCategories[i] = (rawScores[0]+i)-encoding.offset();
72 if (averageScoreCounts.containsKey(rawScores[0]+i)) {
73 qualityDistribution[i] = averageScoreCounts.get(rawScores[0]+i);
74 }
75 }
76
77
78 for (int i=0;i<qualityDistribution.length;i++) {
79 if (qualityDistribution[i]>maxCount) {
80 maxCount = (int)qualityDistribution[i];
81 mostFrequentScore = xCategories[i];
82 }
83 }
84
85 calculated = true;
86 }
87
88 public void processSequence(Sequence sequence) {
89
90 char [] seq = sequence.getQualityString().toCharArray();
91 int averageQuality = 0;
92
93 for (int i=0;i<seq.length;i++) {
94 if (seq[i] < lowestChar) {
95 lowestChar = seq[i];
96 }
97 averageQuality += seq[i];
98 }
99
100 if (seq.length > 0) {
101 averageQuality /= seq.length;
102
103 if (averageScoreCounts.containsKey(averageQuality)) {
104 long currentCount = averageScoreCounts.get(averageQuality);
105 currentCount++;
106 averageScoreCounts.put(averageQuality, currentCount);
107 }
108 else {
109 averageScoreCounts.put(averageQuality, 1L);
110 }
111 }
112 }
113
114 public void reset () {
115 averageScoreCounts.clear();
116 lowestChar = 126;
117 maxCount = 0;
118 calculated = false;
119 }
120
121 public String description() {
122 return "Shows the distribution of average quality scores for whole sequences";
123 }
124
125 public String name() {
126 return "Per sequence quality scores";
127 }
128
129 public boolean raisesError() {
130 if (!calculated) calculateDistribution();
131
132 if (mostFrequentScore <= ModuleConfig.getParam("quality_sequence", "error")) return true;
133
134 return false;
135 }
136
137 public boolean raisesWarning() {
138 if (!calculated) calculateDistribution();
139
140 if (mostFrequentScore <= ModuleConfig.getParam("quality_sequence", "warn")) return true;
141
142 return false;
143 }
144
145 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
146 if (!calculated) calculateDistribution();
147
148 writeDefaultImage(report, "per_sequence_quality.png", "Per Sequence quality graph", 800, 600);
149
150 StringBuffer sb = report.dataDocument();
151 sb.append("#Quality\tCount\n");
152 for (int i=0;i<xCategories.length;i++) {
153 sb.append(xCategories[i]);
154 sb.append("\t");
155 sb.append(qualityDistribution[i]);
156 sb.append("\n");
157 }
158 }
159
160
161 public boolean ignoreFilteredSequences() {
162 return true;
163 }
164
165 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22 import java.util.Arrays;
23 import java.util.HashMap;
24
25 import javax.swing.JPanel;
26 import javax.xml.stream.XMLStreamException;
27
28 import uk.ac.babraham.FastQC.Graphs.LineGraph;
29 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
30 import uk.ac.babraham.FastQC.Sequence.Sequence;
31 import uk.ac.babraham.FastQC.Sequence.QualityEncoding.PhredEncoding;
32
33 public class PerSequenceQualityScores extends AbstractQCModule {
34
35 private HashMap<Integer, Long> averageScoreCounts = new HashMap<Integer, Long>();
36 private double [] qualityDistribution = null;
37 private int [] xCategories = new int[0];
38 private char lowestChar = 126;
39 private int maxCount = 0;
40 private int mostFrequentScore;
41 private boolean calculated = false;
42
43 public JPanel getResultsPanel() {
44
45 if (!calculated) calculateDistribution();
46
47 return new LineGraph(new double [][] {qualityDistribution}, 0d, maxCount, "Mean Sequence Quality (Phred Score)",new String [] {"Average Quality per read"}, xCategories, "Quality score distribution over all sequences");
48 }
49
50 public boolean ignoreInReport () {
51 // We don't show this if they didn't have any quality data.
52 if (ModuleConfig.getParam("quality_sequence", "ignore") > 0 || averageScoreCounts.size() == 0) {
53 return true;
54 }
55 return false;
56 }
57
58 private synchronized void calculateDistribution () {
59
60 PhredEncoding encoding = PhredEncoding.getFastQEncodingOffset(lowestChar);
61
62 Integer [] rawScores = averageScoreCounts.keySet().toArray(new Integer [0]);
63 Arrays.sort(rawScores);
64
65 // We'll run from the lowest to the highest
66 qualityDistribution = new double [1+(rawScores[rawScores.length-1]-rawScores[0])] ;
67
68 xCategories = new int[qualityDistribution.length];
69
70 for (int i=0;i<qualityDistribution.length;i++) {
71 xCategories[i] = (rawScores[0]+i)-encoding.offset();
72 if (averageScoreCounts.containsKey(rawScores[0]+i)) {
73 qualityDistribution[i] = averageScoreCounts.get(rawScores[0]+i);
74 }
75 }
76
77
78 for (int i=0;i<qualityDistribution.length;i++) {
79 if (qualityDistribution[i]>maxCount) {
80 maxCount = (int)qualityDistribution[i];
81 mostFrequentScore = xCategories[i];
82 }
83 }
84
85 calculated = true;
86 }
87
88 public void processSequence(Sequence sequence) {
89
90 char [] seq = sequence.getQualityString().toCharArray();
91 int averageQuality = 0;
92
93 for (int i=0;i<seq.length;i++) {
94 if (seq[i] < lowestChar) {
95 lowestChar = seq[i];
96 }
97 averageQuality += seq[i];
98 }
99
100 if (seq.length > 0) {
101 averageQuality /= seq.length;
102
103 if (averageScoreCounts.containsKey(averageQuality)) {
104 long currentCount = averageScoreCounts.get(averageQuality);
105 currentCount++;
106 averageScoreCounts.put(averageQuality, currentCount);
107 }
108 else {
109 averageScoreCounts.put(averageQuality, 1L);
110 }
111 }
112 }
113
114 public void reset () {
115 averageScoreCounts.clear();
116 lowestChar = 126;
117 maxCount = 0;
118 calculated = false;
119 }
120
121 public String description() {
122 return "Shows the distribution of average quality scores for whole sequences";
123 }
124
125 public String name() {
126 return "Per sequence quality scores";
127 }
128
129 public boolean raisesError() {
130 if (!calculated) calculateDistribution();
131
132 if (mostFrequentScore <= ModuleConfig.getParam("quality_sequence", "error")) return true;
133
134 return false;
135 }
136
137 public boolean raisesWarning() {
138 if (!calculated) calculateDistribution();
139
140 if (mostFrequentScore <= ModuleConfig.getParam("quality_sequence", "warn")) return true;
141
142 return false;
143 }
144
145 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
146 if (!calculated) calculateDistribution();
147
148 writeDefaultImage(report, "per_sequence_quality.png", "Per Sequence quality graph", 800, 600);
149
150 StringBuffer sb = report.dataDocument();
151 sb.append("#Quality\tCount\n");
152 for (int i=0;i<xCategories.length;i++) {
153 sb.append(xCategories[i]);
154 sb.append("\t");
155 sb.append(qualityDistribution[i]);
156 sb.append("\n");
157 }
158 }
159
160
161 public boolean ignoreFilteredSequences() {
162 return true;
163 }
164
165 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22 import java.util.Arrays;
23 import java.util.HashMap;
24 import java.util.Iterator;
25
26 import javax.swing.JPanel;
27 import javax.xml.stream.XMLStreamException;
28
29 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
30 import uk.ac.babraham.FastQC.Graphs.TileGraph;
31 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
32 import uk.ac.babraham.FastQC.Sequence.Sequence;
33 import uk.ac.babraham.FastQC.Sequence.QualityEncoding.PhredEncoding;
34 import uk.ac.babraham.FastQC.Utilities.QualityCount;
35
36 public class PerTileQualityScores extends AbstractQCModule {
37
38
39 public HashMap<Integer, QualityCount []> perTileQualityCounts = new HashMap<Integer, QualityCount[]>();
40 private int currentLength = 0;
41 private double [][] means = null;
42 private String [] xLabels;
43 private int [] tiles;
44 private int high = 0;
45 PhredEncoding encodingScheme;
46 private boolean calculated = false;
47
48 private long totalCount = 0;
49
50 private int splitPosition = -1;
51
52 private double maxDeviation = 0;
53
54 private boolean ignoreInReport = false;
55
56 public JPanel getResultsPanel() {
57
58 if (!calculated) getPercentages();
59
60 return new TileGraph(xLabels, tiles, means);
61
62 }
63
64 public boolean ignoreFilteredSequences() {
65 return true;
66 }
67
68 public boolean ignoreInReport () {
69 if (ignoreInReport || ModuleConfig.getParam("tile", "ignore") > 0 || currentLength == 0) {
70 return true;
71 }
72 return false;
73 }
74
75 private synchronized void getPercentages () {
76
77 char [] range = calculateOffsets();
78 encodingScheme = PhredEncoding.getFastQEncodingOffset(range[0]);
79 high = range[1] - encodingScheme.offset();
80 if (high < 35) {
81 high = 35;
82 }
83
84 BaseGroup [] groups = BaseGroup.makeBaseGroups(currentLength);
85
86 Integer [] tileNumbers = perTileQualityCounts.keySet().toArray(new Integer[0]);
87
88 Arrays.sort(tileNumbers);
89
90 tiles = new int[tileNumbers.length];
91 for (int i=0;i<tiles.length;i++) {
92 tiles[i] = tileNumbers[i];
93 }
94
95 means = new double[tileNumbers.length][groups.length];
96 xLabels = new String[groups.length];
97
98 for (int t=0;t<tileNumbers.length;t++){
99 for (int i=0;i<groups.length;i++) {
100 if (t==0)
101 xLabels[i] = groups[i].toString();
102
103 int minBase = groups[i].lowerCount();
104 int maxBase = groups[i].upperCount();
105 means[t][i] = getMean(tileNumbers[t],minBase,maxBase,encodingScheme.offset());
106 }
107 }
108
109 // Now we normalise across each column to see if there are any tiles with unusually
110 // high or low quality.
111
112 double maxDeviation = 0;
113
114 double [] averageQualitiesPerGroup = new double[groups.length];
115
116 for (int t=0;t<tileNumbers.length;t++) {
117 for (int i=0;i<groups.length;i++) {
118 averageQualitiesPerGroup[i] += means[t][i];
119 }
120 }
121
122 for (int i=0;i<averageQualitiesPerGroup.length;i++) {
123 averageQualitiesPerGroup[i] /= tileNumbers.length;
124 }
125
126 for (int i=0;i<groups.length;i++) {
127 for (int t=0;t<tileNumbers.length;t++) {
128 means[t][i] -= averageQualitiesPerGroup[i];
129 if (Math.abs(means[t][i])> maxDeviation) {
130 maxDeviation = Math.abs(means[t][i]);
131 }
132 }
133 }
134
135 this.maxDeviation = maxDeviation;
136
137 calculated = true;
138
139 }
140
141 private char [] calculateOffsets () {
142 // Works out from the set of chars what is the most
143 // likely encoding scale for this file.
144
145 char minChar = 0;
146 char maxChar = 0;
147
148 // Use the data from the first tile
149 QualityCount [] qualityCounts = perTileQualityCounts.get(perTileQualityCounts.keySet().toArray()[0]);
150
151 for (int q=0;q<qualityCounts.length;q++) {
152 if (q == 0) {
153 minChar = qualityCounts[q].getMinChar();
154 maxChar = qualityCounts[q].getMaxChar();
155 }
156 else {
157 if (qualityCounts[q].getMinChar() < minChar) {
158 minChar = qualityCounts[q].getMinChar();
159 }
160 if (qualityCounts[q].getMaxChar() > maxChar) {
161 maxChar = qualityCounts[q].getMaxChar();
162 }
163 }
164 }
165
166 return new char[] {minChar,maxChar};
167 }
168
169 public void processSequence(Sequence sequence) {
170
171 // Check if we can skip counting because the module is being ignored anyway
172 if (totalCount == 0) {
173 if (ModuleConfig.getParam("tile", "ignore") > 0) {
174 ignoreInReport = true;
175 }
176 }
177
178
179 // Don't waste time calculating this if we're not going to use it anyway
180 if (ignoreInReport) return;
181
182 calculated = false;
183
184 // Try to find the tile id. This can come in one of two forms:
185 // @HWI-1KL136:211:D1LGAACXX:1:1101:18518:48851 3:N:0:ATGTCA
186 // ^
187 // @HWUSI-EAS493_0001:2:1:1000:16900#0/1
188 // ^
189
190 // These would appear at sections 2 or 3 of an array split on :
191
192 // This module does quite a lot of work and ends up being the limiting
193 // step when calculating. We'll therefore take only a sample of the
194 // sequences to try to get a representative selection.
195
196 ++totalCount;
197 if (totalCount % 10 != 0) return;
198
199 // First try to split the id by :
200 int tile = 0;
201
202 String [] splitID = sequence.getID().split(":");
203
204
205 // If there are 7 or more fields then it's a 1.8+ file
206 try {
207
208 if (splitPosition >=0) {
209 tile = Integer.parseInt(splitID[splitPosition]);
210 }
211
212 else if (splitID.length>=7) {
213 splitPosition = 4;
214 tile = Integer.parseInt(splitID[4]);
215 }
216 else if (splitID.length >=5) {
217 splitPosition = 2;
218 // We can try the older format
219 tile = Integer.parseInt(splitID[2]);
220 }
221 else {
222 // We're not going to get a tile out of this
223 ignoreInReport = true;
224 return;
225 }
226
227
228 }
229 catch (NumberFormatException nfe) {
230 // This doesn't conform
231 ignoreInReport = true;
232 return;
233 }
234
235
236 char [] qual = sequence.getQualityString().toCharArray();
237 if (currentLength < qual.length) {
238
239 Iterator<Integer> tiles = perTileQualityCounts.keySet().iterator();
240 while (tiles.hasNext()) {
241 int thisTile = tiles.next();
242
243 QualityCount [] qualityCounts = perTileQualityCounts.get(thisTile);
244 QualityCount [] qualityCountsNew = new QualityCount[qual.length];
245
246 for (int i=0;i<qualityCounts.length;i++) {
247 qualityCountsNew[i] = qualityCounts[i];
248 }
249 for (int i=qualityCounts.length;i<qualityCountsNew.length;i++) {
250 qualityCountsNew[i] = new QualityCount();
251 }
252 perTileQualityCounts.put(thisTile, qualityCountsNew);
253 }
254
255 currentLength = qual.length;
256
257 }
258
259 if (! perTileQualityCounts.containsKey(tile)) {
260
261 if (perTileQualityCounts.size() > 500) {
262 // There are too many tiles, so we're probably parsing this wrong.
263 // Let's give up
264 System.err.println("Too many tiles (>500) so giving up trying to do per-tile qualities since we're probably parsing the file wrongly");
265 ignoreInReport = true;
266 perTileQualityCounts.clear();
267 return;
268 }
269
270 QualityCount [] qualityCounts = new QualityCount[currentLength];
271 for (int i=0;i<currentLength;i++) {
272 qualityCounts[i] = new QualityCount();
273 }
274
275 perTileQualityCounts.put(tile, qualityCounts);
276 }
277
278 QualityCount [] qualityCounts = perTileQualityCounts.get(tile);
279
280 for (int i=0;i<qual.length;i++) {
281 qualityCounts[i].addValue(qual[i]);
282 }
283
284 }
285
286 public void reset () {
287 totalCount = 0;
288 perTileQualityCounts = new HashMap<Integer, QualityCount[]>();
289 }
290
291 public String description() {
292 return "Shows the perl tile Quality scores of all bases at a given position in a sequencing run";
293 }
294
295 public String name() {
296 return "Per tile sequence quality";
297 }
298
299 public boolean raisesError() {
300 if (!calculated) getPercentages();
301
302 if (maxDeviation > ModuleConfig.getParam("tile", "error")) return true;
303 return false;
304 }
305
306 public boolean raisesWarning() {
307 if (!calculated) getPercentages();
308
309 if (maxDeviation > ModuleConfig.getParam("tile", "warn")) return true;
310 return false;
311 }
312
313 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
314 if (!calculated) getPercentages();
315
316 writeDefaultImage(report, "per_tile_quality.png", "Per base quality graph", Math.max(800, xLabels.length*15), 600);
317
318 StringBuffer sb = report.dataDocument();
319 sb.append("#Tile\tBase\tMean\n");
320
321 for (int t=0;t<tiles.length;t++) {
322 for (int i=0;i<means[t].length;i++) {
323
324 sb.append(tiles[t]);
325 sb.append("\t");
326
327 sb.append(xLabels[i]);
328 sb.append("\t");
329
330 sb.append(means[t][i]);
331
332 sb.append("\n");
333 }
334 }
335 }
336
337 private double getMean (int tile, int minbp, int maxbp, int offset) {
338 int count = 0;
339 double total = 0;
340
341 QualityCount [] qualityCounts = perTileQualityCounts.get(tile);
342
343 for (int i=minbp-1;i<maxbp;i++) {
344 if (qualityCounts[i].getTotalCount() > 0) {
345 count++;
346 total += qualityCounts[i].getMean(offset);
347 }
348 }
349
350 if (count > 0) {
351 return total/count;
352 }
353 return 0;
354
355 }
356
357
358 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22 import java.util.Arrays;
23 import java.util.HashMap;
24 import java.util.Iterator;
25
26 import javax.swing.JPanel;
27 import javax.xml.stream.XMLStreamException;
28
29 import uk.ac.babraham.FastQC.Graphs.BaseGroup;
30 import uk.ac.babraham.FastQC.Graphs.TileGraph;
31 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
32 import uk.ac.babraham.FastQC.Sequence.Sequence;
33 import uk.ac.babraham.FastQC.Sequence.QualityEncoding.PhredEncoding;
34 import uk.ac.babraham.FastQC.Utilities.QualityCount;
35
36 public class PerTileQualityScores extends AbstractQCModule {
37
38
39 public HashMap<Integer, QualityCount []> perTileQualityCounts = new HashMap<Integer, QualityCount[]>();
40 private int currentLength = 0;
41 private double [][] means = null;
42 private String [] xLabels;
43 private int [] tiles;
44 private int high = 0;
45 PhredEncoding encodingScheme;
46 private boolean calculated = false;
47
48 private long totalCount = 0;
49
50 private int splitPosition = -1;
51
52 private double maxDeviation = 0;
53
54 private boolean ignoreInReport = false;
55
56 public JPanel getResultsPanel() {
57
58 if (!calculated) getPercentages();
59
60 return new TileGraph(xLabels, tiles, means);
61
62 }
63
64 public boolean ignoreFilteredSequences() {
65 return true;
66 }
67
68 public boolean ignoreInReport () {
69 if (ignoreInReport || ModuleConfig.getParam("tile", "ignore") > 0 || currentLength == 0) {
70 return true;
71 }
72 return false;
73 }
74
75 private synchronized void getPercentages () {
76
77 char [] range = calculateOffsets();
78 encodingScheme = PhredEncoding.getFastQEncodingOffset(range[0]);
79 high = range[1] - encodingScheme.offset();
80 if (high < 35) {
81 high = 35;
82 }
83
84 BaseGroup [] groups = BaseGroup.makeBaseGroups(currentLength);
85
86 Integer [] tileNumbers = perTileQualityCounts.keySet().toArray(new Integer[0]);
87
88 Arrays.sort(tileNumbers);
89
90 tiles = new int[tileNumbers.length];
91 for (int i=0;i<tiles.length;i++) {
92 tiles[i] = tileNumbers[i];
93 }
94
95 means = new double[tileNumbers.length][groups.length];
96 xLabels = new String[groups.length];
97
98 for (int t=0;t<tileNumbers.length;t++){
99 for (int i=0;i<groups.length;i++) {
100 if (t==0)
101 xLabels[i] = groups[i].toString();
102
103 int minBase = groups[i].lowerCount();
104 int maxBase = groups[i].upperCount();
105 means[t][i] = getMean(tileNumbers[t],minBase,maxBase,encodingScheme.offset());
106 }
107 }
108
109 // Now we normalise across each column to see if there are any tiles with unusually
110 // high or low quality.
111
112 double maxDeviation = 0;
113
114 double [] averageQualitiesPerGroup = new double[groups.length];
115
116 for (int t=0;t<tileNumbers.length;t++) {
117 for (int i=0;i<groups.length;i++) {
118 averageQualitiesPerGroup[i] += means[t][i];
119 }
120 }
121
122 for (int i=0;i<averageQualitiesPerGroup.length;i++) {
123 averageQualitiesPerGroup[i] /= tileNumbers.length;
124 }
125
126 for (int i=0;i<groups.length;i++) {
127 for (int t=0;t<tileNumbers.length;t++) {
128 means[t][i] -= averageQualitiesPerGroup[i];
129 if (Math.abs(means[t][i])> maxDeviation) {
130 maxDeviation = Math.abs(means[t][i]);
131 }
132 }
133 }
134
135 this.maxDeviation = maxDeviation;
136
137 calculated = true;
138
139 }
140
141 private char [] calculateOffsets () {
142 // Works out from the set of chars what is the most
143 // likely encoding scale for this file.
144
145 char minChar = 0;
146 char maxChar = 0;
147
148 // Use the data from the first tile
149 QualityCount [] qualityCounts = perTileQualityCounts.get(perTileQualityCounts.keySet().toArray()[0]);
150
151 for (int q=0;q<qualityCounts.length;q++) {
152 if (q == 0) {
153 minChar = qualityCounts[q].getMinChar();
154 maxChar = qualityCounts[q].getMaxChar();
155 }
156 else {
157 if (qualityCounts[q].getMinChar() < minChar) {
158 minChar = qualityCounts[q].getMinChar();
159 }
160 if (qualityCounts[q].getMaxChar() > maxChar) {
161 maxChar = qualityCounts[q].getMaxChar();
162 }
163 }
164 }
165
166 return new char[] {minChar,maxChar};
167 }
168
169 public void processSequence(Sequence sequence) {
170
171 // Check if we can skip counting because the module is being ignored anyway
172 if (totalCount == 0) {
173 if (ModuleConfig.getParam("tile", "ignore") > 0) {
174 ignoreInReport = true;
175 }
176 }
177
178
179 // Don't waste time calculating this if we're not going to use it anyway
180 if (ignoreInReport) return;
181
182 calculated = false;
183
184 // Try to find the tile id. This can come in one of two forms:
185 // @HWI-1KL136:211:D1LGAACXX:1:1101:18518:48851 3:N:0:ATGTCA
186 // ^
187 // @HWUSI-EAS493_0001:2:1:1000:16900#0/1
188 // ^
189
190 // These would appear at sections 2 or 3 of an array split on :
191
192 // This module does quite a lot of work and ends up being the limiting
193 // step when calculating. We'll therefore take only a sample of the
194 // sequences to try to get a representative selection.
195
196 ++totalCount;
197 if (totalCount % 10 != 0) return;
198
199 // First try to split the id by :
200 int tile = 0;
201
202 String [] splitID = sequence.getID().split(":");
203
204
205 // If there are 7 or more fields then it's a 1.8+ file
206 try {
207
208
209 if (splitPosition >=0) {
210 // We've found a split position before so let's try to use it again
211
212
213 if (splitID.length <= splitPosition) {
214 // There isn't enough data in this header to split the way we did before
215 throw new NumberFormatException("Can't extract a number - not enough data");
216 }
217
218 tile = Integer.parseInt(splitID[splitPosition]);
219 }
220
221 else if (splitID.length>=7) {
222 splitPosition = 4;
223 tile = Integer.parseInt(splitID[4]);
224 }
225 else if (splitID.length >=5) {
226 splitPosition = 2;
227 // We can try the older format
228 tile = Integer.parseInt(splitID[2]);
229 }
230 else {
231 // We're not going to get a tile out of this
232 ignoreInReport = true;
233 return;
234 }
235
236
237 }
238 catch (NumberFormatException nfe) {
239 // This doesn't conform
240 ignoreInReport = true;
241 return;
242 }
243
244 char [] qual = sequence.getQualityString().toCharArray();
245 if (currentLength < qual.length) {
246
247 Iterator<Integer> tiles = perTileQualityCounts.keySet().iterator();
248 while (tiles.hasNext()) {
249 int thisTile = tiles.next();
250
251 QualityCount [] qualityCounts = perTileQualityCounts.get(thisTile);
252 QualityCount [] qualityCountsNew = new QualityCount[qual.length];
253
254 for (int i=0;i<qualityCounts.length;i++) {
255 qualityCountsNew[i] = qualityCounts[i];
256 }
257 for (int i=qualityCounts.length;i<qualityCountsNew.length;i++) {
258 qualityCountsNew[i] = new QualityCount();
259 }
260 perTileQualityCounts.put(thisTile, qualityCountsNew);
261 }
262
263 currentLength = qual.length;
264
265 }
266
267 if (! perTileQualityCounts.containsKey(tile)) {
268
269 if (perTileQualityCounts.size() > 1000) {
270 // There are too many tiles, so we're probably parsing this wrong.
271 // Let's give up
272 System.err.println("Too many tiles (>1000) so giving up trying to do per-tile qualities since we're probably parsing the file wrongly");
273 ignoreInReport = true;
274 perTileQualityCounts.clear();
275 return;
276 }
277
278 QualityCount [] qualityCounts = new QualityCount[currentLength];
279 for (int i=0;i<currentLength;i++) {
280 qualityCounts[i] = new QualityCount();
281 }
282
283 perTileQualityCounts.put(tile, qualityCounts);
284 }
285
286 QualityCount [] qualityCounts = perTileQualityCounts.get(tile);
287
288 for (int i=0;i<qual.length;i++) {
289 qualityCounts[i].addValue(qual[i]);
290 }
291
292 }
293
294 public void reset () {
295 totalCount = 0;
296 perTileQualityCounts = new HashMap<Integer, QualityCount[]>();
297 }
298
299 public String description() {
300 return "Shows the perl tile Quality scores of all bases at a given position in a sequencing run";
301 }
302
303 public String name() {
304 return "Per tile sequence quality";
305 }
306
307 public boolean raisesError() {
308 if (!calculated) getPercentages();
309
310 if (maxDeviation > ModuleConfig.getParam("tile", "error")) return true;
311 return false;
312 }
313
314 public boolean raisesWarning() {
315 if (!calculated) getPercentages();
316
317 if (maxDeviation > ModuleConfig.getParam("tile", "warn")) return true;
318 return false;
319 }
320
321 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
322 if (!calculated) getPercentages();
323
324 writeDefaultImage(report, "per_tile_quality.png", "Per base quality graph", Math.max(800, xLabels.length*15), 600);
325
326 StringBuffer sb = report.dataDocument();
327 sb.append("#Tile\tBase\tMean\n");
328
329 for (int t=0;t<tiles.length;t++) {
330 for (int i=0;i<means[t].length;i++) {
331
332 sb.append(tiles[t]);
333 sb.append("\t");
334
335 sb.append(xLabels[i]);
336 sb.append("\t");
337
338 sb.append(means[t][i]);
339
340 sb.append("\n");
341 }
342 }
343 }
344
345 private double getMean (int tile, int minbp, int maxbp, int offset) {
346 int count = 0;
347 double total = 0;
348
349 QualityCount [] qualityCounts = perTileQualityCounts.get(tile);
350
351 for (int i=minbp-1;i<maxbp;i++) {
352 if (qualityCounts[i].getTotalCount() > 0) {
353 count++;
354 total += qualityCounts[i].getMean(offset);
355 }
356 }
357
358 if (count > 0) {
359 return total/count;
360 }
361 return 0;
362
363 }
364
365
366 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21
22 import java.io.IOException;
23
24 import javax.swing.JPanel;
25 import javax.xml.stream.XMLStreamException;
26
27 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
28 import uk.ac.babraham.FastQC.Sequence.Sequence;
29
30 public interface QCModule {
31
32 public void processSequence(Sequence sequence);
33
34 public JPanel getResultsPanel();
35
36 public String name ();
37
38 public String description ();
39
40 public void reset ();
41
42 public boolean raisesError();
43
44 public boolean raisesWarning();
45
46 public boolean ignoreFilteredSequences();
47
48 /**
49 * Allows you to say that this module shouldn't be included in the final report.
50 * Useful for modules which have a use under some circumstances but not others.
51 * @return
52 */
53 public boolean ignoreInReport();
54
55 public void makeReport(HTMLReportArchive report) throws XMLStreamException,IOException;
56
57
58 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21
22 import java.io.IOException;
23
24 import javax.swing.JPanel;
25 import javax.xml.stream.XMLStreamException;
26
27 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
28 import uk.ac.babraham.FastQC.Sequence.Sequence;
29
30 public interface QCModule {
31
32 public void processSequence(Sequence sequence);
33
34 public JPanel getResultsPanel();
35
36 public String name ();
37
38 public String description ();
39
40 public void reset ();
41
42 public boolean raisesError();
43
44 public boolean raisesWarning();
45
46 public boolean ignoreFilteredSequences();
47
48 /**
49 * Allows you to say that this module shouldn't be included in the final report.
50 * Useful for modules which have a use under some circumstances but not others.
51 * @return
52 */
53 public boolean ignoreInReport();
54
55 public void makeReport(HTMLReportArchive report) throws XMLStreamException,IOException;
56
57
58 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.LineGraph;
27 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
28 import uk.ac.babraham.FastQC.Sequence.Sequence;
29
30 public class SequenceLengthDistribution extends AbstractQCModule {
31
32 private long [] lengthCounts = new long[0];
33 private double [] graphCounts = null;
34 private String [] xCategories = new String[0];
35 private double max = 0;
36 private boolean calculated = false;
37
38 public JPanel getResultsPanel() {
39
40 if (!calculated) calculateDistribution();
41
42 return new LineGraph(new double [][] {graphCounts}, 0d, max, "Sequence Length (bp)",new String [] {"Sequence Length"}, xCategories, "Distribution of sequence lengths over all sequences");
43 }
44
45 public boolean ignoreFilteredSequences() {
46 return true;
47 }
48
49 public boolean ignoreInReport () {
50 if (ModuleConfig.getParam("sequence_length", "ignore") > 0) {
51 return true;
52 }
53 return false;
54 }
55
56 private synchronized void calculateDistribution () {
57 int maxLen = 0;
58 int minLen = -1;
59 max = 0;
60
61 // Find the min and max lengths
62 for (int i=0;i<lengthCounts.length;i++) {
63 if (lengthCounts[i]>0) {
64 if (minLen < 0) {
65 minLen = i;
66 }
67 maxLen = i;
68 }
69 }
70
71 // We put one extra category either side of the actual size
72 if (minLen>0) minLen--;
73 maxLen++;
74
75 int [] startAndInterval = getSizeDistribution(minLen, maxLen);
76
77 // Work out how many categories we need
78 int categories = 0;
79 int currentValue = startAndInterval[0];
80 while (currentValue<= maxLen) {
81 ++categories;
82 currentValue+= startAndInterval[1];
83 }
84
85 graphCounts = new double[categories];
86 xCategories = new String[categories];
87
88 for (int i=0;i<graphCounts.length;i++) {
89
90 int minValue = startAndInterval[0]+(startAndInterval[1]*i);
91 int maxValue = (startAndInterval[0]+(startAndInterval[1]*(i+1)))-1;
92
93 if (maxValue > maxLen) {
94 maxValue = maxLen;
95 }
96
97 for (int bp=minValue;bp<=maxValue;bp++) {
98 if (bp < lengthCounts.length) {
99 graphCounts[i] += lengthCounts[bp];
100 }
101 }
102
103 if (startAndInterval[1] == 1) {
104 xCategories[i] = ""+minValue;
105 }
106 else {
107 xCategories[i] = minValue+"-"+maxValue;
108 }
109
110 if (graphCounts[i] > max) max = graphCounts[i];
111 }
112 calculated = true;
113 }
114
115 public void processSequence(Sequence sequence) {
116 int seqLen = sequence.getSequence().length();
117
118 if (seqLen+2 > lengthCounts.length) {
119 long [] newLengthCounts = new long[seqLen+2];
120 for (int i=0;i<lengthCounts.length;i++) {
121 newLengthCounts[i] = lengthCounts[i];
122 }
123 lengthCounts = newLengthCounts;
124 }
125
126 ++lengthCounts[seqLen];
127
128 }
129
130 private int [] getSizeDistribution (int min, int max) {
131
132 int base = 1;
133
134 while (base > (max-min)) {
135 base /= 10;
136 }
137
138 int interval;
139 int starting;
140
141 int [] divisions = new int [] {1,2,5};
142
143 OUTER: while (true) {
144
145 for (int d=0;d<divisions.length;d++) {
146 int tester = base * divisions[d];
147 if (((max-min) / tester) <= 50) {
148 interval = tester;
149 break OUTER;
150 }
151 }
152
153 base *=10;
154
155 }
156
157 // Now we work out the first value to be plotted
158 int basicDivision = (int)(min/interval);
159
160 int testStart = basicDivision * interval;
161
162 starting = testStart;
163
164 return new int[] {starting,interval};
165
166 }
167
168
169
170 public void reset () {
171 lengthCounts = new long[0];
172 }
173
174 public String description() {
175 return "Shows the distribution of sequence length over all sequences";
176 }
177
178 public String name() {
179 return "Sequence Length Distribution";
180 }
181
182 public boolean raisesError() {
183 if (!calculated) calculateDistribution();
184
185 // See if they've turned this test off
186 if (ModuleConfig.getParam("sequence_length", "error") == 0) {
187 return false;
188 }
189
190
191 if (lengthCounts[0] > 0) {
192 return true;
193 }
194 return false;
195 }
196
197 public boolean raisesWarning() {
198 if (!calculated) calculateDistribution();
199
200 // See if they've turned this test off
201 if (ModuleConfig.getParam("sequence_length", "warn") == 0) {
202 return false;
203 }
204
205 // Warn if they're not all the same length
206 boolean seenLength = false;
207 for (int i=0;i<lengthCounts.length;i++) {
208 if (lengthCounts[i] > 0) {
209 if (seenLength) {
210 return true;
211 }
212 else {
213 seenLength = true;
214 }
215 }
216 }
217 return false;
218 }
219
220 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
221 if (!calculated) calculateDistribution();
222
223 writeDefaultImage(report, "sequence_length_distribution.png", "Sequence length distribution", 800, 600);
224
225 StringBuffer sb = report.dataDocument();
226 sb.append("#Length\tCount\n");
227 for (int i=0;i<xCategories.length;i++) {
228 // Remove any padding we added to make the graph look better
229 if ((i==0 || i== xCategories.length-1) && graphCounts[i] == 0) {
230 continue;
231 }
232 sb.append(xCategories[i]);
233 sb.append("\t");
234 sb.append(graphCounts[i]);
235 sb.append("\n");
236 }
237 }
238
239 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Modules;
20
21 import java.io.IOException;
22
23 import javax.swing.JPanel;
24 import javax.xml.stream.XMLStreamException;
25
26 import uk.ac.babraham.FastQC.Graphs.LineGraph;
27 import uk.ac.babraham.FastQC.Report.HTMLReportArchive;
28 import uk.ac.babraham.FastQC.Sequence.Sequence;
29
30 public class SequenceLengthDistribution extends AbstractQCModule {
31
// Number of sequences seen for each length; the array index is the sequence length.
32 private long [] lengthCounts = new long[0];
// Summed counts per plotted category; built by calculateDistribution().
33 private double [] graphCounts = null;
// Label for each plotted category (a single length or a "min-max" range).
34 private String [] xCategories = new String[0];
// Largest value held in graphCounts.
35 private double max = 0;
// Set once calculateDistribution() has built graphCounts and xCategories.
36 private boolean calculated = false;
37
38 public JPanel getResultsPanel() {
39
40 if (!calculated) calculateDistribution();
41
42 return new LineGraph(new double [][] {graphCounts}, 0d, max, "Sequence Length (bp)",new String [] {"Sequence Length"}, xCategories, "Distribution of sequence lengths over all sequences");
43 }
44
45 public boolean ignoreFilteredSequences() {
46 return true;
47 }
48
49 public boolean ignoreInReport () {
50 if (ModuleConfig.getParam("sequence_length", "ignore") > 0) {
51 return true;
52 }
53 return false;
54 }
55
56 private synchronized void calculateDistribution () {
57 int maxLen = 0;
58 int minLen = -1;
59 max = 0;
60
61 // Find the min and max lengths
62 for (int i=0;i<lengthCounts.length;i++) {
63 if (lengthCounts[i]>0) {
64 if (minLen < 0) {
65 minLen = i;
66 }
67 maxLen = i;
68 }
69 }
70
71 // We put one extra category either side of the actual size
72 if (minLen>0) minLen--;
73 maxLen++;
74
75 int [] startAndInterval = getSizeDistribution(minLen, maxLen);
76
77 // Work out how many categories we need
78 int categories = 0;
79 int currentValue = startAndInterval[0];
80 while (currentValue<= maxLen) {
81 ++categories;
82 currentValue+= startAndInterval[1];
83 }
84
85 graphCounts = new double[categories];
86 xCategories = new String[categories];
87
88 for (int i=0;i<graphCounts.length;i++) {
89
90 int minValue = startAndInterval[0]+(startAndInterval[1]*i);
91 int maxValue = (startAndInterval[0]+(startAndInterval[1]*(i+1)))-1;
92
93 if (maxValue > maxLen) {
94 maxValue = maxLen;
95 }
96
97 for (int bp=minValue;bp<=maxValue;bp++) {
98 if (bp < lengthCounts.length) {
99 graphCounts[i] += lengthCounts[bp];
100 }
101 }
102
103 if (startAndInterval[1] == 1) {
104 xCategories[i] = ""+minValue;
105 }
106 else {
107 xCategories[i] = minValue+"-"+maxValue;
108 }
109
110 if (graphCounts[i] > max) max = graphCounts[i];
111 }
112 calculated = true;
113 }
114
115 public void processSequence(Sequence sequence) {
116 int seqLen = sequence.getSequence().length();
117
118 if (seqLen+2 > lengthCounts.length) {
119 long [] newLengthCounts = new long[seqLen+2];
120 for (int i=0;i<lengthCounts.length;i++) {
121 newLengthCounts[i] = lengthCounts[i];
122 }
123 lengthCounts = newLengthCounts;
124 }
125
126 ++lengthCounts[seqLen];
127
128 }
129
130 private int [] getSizeDistribution (int min, int max) {
131
132 int base = 1;
133
134 while (base > (max-min)) {
135 base /= 10;
136 }
137
138 int interval;
139 int starting;
140
141 int [] divisions = new int [] {1,2,5};
142
143 OUTER: while (true) {
144
145 for (int d=0;d<divisions.length;d++) {
146 int tester = base * divisions[d];
147 if (((max-min) / tester) <= 50) {
148 interval = tester;
149 break OUTER;
150 }
151 }
152
153 base *=10;
154
155 }
156
157 // Now we work out the first value to be plotted
158 int basicDivision = (int)(min/interval);
159
160 int testStart = basicDivision * interval;
161
162 starting = testStart;
163
164 return new int[] {starting,interval};
165
166 }
167
168
169
170 public void reset () {
171 lengthCounts = new long[0];
172 }
173
174 public String description() {
175 return "Shows the distribution of sequence length over all sequences";
176 }
177
178 public String name() {
179 return "Sequence Length Distribution";
180 }
181
182 public boolean raisesError() {
183 if (!calculated) calculateDistribution();
184
185 // See if they've turned this test off
186 if (ModuleConfig.getParam("sequence_length", "error") == 0) {
187 return false;
188 }
189
190
191 if (lengthCounts[0] > 0) {
192 return true;
193 }
194 return false;
195 }
196
197 public boolean raisesWarning() {
198 if (!calculated) calculateDistribution();
199
200 // See if they've turned this test off
201 if (ModuleConfig.getParam("sequence_length", "warn") == 0) {
202 return false;
203 }
204
205 // Warn if they're not all the same length
206 boolean seenLength = false;
207 for (int i=0;i<lengthCounts.length;i++) {
208 if (lengthCounts[i] > 0) {
209 if (seenLength) {
210 return true;
211 }
212 else {
213 seenLength = true;
214 }
215 }
216 }
217 return false;
218 }
219
220 public void makeReport(HTMLReportArchive report) throws IOException,XMLStreamException {
221 if (!calculated) calculateDistribution();
222
223 writeDefaultImage(report, "sequence_length_distribution.png", "Sequence length distribution", 800, 600);
224
225 StringBuffer sb = report.dataDocument();
226 sb.append("#Length\tCount\n");
227 for (int i=0;i<xCategories.length;i++) {
228 // Remove any padding we added to make the graph look better
229 if ((i==0 || i== xCategories.length-1) && graphCounts[i] == 0) {
230 continue;
231 }
232 sb.append(xCategories[i]);
233 sb.append("\t");
234 sb.append(graphCounts[i]);
235 sb.append("\n");
236 }
237 }
238
239 }
00 /**
1 * Copyright Copyright 2010-15 Simon Andrews
1 * Copyright Copyright 2010-17 Simon Andrews
22 *
33 * This file is part of FastQC.
44 *
/* Page-wide text and background defaults */
body {
    font-family: sans-serif;
    color: #000;
    background-color: #fff;
}

/* Links */
a {
    color: #000080;
}

a:hover {
    color: #800000;
}

/* Section headings: space above, none below */
h2 {
    color: #800000;
    padding-top: 2em;
    padding-bottom: 0;
    margin-bottom: 0;
}

/* Data tables */
table {
    margin-left: 3em;
    text-align: center;
}

th {
    padding: 0.4em;
    text-align: center;
    color: #fff;
    background-color: #000080;
}

td {
    padding: 0.4em;
    font-family: monospace;
    text-align: left;
    color: #000;
    background-color: #eee;
}

/* Images sit directly under the preceding element */
img {
    padding-top: 0;
    margin-top: 0;
    border-top: 0;
}

img.indented {
    margin-left: 3em;
}

/* Paragraphs have no gap above them */
p {
    padding-top: 0;
    margin-top: 0;
}
/* Page-wide text and background defaults */
body {
    font-family: sans-serif;
    color: #000;
    background-color: #fff;
}

/* Links */
a {
    color: #000080;
}

a:hover {
    color: #800000;
}

/* Section headings: space above, none below */
h2 {
    color: #800000;
    padding-top: 2em;
    padding-bottom: 0;
    margin-bottom: 0;
}

/* Data tables */
table {
    margin-left: 3em;
    text-align: center;
}

th {
    padding: 0.4em;
    text-align: center;
    color: #fff;
    background-color: #000080;
}

td {
    padding: 0.4em;
    font-family: monospace;
    text-align: left;
    color: #000;
    background-color: #eee;
}

/* Images sit directly under the preceding element */
img {
    padding-top: 0;
    margin-top: 0;
    border-top: 0;
}

img.indented {
    margin-left: 3em;
}

/* Paragraphs have no gap above them */
p {
    padding-top: 0;
    margin-top: 0;
}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Results;
20
21 import java.awt.BorderLayout;
22 import java.awt.Color;
23 import java.awt.Component;
24 import java.util.Vector;
25
26 import javax.swing.DefaultListCellRenderer;
27 import javax.swing.ImageIcon;
28 import javax.swing.JLabel;
29 import javax.swing.JList;
30 import javax.swing.JPanel;
31 import javax.swing.JScrollPane;
32 import javax.swing.ListSelectionModel;
33 import javax.swing.event.ListSelectionEvent;
34 import javax.swing.event.ListSelectionListener;
35
36 import uk.ac.babraham.FastQC.Analysis.AnalysisListener;
37 import uk.ac.babraham.FastQC.Modules.QCModule;
38 import uk.ac.babraham.FastQC.Sequence.SequenceFile;
39
40 public class ResultsPanel extends JPanel implements ListSelectionListener, AnalysisListener{
41
42 private static final ImageIcon ERROR_ICON = new ImageIcon(ClassLoader.getSystemResource("uk/ac/babraham/FastQC/Resources/error.png"));
43 private static final ImageIcon WARNING_ICON = new ImageIcon(ClassLoader.getSystemResource("uk/ac/babraham/FastQC/Resources/warning.png"));
44 private static final ImageIcon OK_ICON = new ImageIcon(ClassLoader.getSystemResource("uk/ac/babraham/FastQC/Resources/tick.png"));
45
46
47 private QCModule [] modules;
48 private JList moduleList;
49 private JPanel [] panels;
50 private JPanel currentPanel = null;
51 private JLabel progressLabel;
52 private SequenceFile sequenceFile;
53
54 public ResultsPanel (SequenceFile sequenceFile) {
55 this.sequenceFile = sequenceFile;
56 setLayout(new BorderLayout());
57 progressLabel = new JLabel("Waiting to start...",JLabel.CENTER);
58 add(progressLabel,BorderLayout.CENTER);
59 }
60
61 public void valueChanged(ListSelectionEvent e) {
62 int index = moduleList.getSelectedIndex();
63 if (index >= 0) {
64 remove(currentPanel);
65 currentPanel = panels[index];
66 add(currentPanel,BorderLayout.CENTER);
67 validate();
68 repaint();
69 }
70 }
71
72 public SequenceFile sequenceFile () {
73 return sequenceFile;
74 }
75
76 public QCModule [] modules () {
77 return modules;
78 }
79
80 private class ModuleRenderer extends DefaultListCellRenderer {
81
82 public Component getListCellRendererComponent(JList list, Object value, int index, boolean isSelected, boolean cellHasFocus) {
83 if (! (value instanceof QCModule)) {
84 return super.getListCellRendererComponent(list, value, index, isSelected, cellHasFocus);
85 }
86
87 QCModule module = (QCModule)value;
88 ImageIcon icon = OK_ICON;
89 if (module.raisesError()) {
90 icon = ERROR_ICON;
91 }
92 else if (module.raisesWarning()) {
93 icon = WARNING_ICON;
94 }
95
96 JLabel returnLabel = new JLabel(module.name(),icon,JLabel.LEFT);
97 returnLabel.setOpaque(true);
98 if (isSelected) {
99 returnLabel.setBackground(Color.LIGHT_GRAY);
100 }
101 else {
102 returnLabel.setBackground(Color.WHITE);
103 }
104
105 return returnLabel;
106 }
107
108 }
109
110 public void analysisComplete(SequenceFile file, QCModule[] rawModules) {
111 remove(progressLabel);
112
113 Vector<QCModule> modulesToDisplay = new Vector<QCModule>();
114
115 for (int m=0;m<rawModules.length;m++) {
116 if (!rawModules[m].ignoreInReport()) {
117 modulesToDisplay.add(rawModules[m]);
118 }
119 }
120
121 modules = modulesToDisplay.toArray(new QCModule[0]);
122
123 panels = new JPanel[modules.length];
124
125 for (int m=0;m<modules.length;m++) {
126 panels[m] = modules[m].getResultsPanel();
127 }
128
129 moduleList = new JList(modules);
130 moduleList.setCellRenderer(new ModuleRenderer());
131 moduleList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
132 moduleList.setSelectedIndex(0);
133 moduleList.addListSelectionListener(this);
134
135 add(new JScrollPane(moduleList),BorderLayout.WEST);
136
137 currentPanel = panels[0];
138 add(currentPanel,BorderLayout.CENTER);
139 validate();
140
141 }
142
143 public void analysisUpdated(SequenceFile file, int sequencesProcessed, int percentComplete) {
144 if (percentComplete > 99) {
145 progressLabel.setText("Read "+sequencesProcessed+" sequences");
146 }
147 else {
148 progressLabel.setText("Read "+sequencesProcessed+" sequences ("+percentComplete+"%)");
149 }
150 }
151
152 public void analysisExceptionReceived(SequenceFile file, Exception e) {
153 progressLabel.setText("Failed to process file: "+e.getLocalizedMessage());
154 }
155
156 public void analysisStarted(SequenceFile file) {
157 progressLabel.setText("Starting analysis...");
158 }
159
160
161 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Results;
20
21 import java.awt.BorderLayout;
22 import java.awt.Color;
23 import java.awt.Component;
24 import java.util.Vector;
25
26 import javax.swing.DefaultListCellRenderer;
27 import javax.swing.ImageIcon;
28 import javax.swing.JLabel;
29 import javax.swing.JList;
30 import javax.swing.JPanel;
31 import javax.swing.JScrollPane;
32 import javax.swing.ListSelectionModel;
33 import javax.swing.event.ListSelectionEvent;
34 import javax.swing.event.ListSelectionListener;
35
36 import uk.ac.babraham.FastQC.Analysis.AnalysisListener;
37 import uk.ac.babraham.FastQC.Modules.QCModule;
38 import uk.ac.babraham.FastQC.Sequence.SequenceFile;
39
40 public class ResultsPanel extends JPanel implements ListSelectionListener, AnalysisListener{
41
42 private static final ImageIcon ERROR_ICON = new ImageIcon(ClassLoader.getSystemResource("uk/ac/babraham/FastQC/Resources/error.png"));
43 private static final ImageIcon WARNING_ICON = new ImageIcon(ClassLoader.getSystemResource("uk/ac/babraham/FastQC/Resources/warning.png"));
44 private static final ImageIcon OK_ICON = new ImageIcon(ClassLoader.getSystemResource("uk/ac/babraham/FastQC/Resources/tick.png"));
45
46
47 private QCModule [] modules;
48 private JList moduleList;
49 private JPanel [] panels;
50 private JPanel currentPanel = null;
51 private JLabel progressLabel;
52 private SequenceFile sequenceFile;
53
54 public ResultsPanel (SequenceFile sequenceFile) {
55 this.sequenceFile = sequenceFile;
56 setLayout(new BorderLayout());
57 progressLabel = new JLabel("Waiting to start...",JLabel.CENTER);
58 add(progressLabel,BorderLayout.CENTER);
59 }
60
61 public void valueChanged(ListSelectionEvent e) {
62 int index = moduleList.getSelectedIndex();
63 if (index >= 0) {
64 remove(currentPanel);
65 currentPanel = panels[index];
66 add(currentPanel,BorderLayout.CENTER);
67 validate();
68 repaint();
69 }
70 }
71
72 public SequenceFile sequenceFile () {
73 return sequenceFile;
74 }
75
76 public QCModule [] modules () {
77 return modules;
78 }
79
80 private class ModuleRenderer extends DefaultListCellRenderer {
81
82 public Component getListCellRendererComponent(JList list, Object value, int index, boolean isSelected, boolean cellHasFocus) {
83 if (! (value instanceof QCModule)) {
84 return super.getListCellRendererComponent(list, value, index, isSelected, cellHasFocus);
85 }
86
87 QCModule module = (QCModule)value;
88 ImageIcon icon = OK_ICON;
89 if (module.raisesError()) {
90 icon = ERROR_ICON;
91 }
92 else if (module.raisesWarning()) {
93 icon = WARNING_ICON;
94 }
95
96 JLabel returnLabel = new JLabel(module.name(),icon,JLabel.LEFT);
97 returnLabel.setOpaque(true);
98 if (isSelected) {
99 returnLabel.setBackground(Color.LIGHT_GRAY);
100 }
101 else {
102 returnLabel.setBackground(Color.WHITE);
103 }
104
105 return returnLabel;
106 }
107
108 }
109
110 public void analysisComplete(SequenceFile file, QCModule[] rawModules) {
111 remove(progressLabel);
112
113 Vector<QCModule> modulesToDisplay = new Vector<QCModule>();
114
115 for (int m=0;m<rawModules.length;m++) {
116 if (!rawModules[m].ignoreInReport()) {
117 modulesToDisplay.add(rawModules[m]);
118 }
119 }
120
121 modules = modulesToDisplay.toArray(new QCModule[0]);
122
123 panels = new JPanel[modules.length];
124
125 for (int m=0;m<modules.length;m++) {
126 panels[m] = modules[m].getResultsPanel();
127 }
128
129 moduleList = new JList(modules);
130 moduleList.setCellRenderer(new ModuleRenderer());
131 moduleList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
132 moduleList.setSelectedIndex(0);
133 moduleList.addListSelectionListener(this);
134
135 add(new JScrollPane(moduleList),BorderLayout.WEST);
136
137 currentPanel = panels[0];
138 add(currentPanel,BorderLayout.CENTER);
139 validate();
140
141 }
142
143 public void analysisUpdated(SequenceFile file, int sequencesProcessed, int percentComplete) {
144 if (percentComplete > 99) {
145 progressLabel.setText("Read "+sequencesProcessed+" sequences");
146 }
147 else {
148 progressLabel.setText("Read "+sequencesProcessed+" sequences ("+percentComplete+"%)");
149 }
150 }
151
152 public void analysisExceptionReceived(SequenceFile file, Exception e) {
153 progressLabel.setText("Failed to process file: "+e.getLocalizedMessage());
154 }
155
156 public void analysisStarted(SequenceFile file) {
157 progressLabel.setText("Starting analysis...");
158 }
159
160
161 }
0 /**
1 * Copyright Copyright 2010-12 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22 import java.io.FileInputStream;
23 import java.io.IOException;
24 import java.util.Iterator;
25 import java.util.List;
26
27 import net.sf.samtools.CigarElement;
28 import net.sf.samtools.CigarOperator;
29 import net.sf.samtools.SAMFileReader;
30 import net.sf.samtools.SAMFormatException;
31 import net.sf.samtools.SAMRecord;
32
33 public class BAMFile implements SequenceFile {
34
35 private File file;
36 private boolean onlyMapped;
37 private long fileSize = 0;
38 private long recordSize = 0;
39
40 // We keep the file stream around just so we can see how far through
41 // the file we've got. We don't read from this directly, but it's the
42 // only way to access the file pointer.
43 private FileInputStream fis;
44
45 private SAMFileReader br;
46 private String name;
47 private Sequence nextSequence = null;
48 Iterator<SAMRecord> it;
49
50
51 protected BAMFile (File file, boolean onlyMapped) throws SequenceFormatException, IOException {
52 this.file = file;
53 fileSize = file.length();
54 name = file.getName();
55 this.onlyMapped = onlyMapped;
56
57 SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);
58
59 fis = new FileInputStream(file);
60
61 br = new SAMFileReader(fis);
62
63 it = br.iterator();
64 readNext();
65 }
66
67 public String name () {
68 return name;
69 }
70
71 public int getPercentComplete() {
72 if (!hasNext()) return 100;
73
74 try {
75 int percent = (int) (((double)fis.getChannel().position()/ fileSize)*100);
76 return percent;
77 }
78 catch (IOException e) {
79 e.printStackTrace();
80 }
81 return 0;
82 }
83
84 public boolean isColorspace () {
85 return false;
86 }
87
88 public boolean hasNext() {
89 return nextSequence != null;
90 }
91
92 public Sequence next () throws SequenceFormatException {
93 Sequence returnSeq = nextSequence;
94 readNext();
95 return returnSeq;
96 }
97
98 private void readNext() throws SequenceFormatException {
99
100 SAMRecord record;
101
102 while (true) {
103
104 if (!it.hasNext()) {
105 nextSequence = null;
106 try {
107 br.close();
108 fis.close();
109 }
110 catch (IOException ioe) {
111 ioe.printStackTrace();
112 }
113 return;
114 }
115
116 try {
117 record = it.next();
118 }
119 catch (SAMFormatException sfe) {
120 throw new SequenceFormatException(sfe.getMessage());
121 }
122
123 // We skip over entries with no mapping if that's what the user asked for
124 if (onlyMapped && record.getReadUnmappedFlag()) {
125 continue;
126 }
127 else {
128 break;
129 }
130 }
131
132 // This is a very rough calculation of the record size so we can approximately track progress
133 // through the file.
134 if (recordSize == 0) {
135 recordSize = (record.getReadLength()*2)+150;
136 if (br.isBinary()) {
137 recordSize /= 4;
138 }
139 }
140
141
142 String sequence = record.getReadString();
143 String qualities = record.getBaseQualityString();
144
145
146 // TODO: TEST THIS!!!
147 // If we're only working with mapped data then we need to exclude any regions which have been either
148 // hard or soft clipped by our aligner.
149 if (onlyMapped) {
150 List<CigarElement> elements = record.getCigar().getCigarElements();
151
152 // We need to clip the 3' end first otherwise the numbers at the 5' end won't be right.
153 if (elements.get(elements.size()-1).getOperator().equals(CigarOperator.S)) {
154 int value = elements.get(elements.size()-1).getLength();
155 sequence = sequence.substring(0,sequence.length()-value);
156 qualities = qualities.substring(0,qualities.length()-value);
157
158 }
159
160
161 if (elements.get(0).getOperator().equals(CigarOperator.S)) {
162 int value = elements.get(0).getLength();
163 sequence = sequence.substring(value);
164 qualities = qualities.substring(value);
165 }
166
167
168 }
169
170
171 // BAM/SAM files always show sequence relative to the top strand of
172 // the mapped reference so if this sequence maps to the reverse strand
173 // we need to reverse complement the sequence and reverse the qualities
174 // to get the original orientation of the read.
175 if (record.getReadNegativeStrandFlag()) {
176 sequence = reverseComplement(sequence);
177 qualities = reverse(qualities);
178 }
179
180 nextSequence = new Sequence(this, sequence, qualities, record.getReadName());
181
182 }
183
184
185 private String reverseComplement (String sequence) {
186
187 char [] letters = reverse(sequence).toUpperCase().toCharArray();
188 char [] rc = new char[letters.length];
189
190 for (int i=0;i<letters.length;i++) {
191 switch(letters[i]) {
192 case 'G': rc[i] = 'C';break;
193 case 'A': rc[i] = 'T';break;
194 case 'T': rc[i] = 'A';break;
195 case 'C': rc[i] = 'G';break;
196 default: rc[i] = letters[i];
197 }
198 }
199
200 return new String(rc);
201
202 }
203
204 private String reverse (String sequence) {
205 char [] starting = sequence.toCharArray();
206 char [] reversed = new char[starting.length];
207
208 for (int i=0;i<starting.length;i++) {
209 reversed[reversed.length-(1+i)] = starting[i];
210 }
211
212 return new String(reversed);
213 }
214
215 public File getFile() {
216 return file;
217 }
218
219 }
0 /**
1 * Copyright Copyright 2010-12 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22 import java.io.FileInputStream;
23 import java.io.IOException;
24 import java.util.Iterator;
25 import java.util.List;
26
27 import net.sf.samtools.CigarElement;
28 import net.sf.samtools.CigarOperator;
29 import net.sf.samtools.SAMFileReader;
30 import net.sf.samtools.SAMFormatException;
31 import net.sf.samtools.SAMRecord;
32
33 public class BAMFile implements SequenceFile {
34
35 private File file;
36 private boolean onlyMapped;
37 private long fileSize = 0;
38 private long recordSize = 0;
39
40 // We keep the file stream around just so we can see how far through
41 // the file we've got. We don't read from this directly, but it's the
42 // only way to access the file pointer.
43 private FileInputStream fis;
44
45 private SAMFileReader br;
46 private String name;
47 private Sequence nextSequence = null;
48 Iterator<SAMRecord> it;
49
50
51 protected BAMFile (File file, boolean onlyMapped) throws SequenceFormatException, IOException {
52 this.file = file;
53 fileSize = file.length();
54 name = file.getName();
55 this.onlyMapped = onlyMapped;
56
57 SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.SILENT);
58
59 fis = new FileInputStream(file);
60
61 br = new SAMFileReader(fis);
62
63 it = br.iterator();
64 readNext();
65 }
66
67 public String name () {
68 return name;
69 }
70
71 public int getPercentComplete() {
72 if (!hasNext()) return 100;
73
74 try {
75 int percent = (int) (((double)fis.getChannel().position()/ fileSize)*100);
76 return percent;
77 }
78 catch (IOException e) {
79 e.printStackTrace();
80 }
81 return 0;
82 }
83
84 public boolean isColorspace () {
85 return false;
86 }
87
88 public boolean hasNext() {
89 return nextSequence != null;
90 }
91
92 public Sequence next () throws SequenceFormatException {
93 Sequence returnSeq = nextSequence;
94 readNext();
95 return returnSeq;
96 }
97
98 private void readNext() throws SequenceFormatException {
99
100 SAMRecord record;
101
102 while (true) {
103
104 if (!it.hasNext()) {
105 nextSequence = null;
106 try {
107 br.close();
108 fis.close();
109 }
110 catch (IOException ioe) {
111 ioe.printStackTrace();
112 }
113 return;
114 }
115
116 try {
117 record = it.next();
118 }
119 catch (SAMFormatException sfe) {
120 throw new SequenceFormatException(sfe.getMessage());
121 }
122
123 // We skip over entries with no mapping if that's what the user asked for
124 if (onlyMapped && record.getReadUnmappedFlag()) {
125 continue;
126 }
127 else {
128 break;
129 }
130 }
131
132 // This is a very rough calculation of the record size so we can approximately track progress
133 // through the file.
134 if (recordSize == 0) {
135 recordSize = (record.getReadLength()*2)+150;
136 if (br.isBinary()) {
137 recordSize /= 4;
138 }
139 }
140
141
142 String sequence = record.getReadString();
143 String qualities = record.getBaseQualityString();
144
145
146 // TODO: TEST THIS!!!
147 // If we're only working with mapped data then we need to exclude any regions which have been either
148 // hard or soft clipped by our aligner.
149 if (onlyMapped) {
150 List<CigarElement> elements = record.getCigar().getCigarElements();
151
152 // We need to clip the 3' end first otherwise the numbers at the 5' end won't be right.
153 if (elements.get(elements.size()-1).getOperator().equals(CigarOperator.S)) {
154 int value = elements.get(elements.size()-1).getLength();
155 sequence = sequence.substring(0,sequence.length()-value);
156 qualities = qualities.substring(0,qualities.length()-value);
157
158 }
159
160
161 if (elements.get(0).getOperator().equals(CigarOperator.S)) {
162 int value = elements.get(0).getLength();
163 sequence = sequence.substring(value);
164 qualities = qualities.substring(value);
165 }
166
167
168 }
169
170
171 // BAM/SAM files always show sequence relative to the top strand of
172 // the mapped reference so if this sequence maps to the reverse strand
173 // we need to reverse complement the sequence and reverse the qualities
174 // to get the original orientation of the read.
175 if (record.getReadNegativeStrandFlag()) {
176 sequence = reverseComplement(sequence);
177 qualities = reverse(qualities);
178 }
179
180 nextSequence = new Sequence(this, sequence, qualities, record.getReadName());
181
182 }
183
184
185 private String reverseComplement (String sequence) {
186
187 char [] letters = reverse(sequence).toUpperCase().toCharArray();
188 char [] rc = new char[letters.length];
189
190 for (int i=0;i<letters.length;i++) {
191 switch(letters[i]) {
192 case 'G': rc[i] = 'C';break;
193 case 'A': rc[i] = 'T';break;
194 case 'T': rc[i] = 'A';break;
195 case 'C': rc[i] = 'G';break;
196 default: rc[i] = letters[i];
197 }
198 }
199
200 return new String(rc);
201
202 }
203
204 private String reverse (String sequence) {
205 char [] starting = sequence.toCharArray();
206 char [] reversed = new char[starting.length];
207
208 for (int i=0;i<starting.length;i++) {
209 reversed[reversed.length-(1+i)] = starting[i];
210 }
211
212 return new String(reversed);
213 }
214
215 public File getFile() {
216 return file;
217 }
218
219 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence.Contaminant;
20
21 public class Contaminant {
22
23 private String name;
24 private char [] forward;
25 private char [] reverse;
26
27 public Contaminant (String name, String sequence) {
28 this.name = name;
29
30 sequence = sequence.toUpperCase();
31 forward = sequence.toCharArray();
32 reverse = new char[forward.length];
33 for (int c=0;c<forward.length;c++) {
34 int revPos = (reverse.length-1)-c;
35 switch (forward[c]) {
36 case 'G':
37 reverse[revPos] = 'C';
38 break;
39 case 'A':
40 reverse[revPos] = 'T';
41 break;
42 case 'T':
43 reverse[revPos] = 'A';
44 break;
45 case 'C':
46 reverse[revPos] = 'G';
47 break;
48 default:
49 throw new IllegalArgumentException("Contaminant contained the illegal character '"+forward[c]+"'");
50 }
51 }
52
53 }
54
55 public ContaminantHit findMatch (String query) {
56 query = query.toUpperCase();
57
58 // We have a special case for queries between 8 - 20bp where we will allow a hit
59 // if it's an exact substring of this contaminant
60 if (query.length()<20 && query.length()>=8) {
61 if ((new String(forward)).contains(query)) {
62 return new ContaminantHit(this, ContaminantHit.FORWARD, query.length(), 100);
63 }
64 if ((new String(reverse)).contains(query)) {
65 return new ContaminantHit(this, ContaminantHit.REVERSE, query.length(), 100);
66 }
67
68 }
69
70
71 char [] q = query.toCharArray();
72
73 ContaminantHit bestHit = null;
74
75 // We're going to allow only one mismatch and will require
76 // a match of at least 20bp to consider this a match at all
77
78 for (int offset=0-(forward.length-20);offset<q.length-20;offset++) {
79 ContaminantHit thisHit = findMatch(forward,q,offset,ContaminantHit.FORWARD);
80 // System.out.println("Best match from offset "+offset+" was "+thisHit);
81 if (thisHit == null) continue;
82 if (bestHit == null || thisHit.length()>bestHit.length()) {
83 bestHit = thisHit;
84 }
85 }
86
87 for (int offset=0-(forward.length-20);offset<q.length-20;offset++) {
88 ContaminantHit thisHit = findMatch(reverse,q,offset,ContaminantHit.REVERSE);
89 if (thisHit == null) continue;
90 if (bestHit == null || thisHit.length()>bestHit.length()) {
91 bestHit = thisHit;
92 }
93 }
94
95 return bestHit;
96
97 }
98
99 private ContaminantHit findMatch (char [] ca, char [] cb, int offset, int direction) {
100
101 ContaminantHit bestHit = null;
102
103 int mismatchCount = 0;
104 int start = 0;
105 int end = 0;
106
107 for (int i=0;i<ca.length;i++) {
108 if (i+offset < 0) {
109 start=i+1;
110 continue;
111 }
112 if (i+offset >= cb.length) break;
113
114 if (ca[i] == cb[i+offset]) {
115 end = i;
116 }
117 else {
118 ++mismatchCount;
119 if (mismatchCount>1) {
120 // That's the end of this match, see if it's worth recording
121 if (1+(end-start) > 20) {
122 int id = (((1+(end-start))-(mismatchCount-1))*100)/(1+(end-start));
123 if (bestHit == null || bestHit.length()< 1+(end-start) || (bestHit.length() == 1+(end-start) && bestHit.percentID()<id)) {
124 // System.out.println("New best hit from "+start+"-"+end);
125 bestHit = new ContaminantHit(this, direction, 1+(end-start), id);
126 }
127 }
128 start = i+1;
129 end = i+1;
130 mismatchCount = 0;
131 }
132 }
133 }
134
135 // See if we ended with a match.
136 if (1+(end-start) > 20) {
137 int id = (((1+(end-start))-mismatchCount)*100)/(1+(end-start));
138 if (bestHit == null || bestHit.length()< 1+(end-start) || (bestHit.length() == 1+(end-start) && bestHit.percentID()<id)) {
139 // System.out.println("New best hit from "+start+"-"+end);
140 bestHit = new ContaminantHit(this, direction, 1+(end-start), id);
141 }
142 }
143
144 return bestHit;
145
146 }
147
148 public String name () {
149 return name;
150 }
151
152
153 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence.Contaminant;
20
21 public class Contaminant {
22
23 private String name;
24 private char [] forward;
25 private char [] reverse;
26
27 public Contaminant (String name, String sequence) {
28 this.name = name;
29
30 sequence = sequence.toUpperCase();
31 forward = sequence.toCharArray();
32 reverse = new char[forward.length];
33 for (int c=0;c<forward.length;c++) {
34 int revPos = (reverse.length-1)-c;
35 switch (forward[c]) {
36 case 'G':
37 reverse[revPos] = 'C';
38 break;
39 case 'A':
40 reverse[revPos] = 'T';
41 break;
42 case 'T':
43 reverse[revPos] = 'A';
44 break;
45 case 'C':
46 reverse[revPos] = 'G';
47 break;
48 default:
49 throw new IllegalArgumentException("Contaminant contained the illegal character '"+forward[c]+"'");
50 }
51 }
52
53 }
54
55 public ContaminantHit findMatch (String query) {
56 query = query.toUpperCase();
57
58 // We have a special case for queries between 8 - 20bp where we will allow a hit
59 // if it's an exact substring of this contaminant
60 if (query.length()<20 && query.length()>=8) {
61 if ((new String(forward)).contains(query)) {
62 return new ContaminantHit(this, ContaminantHit.FORWARD, query.length(), 100);
63 }
64 if ((new String(reverse)).contains(query)) {
65 return new ContaminantHit(this, ContaminantHit.REVERSE, query.length(), 100);
66 }
67
68 }
69
70
71 char [] q = query.toCharArray();
72
73 ContaminantHit bestHit = null;
74
75 // We're going to allow only one mismatch and will require
76 // a match of at least 20bp to consider this a match at all
77
78 for (int offset=0-(forward.length-20);offset<q.length-20;offset++) {
79 ContaminantHit thisHit = findMatch(forward,q,offset,ContaminantHit.FORWARD);
80 // System.out.println("Best match from offset "+offset+" was "+thisHit);
81 if (thisHit == null) continue;
82 if (bestHit == null || thisHit.length()>bestHit.length()) {
83 bestHit = thisHit;
84 }
85 }
86
87 for (int offset=0-(forward.length-20);offset<q.length-20;offset++) {
88 ContaminantHit thisHit = findMatch(reverse,q,offset,ContaminantHit.REVERSE);
89 if (thisHit == null) continue;
90 if (bestHit == null || thisHit.length()>bestHit.length()) {
91 bestHit = thisHit;
92 }
93 }
94
95 return bestHit;
96
97 }
98
99 private ContaminantHit findMatch (char [] ca, char [] cb, int offset, int direction) {
100
101 ContaminantHit bestHit = null;
102
103 int mismatchCount = 0;
104 int start = 0;
105 int end = 0;
106
107 for (int i=0;i<ca.length;i++) {
108 if (i+offset < 0) {
109 start=i+1;
110 continue;
111 }
112 if (i+offset >= cb.length) break;
113
114 if (ca[i] == cb[i+offset]) {
115 end = i;
116 }
117 else {
118 ++mismatchCount;
119 if (mismatchCount>1) {
120 // That's the end of this match, see if it's worth recording
121 if (1+(end-start) > 20) {
122 int id = (((1+(end-start))-(mismatchCount-1))*100)/(1+(end-start));
123 if (bestHit == null || bestHit.length()< 1+(end-start) || (bestHit.length() == 1+(end-start) && bestHit.percentID()<id)) {
124 // System.out.println("New best hit from "+start+"-"+end);
125 bestHit = new ContaminantHit(this, direction, 1+(end-start), id);
126 }
127 }
128 start = i+1;
129 end = i+1;
130 mismatchCount = 0;
131 }
132 }
133 }
134
135 // See if we ended with a match.
136 if (1+(end-start) > 20) {
137 int id = (((1+(end-start))-mismatchCount)*100)/(1+(end-start));
138 if (bestHit == null || bestHit.length()< 1+(end-start) || (bestHit.length() == 1+(end-start) && bestHit.percentID()<id)) {
139 // System.out.println("New best hit from "+start+"-"+end);
140 bestHit = new ContaminantHit(this, direction, 1+(end-start), id);
141 }
142 }
143
144 return bestHit;
145
146 }
147
148 public String name () {
149 return name;
150 }
151
152
153 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence.Contaminant;
20
21 public class ContaminantHit {
22
23 private Contaminant contaminant;
24 private int direction;
25 private int length;
26 private int percentID;
27
28 public static final int FORWARD = 1;
29 public static final int REVERSE = 2;
30
31 public ContaminantHit (Contaminant contaminant, int direction, int length, int percentID) {
32 if (direction == FORWARD || direction == REVERSE) {
33 this.direction = direction;
34 }
35 else {
36 throw new IllegalArgumentException("Direction of hit must be FORWARD or REVERSE");
37 }
38 this.contaminant = contaminant;
39 this.length = length;
40 this.percentID = percentID;
41 }
42
43
44 public Contaminant contaminant () {
45 return contaminant;
46 }
47
48 public int direction () {
49 return direction;
50 }
51
52 public int length () {
53 return length;
54 }
55
56 public int percentID () {
57 return percentID;
58 }
59
60 public String toString () {
61 return contaminant.name()+" ("+percentID+"% over "+length+"bp)";
62 }
63 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence.Contaminant;
20
21 public class ContaminantHit {
22
23 private Contaminant contaminant;
24 private int direction;
25 private int length;
26 private int percentID;
27
28 public static final int FORWARD = 1;
29 public static final int REVERSE = 2;
30
31 public ContaminantHit (Contaminant contaminant, int direction, int length, int percentID) {
32 if (direction == FORWARD || direction == REVERSE) {
33 this.direction = direction;
34 }
35 else {
36 throw new IllegalArgumentException("Direction of hit must be FORWARD or REVERSE");
37 }
38 this.contaminant = contaminant;
39 this.length = length;
40 this.percentID = percentID;
41 }
42
43
44 public Contaminant contaminant () {
45 return contaminant;
46 }
47
48 public int direction () {
49 return direction;
50 }
51
52 public int length () {
53 return length;
54 }
55
56 public int percentID () {
57 return percentID;
58 }
59
60 public String toString () {
61 return contaminant.name()+" ("+percentID+"% over "+length+"bp)";
62 }
63 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence.Contaminant;
20
21 import java.io.BufferedReader;
22 import java.io.FileNotFoundException;
23 import java.io.FileReader;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.InputStreamReader;
27 import java.util.Vector;
28
29 import uk.ac.babraham.FastQC.FastQCConfig;
30
31 public class ContaminentFinder {
32
33 private static Contaminant [] contaminants;
34
35 public static ContaminantHit findContaminantHit (String sequence) {
36 if (contaminants == null) {
37 contaminants = makeContaminantList();
38 }
39
40 ContaminantHit bestHit = null;
41
42 for (int c=0;c<contaminants.length;c++) {
43 ContaminantHit thisHit = contaminants[c].findMatch(sequence);
44
45 // System.out.println("Best hit from "+c+" is "+thisHit);
46
47 if (thisHit == null) continue; // No hit
48
49 if (bestHit == null || thisHit.length() > bestHit.length()) {
50 bestHit = thisHit;
51 }
52
53 }
54
55 return bestHit;
56
57 }
58
59
60 private static Contaminant [] makeContaminantList () {
61 Vector<Contaminant>c = new Vector<Contaminant>();
62
63 try {
64
65 BufferedReader br = null;
66 if (FastQCConfig.getInstance().contaminant_file == null) {
67 InputStream rsrc=ContaminentFinder.class.getResourceAsStream("/Configuration/contaminant_list.txt");
68 if (rsrc==null) throw new FileNotFoundException("cannot find Confituration/contaminant_list.txt");
69 br =new BufferedReader(new InputStreamReader(rsrc));
70 }
71 else {
72 br=new BufferedReader(new FileReader(FastQCConfig.getInstance().contaminant_file));
73 }
74
75
76
77 String line;
78 while ((line = br.readLine())!= null){
79
80 if (line.startsWith("#")) continue; // Skip comments
81 if (line.trim().length() == 0) continue; // Skip blank lines
82
83 String [] sections = line.split("\\t+");
84 if (sections.length != 2) {
85 System.err.println("Expected 2 sections for contaminant line but got "+sections.length+" from "+line);
86 continue;
87 }
88 Contaminant con = new Contaminant(sections[0], sections[1]);
89 c.add(con);
90 }
91
92 br.close();
93 }
94 catch (IOException e) {
95 e.printStackTrace();
96 }
97
98 return c.toArray(new Contaminant[0]);
99 }
100 /*
101 public static void main (String [] args) {
102
103 Config cfg=new Config();
104 String query = "agagtgtagatctccgtggtcgccgtatca";
105
106 ContaminantHit c = findContaminantHit(cfg,query);
107
108 System.out.println("Query was "+query.length()+"bp Found hit "+c);
109
110 }*/
111
112 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence.Contaminant;
20
21 import java.io.BufferedReader;
22 import java.io.FileNotFoundException;
23 import java.io.FileReader;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.InputStreamReader;
27 import java.util.Vector;
28
29 import uk.ac.babraham.FastQC.FastQCConfig;
30
31 public class ContaminentFinder {
32
33 private static Contaminant [] contaminants;
34
35 public static ContaminantHit findContaminantHit (String sequence) {
36 if (contaminants == null) {
37 contaminants = makeContaminantList();
38 }
39
40 ContaminantHit bestHit = null;
41
42 for (int c=0;c<contaminants.length;c++) {
43 ContaminantHit thisHit = contaminants[c].findMatch(sequence);
44
45 // System.out.println("Best hit from "+c+" is "+thisHit);
46
47 if (thisHit == null) continue; // No hit
48
49 if (bestHit == null || thisHit.length() > bestHit.length()) {
50 bestHit = thisHit;
51 }
52
53 }
54
55 return bestHit;
56
57 }
58
59
60 private static Contaminant [] makeContaminantList () {
61 Vector<Contaminant>c = new Vector<Contaminant>();
62
63 try {
64
65 BufferedReader br = null;
66 if (FastQCConfig.getInstance().contaminant_file == null) {
67 InputStream rsrc=ContaminentFinder.class.getResourceAsStream("/Configuration/contaminant_list.txt");
68 if (rsrc==null) throw new FileNotFoundException("cannot find Confituration/contaminant_list.txt");
69 br =new BufferedReader(new InputStreamReader(rsrc));
70 }
71 else {
72 br=new BufferedReader(new FileReader(FastQCConfig.getInstance().contaminant_file));
73 }
74
75
76
77 String line;
78 while ((line = br.readLine())!= null){
79
80 if (line.startsWith("#")) continue; // Skip comments
81 if (line.trim().length() == 0) continue; // Skip blank lines
82
83 String [] sections = line.split("\\t+");
84 if (sections.length != 2) {
85 System.err.println("Expected 2 sections for contaminant line but got "+sections.length+" from "+line);
86 continue;
87 }
88 Contaminant con = new Contaminant(sections[0], sections[1]);
89 c.add(con);
90 }
91
92 br.close();
93 }
94 catch (IOException e) {
95 e.printStackTrace();
96 }
97
98 return c.toArray(new Contaminant[0]);
99 }
100 /*
101 public static void main (String [] args) {
102
103 Config cfg=new Config();
104 String query = "agagtgtagatctccgtggtcgccgtatca";
105
106 ContaminantHit c = findContaminantHit(cfg,query);
107
108 System.out.println("Query was "+query.length()+"bp Found hit "+c);
109
110 }*/
111
112 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22 import java.io.IOException;
23
24 import ch.systemsx.cisd.hdf5.HDF5Factory;
25 import ch.systemsx.cisd.hdf5.IHDF5SimpleReader;
26
27 public class Fast5File implements SequenceFile {
28
29 private Sequence nextSequence = null;
30 private File file;
31
32 private String name;
33
34 protected Fast5File(File file) throws SequenceFormatException, IOException {
35 this.file = file;
36 name = file.getName();
37
38 IHDF5SimpleReader reader = HDF5Factory.openForReading(file);
39
40 if (reader.exists("Analyses/Basecall_2D_000/BaseCalled_template/Fastq")) {
41
42 String fastq = reader.readString("Analyses/Basecall_2D_000/BaseCalled_template/Fastq");
43
44 String [] sections = fastq.split("\\n");
45
46 if (sections.length != 4) {
47 throw new SequenceFormatException("Didn't get 4 sections from "+fastq);
48 }
49
50 nextSequence = new Sequence(this, sections[1].toUpperCase(),sections[3], sections[0]);
51 }
52 reader.close();
53
54 }
55
56 public String name() {
57 return name;
58 }
59
60 public int getPercentComplete() {
61 if (! hasNext()) return 100;
62
63 return 0;
64 }
65
66 public boolean isColorspace() {
67 return false;
68 }
69
70 public boolean hasNext() {
71 return nextSequence != null;
72 }
73
74 public Sequence next() throws SequenceFormatException {
75 Sequence seq = nextSequence;
76 nextSequence = null;
77 return seq;
78 }
79
80 public void remove() {
81 // No action here
82 }
83
84 public File getFile() {
85 return file;
86 }
87
88 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22 import java.io.IOException;
23
24 import ch.systemsx.cisd.hdf5.HDF5Factory;
25 import ch.systemsx.cisd.hdf5.IHDF5SimpleReader;
26
27 public class Fast5File implements SequenceFile {
28
29 private Sequence nextSequence = null;
30 private File file;
31
32 private String name;
33
34 protected Fast5File(File file) throws SequenceFormatException, IOException {
35 this.file = file;
36 name = file.getName();
37
38 IHDF5SimpleReader reader = HDF5Factory.openForReading(file);
39
40 String [] rdfPaths = new String [] {
41 "Analyses/Basecall_2D_000/BaseCalled_template/Fastq",
42 "Analyses/Basecall_2D_000/BaseCalled_2D/Fastq",
43 "Analyses/Basecall_1D_000/BaseCalled_template/Fastq",
44 "Analyses/Basecall_1D_000/BaseCalled_1D/Fastq"
45 };
46
47 boolean foundPath = false;
48 for (int r=0;r<rdfPaths.length;r++) {
49
50 if (reader.exists(rdfPaths[r])) {
51
52 foundPath = true;
53 String fastq = reader.readString(rdfPaths[r]);
54
55 String [] sections = fastq.split("\\n");
56
57 if (sections.length != 4) {
58 throw new SequenceFormatException("Didn't get 4 sections from "+fastq);
59 }
60
61 nextSequence = new Sequence(this, sections[1].toUpperCase(),sections[3], sections[0]);
62 break;
63 }
64 }
65
66 reader.close();
67
68 if (!foundPath) {
69 throw new SequenceFormatException("No valid fastq paths found in "+file);
70 }
71
72 }
73
74 public String name() {
75 return name;
76 }
77
78 public int getPercentComplete() {
79 if (! hasNext()) return 100;
80
81 return 0;
82 }
83
84 public boolean isColorspace() {
85 return false;
86 }
87
88 public boolean hasNext() {
89 return nextSequence != null;
90 }
91
92 public Sequence next() throws SequenceFormatException {
93 Sequence seq = nextSequence;
94 nextSequence = null;
95 return seq;
96 }
97
98 public void remove() {
99 // No action here
100 }
101
102 public File getFile() {
103 return file;
104 }
105
106 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.BufferedReader;
22 import java.io.File;
23 import java.io.FileInputStream;
24 import java.io.IOException;
25 import java.io.InputStreamReader;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28
29 import org.itadaki.bzip2.BZip2InputStream;
30
31 import uk.ac.babraham.FastQC.FastQCConfig;
32 import uk.ac.babraham.FastQC.Utilities.MultiMemberGZIPInputStream;
33
34 public class FastQFile implements SequenceFile {
35
36 private Sequence nextSequence = null;
37 private File file;
38 private long fileSize = 0;
39
40 private boolean casavaMode = false;
41 private boolean nofilter = false;
42
43 // We actually read our final data from this buffered reader
44 private BufferedReader br;
45
46 // We keep the file stream around just so we can see how far through
47 // the file we've got. We don't read from this directly, but it's the
48 // only way to access the file pointer.
49 private FileInputStream fis;
50
51 private String name;
52 private boolean isColorspace = false;
53
54 protected FastQFile(FastQCConfig config,File file) throws SequenceFormatException, IOException {
55 this.file = file;
56 if (file.getName().equals("stdin")) {
57 fileSize = Long.MAX_VALUE;
58 }
59 else {
60 fileSize = file.length();
61 }
62 name = file.getName();
63
64 if (config.casava) {
65 casavaMode = true;
66 if (config.nofilter) {
67 nofilter = true;
68 }
69 }
70
71 if (!file.getName().equals("stdin")) {
72 fis = new FileInputStream(file);
73 }
74
75 if (file.getName().equals("stdin")) {
76 br = new BufferedReader(new InputStreamReader(System.in));
77 }
78 else if (file.getName().toLowerCase().endsWith(".gz")) {
79 br = new BufferedReader(new InputStreamReader(new MultiMemberGZIPInputStream(fis)));
80 }
81 else if (file.getName().toLowerCase().endsWith(".bz2")) {
82 br = new BufferedReader(new InputStreamReader(new BZip2InputStream(fis,false)));
83 }
84
85 else {
86 br = new BufferedReader(new InputStreamReader(fis));
87 }
88 readNext();
89 }
90
91 public String name() {
92 return name;
93 }
94
95 public int getPercentComplete() {
96 if (! hasNext()) return 100;
97 if (file.getName().equals("stdin")) {
98 return 0;
99 }
100 try {
101 int percent = (int) (((double)fis.getChannel().position()/ fileSize)*100);
102 return percent;
103 }
104 catch (IOException e) {
105 e.printStackTrace();
106 }
107 return 0;
108 }
109
110 public boolean isColorspace() {
111 return isColorspace;
112 }
113
114 public void setIsColorspace(boolean isColorspace) {
115 this.isColorspace = isColorspace;
116 }
117
118 public boolean hasNext() {
119 return nextSequence != null;
120 }
121
122 public Sequence next() throws SequenceFormatException {
123 Sequence seq = nextSequence;
124 readNext();
125 return seq;
126 }
127
128 private void readNext() throws SequenceFormatException {
129 try {
130 // First line should be the id
131
132 // We might have blank lines between entries or at the end
133 // so allow for this
134 String id;
135
136 while (true) {
137 id = br.readLine();
138
139 if (id == null) {
140 nextSequence = null;
141 br.close();
142 if (fis != null) {
143 fis.close();
144 }
145 return;
146 }
147 if (id.length()==0) {
148 continue;
149 }
150
151 break;
152 }
153
154
155 if (!id.startsWith("@")) {
156 nextSequence = null;
157 throw new SequenceFormatException("ID line didn't start with '@'");
158 }
159
160 String seq;
161 String midLine;
162 String quality;
163 try {
164 // Then the sequence
165 seq = br.readLine();
166 if (seq == null) throw new IOException("No more data");
167 // Then another id which we don't need
168 midLine = br.readLine();
169 if (midLine == null) throw new IOException("No more data");
170 if (!midLine.startsWith("+")) {
171 throw new SequenceFormatException("Midline '"+midLine+"' didn't start with '+'");
172 }
173 // Then the quality string
174 quality = br.readLine();
175 if (quality == null) throw new IOException("No more data");
176 }
177 catch (IOException ioe) {
178 throw new SequenceFormatException("Ran out of data in the middle of a fastq entry. Your file is probably truncated");
179 }
180
181
182 // We only check for colourspace on the first entry. After that we assume
183 // the rest of the file is the same. For the first entry the nextSequence
184 // will be null, but we'll have real data in seq
185 if (nextSequence == null && seq != null) {
186 checkColorspace(seq);
187 }
188
189 if (isColorspace()) {
190 nextSequence = new Sequence(this,convertColorspaceToBases(seq.toUpperCase()), seq.toUpperCase(), quality, id);
191 }
192 else {
193 nextSequence = new Sequence(this, seq.toUpperCase(),quality, id);
194 }
195
196 // If we're running in --casava mode then we will flag any sequences which
197 // are marked as being filtered.
198 if (casavaMode && !nofilter) {
199
200 // This is the test illumina suggest, but it's a bit flakey, and I'm not
201 // sure it's not going to catch things it shouldn't.
202 if (id.indexOf(":Y:") > 0) {
203 nextSequence.setIsFiltered(true);
204 }
205 }
206
207
208
209
210 }
211 catch (IOException ioe) {
212 nextSequence = null;
213 ioe.printStackTrace();
214 }
215 }
216
217 private void checkColorspace(String seq) {
218 // Some basecalled files can be all dots, which leads to them
219 // being identified as colorspace data. This check should find
220 // only true colorspace files.
221 String regex = "^[GATCNgatcn][\\.0123456]+$";
222 Pattern pattern = Pattern.compile(regex);
223 Matcher matcher = pattern.matcher(seq);
224 if (matcher.find()) {
225 isColorspace = true;
226 } else {
227 isColorspace = false;
228 }
229 }
230
231 private String convertColorspaceToBases(String s) {
232
233 char[] cs = s.toUpperCase().toCharArray();
234
235 // We've had a crash report where a file contained a zero length
236 // colorspace entry. This is completely invalid, but we should
237 // handle it anyway.
238 if (cs.length == 0) {
239 return "";
240 }
241
242 char[] bp = new char[cs.length - 1];
243
244 char refBase;
245
246 for (int i = 1; i < cs.length; i++) {
247 if (i == 1) {
248 refBase = cs[i - 1];
249 } else {
250 refBase = bp[i - 2];
251 }
252 if (!(refBase == 'G' || refBase == 'A' || refBase == 'T' || refBase == 'C')) {
253 throw new IllegalArgumentException("Colourspace sequence data should always start with a real DNA letter. Line '"+s+"' started with " + refBase
254 + " at position " + i);
255 }
256 switch (cs[i]) {
257 case ('0'):
258 switch (refBase) {
259 case ('G'):
260 bp[i - 1] = 'G';
261 break;
262 case ('A'):
263 bp[i - 1] = 'A';
264 break;
265 case ('T'):
266 bp[i - 1] = 'T';
267 break;
268 case ('C'):
269 bp[i - 1] = 'C';
270 break;
271 }
272 break;
273 case ('1'):
274 switch (refBase) {
275 case ('G'):
276 bp[i - 1] = 'T';
277 break;
278 case ('A'):
279 bp[i - 1] = 'C';
280 break;
281 case ('T'):
282 bp[i - 1] = 'G';
283 break;
284 case ('C'):
285 bp[i - 1] = 'A';
286 break;
287 }
288 break;
289
290 case ('2'):
291 switch (refBase) {
292 case ('G'):
293 bp[i - 1] = 'A';
294 break;
295 case ('A'):
296 bp[i - 1] = 'G';
297 break;
298 case ('T'):
299 bp[i - 1] = 'C';
300 break;
301 case ('C'):
302 bp[i - 1] = 'T';
303 break;
304 }
305 break;
306
307 case ('3'):
308 switch (refBase) {
309 case ('G'):
310 bp[i - 1] = 'C';
311 break;
312 case ('A'):
313 bp[i - 1] = 'T';
314 break;
315 case ('T'):
316 bp[i - 1] = 'A';
317 break;
318 case ('C'):
319 bp[i - 1] = 'G';
320 break;
321 }
322 break;
323
324 case ('.'):
325 case ('4'):
326 case ('5'):
327 case ('6'):
328 for (; i < cs.length; i++) {
329 bp[i - 1] = 'N';
330 }
331 break;
332 default:
333 throw new IllegalArgumentException("Unexpected cs char "
334 + cs[i]);
335 }
336 }
337
338 return new String(bp);
339 }
340
341 public void remove() {
342 // No action here
343 }
344
345 public File getFile() {
346 return file;
347 }
348
349 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.BufferedReader;
22 import java.io.File;
23 import java.io.FileInputStream;
24 import java.io.IOException;
25 import java.io.InputStreamReader;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28
29 import org.itadaki.bzip2.BZip2InputStream;
30
31 import uk.ac.babraham.FastQC.FastQCConfig;
32 import uk.ac.babraham.FastQC.Utilities.MultiMemberGZIPInputStream;
33
34 public class FastQFile implements SequenceFile {
35
36 private Sequence nextSequence = null;
37 private File file;
38 private long fileSize = 0;
39
40 private boolean casavaMode = false;
41 private boolean nofilter = false;
42
43 // We actually read our final data from this buffered reader
44 private BufferedReader br;
45
46 // We keep the file stream around just so we can see how far through
47 // the file we've got. We don't read from this directly, but it's the
48 // only way to access the file pointer.
49 private FileInputStream fis;
50
51 private String name;
52 private boolean isColorspace = false;
53
54 protected FastQFile(FastQCConfig config,File file) throws SequenceFormatException, IOException {
55 this.file = file;
56 if (file.getName().startsWith("stdin")) {
57 fileSize = Long.MAX_VALUE;
58 }
59 else {
60 fileSize = file.length();
61 }
62 name = file.getName();
63
64 if (config.casava) {
65 casavaMode = true;
66 if (config.nofilter) {
67 nofilter = true;
68 }
69 }
70
71 if (!file.getName().startsWith("stdin")) {
72 fis = new FileInputStream(file);
73 }
74
75 if (file.getName().startsWith("stdin")) {
76 br = new BufferedReader(new InputStreamReader(System.in));
77 }
78 else if (file.getName().toLowerCase().endsWith(".gz")) {
79 br = new BufferedReader(new InputStreamReader(new MultiMemberGZIPInputStream(fis)));
80 }
81 else if (file.getName().toLowerCase().endsWith(".bz2")) {
82 br = new BufferedReader(new InputStreamReader(new BZip2InputStream(fis,false)));
83 }
84
85 else {
86 br = new BufferedReader(new InputStreamReader(fis));
87 }
88 readNext();
89 }
90
91 public String name() {
92 return name;
93 }
94
95 public int getPercentComplete() {
96 if (! hasNext()) return 100;
97 if (file.getName().startsWith("stdin")) {
98 return 0;
99 }
100 try {
101 int percent = (int) (((double)fis.getChannel().position()/ fileSize)*100);
102 return percent;
103 }
104 catch (IOException e) {
105 e.printStackTrace();
106 }
107 return 0;
108 }
109
110 public boolean isColorspace() {
111 return isColorspace;
112 }
113
114 public void setIsColorspace(boolean isColorspace) {
115 this.isColorspace = isColorspace;
116 }
117
118 public boolean hasNext() {
119 return nextSequence != null;
120 }
121
122 public Sequence next() throws SequenceFormatException {
123 Sequence seq = nextSequence;
124 readNext();
125 return seq;
126 }
127
128 private void readNext() throws SequenceFormatException {
129 try {
130 // First line should be the id
131
132 // We might have blank lines between entries or at the end
133 // so allow for this
134 String id;
135
136 while (true) {
137 id = br.readLine();
138
139 if (id == null) {
140 nextSequence = null;
141 br.close();
142 if (fis != null) {
143 fis.close();
144 }
145 return;
146 }
147 if (id.length()==0) {
148 continue;
149 }
150
151 break;
152 }
153
154
155 if (!id.startsWith("@")) {
156 nextSequence = null;
157 throw new SequenceFormatException("ID line didn't start with '@'");
158 }
159
160 String seq;
161 String midLine;
162 String quality;
163 try {
164 // Then the sequence
165 seq = br.readLine();
166 if (seq == null) throw new IOException("No more data");
167 // Then another id which we don't need
168 midLine = br.readLine();
169 if (midLine == null) throw new IOException("No more data");
170 if (!midLine.startsWith("+")) {
171 throw new SequenceFormatException("Midline '"+midLine+"' didn't start with '+'");
172 }
173 // Then the quality string
174 quality = br.readLine();
175 if (quality == null) throw new IOException("No more data");
176 }
177 catch (IOException ioe) {
178 throw new SequenceFormatException("Ran out of data in the middle of a fastq entry. Your file is probably truncated");
179 }
180
181
182 // We only check for colourspace on the first entry. After that we assume
183 // the rest of the file is the same. For the first entry the nextSequence
184 // will be null, but we'll have real data in seq
185 if (nextSequence == null && seq != null) {
186 checkColorspace(seq);
187 }
188
189 if (isColorspace()) {
190 nextSequence = new Sequence(this,convertColorspaceToBases(seq.toUpperCase()), seq.toUpperCase(), quality, id);
191 }
192 else {
193 nextSequence = new Sequence(this, seq.toUpperCase(),quality, id);
194 }
195
196 // If we're running in --casava mode then we will flag any sequences which
197 // are marked as being filtered.
198 if (casavaMode && !nofilter) {
199
200 // This is the test illumina suggest, but it's a bit flakey, and I'm not
201 // sure it's not going to catch things it shouldn't.
202 if (id.indexOf(":Y:") > 0) {
203 nextSequence.setIsFiltered(true);
204 }
205 }
206
207
208
209
210 }
211 catch (IOException ioe) {
212 nextSequence = null;
213 ioe.printStackTrace();
214 }
215 }
216
217 private void checkColorspace(String seq) {
218 // Some basecalled files can be all dots, which leads to them
219 // being identified as colorspace data. This check should find
220 // only true colorspace files.
221 String regex = "^[GATCNgatcn][\\.0123456]+$";
222 Pattern pattern = Pattern.compile(regex);
223 Matcher matcher = pattern.matcher(seq);
224 if (matcher.find()) {
225 isColorspace = true;
226 } else {
227 isColorspace = false;
228 }
229 }
230
231 private String convertColorspaceToBases(String s) {
232
233 char[] cs = s.toUpperCase().toCharArray();
234
235 // We've had a crash report where a file contained a zero length
236 // colorspace entry. This is completely invalid, but we should
237 // handle it anyway.
238 if (cs.length == 0) {
239 return "";
240 }
241
242 char[] bp = new char[cs.length - 1];
243
244 char refBase;
245
246 for (int i = 1; i < cs.length; i++) {
247 if (i == 1) {
248 refBase = cs[i - 1];
249 } else {
250 refBase = bp[i - 2];
251 }
252 if (!(refBase == 'G' || refBase == 'A' || refBase == 'T' || refBase == 'C')) {
253 throw new IllegalArgumentException("Colourspace sequence data should always start with a real DNA letter. Line '"+s+"' started with " + refBase
254 + " at position " + i);
255 }
256 switch (cs[i]) {
257 case ('0'):
258 switch (refBase) {
259 case ('G'):
260 bp[i - 1] = 'G';
261 break;
262 case ('A'):
263 bp[i - 1] = 'A';
264 break;
265 case ('T'):
266 bp[i - 1] = 'T';
267 break;
268 case ('C'):
269 bp[i - 1] = 'C';
270 break;
271 }
272 break;
273 case ('1'):
274 switch (refBase) {
275 case ('G'):
276 bp[i - 1] = 'T';
277 break;
278 case ('A'):
279 bp[i - 1] = 'C';
280 break;
281 case ('T'):
282 bp[i - 1] = 'G';
283 break;
284 case ('C'):
285 bp[i - 1] = 'A';
286 break;
287 }
288 break;
289
290 case ('2'):
291 switch (refBase) {
292 case ('G'):
293 bp[i - 1] = 'A';
294 break;
295 case ('A'):
296 bp[i - 1] = 'G';
297 break;
298 case ('T'):
299 bp[i - 1] = 'C';
300 break;
301 case ('C'):
302 bp[i - 1] = 'T';
303 break;
304 }
305 break;
306
307 case ('3'):
308 switch (refBase) {
309 case ('G'):
310 bp[i - 1] = 'C';
311 break;
312 case ('A'):
313 bp[i - 1] = 'T';
314 break;
315 case ('T'):
316 bp[i - 1] = 'A';
317 break;
318 case ('C'):
319 bp[i - 1] = 'G';
320 break;
321 }
322 break;
323
324 case ('.'):
325 case ('4'):
326 case ('5'):
327 case ('6'):
328 for (; i < cs.length; i++) {
329 bp[i - 1] = 'N';
330 }
331 break;
332 default:
333 throw new IllegalArgumentException("Unexpected cs char "
334 + cs[i]);
335 }
336 }
337
338 return new String(bp);
339 }
340
341 public void remove() {
342 // No action here
343 }
344
345 public File getFile() {
346 return file;
347 }
348
349 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence.QualityEncoding;
20
/**
 * Describes a FastQ quality encoding (a display name plus the ASCII offset
 * subtracted from a quality character) and provides conversions between
 * quality scores and error probabilities.
 */
public class PhredEncoding {

	private static final int SANGER_ENCODING_OFFSET = 33;
	private static final int ILLUMINA_1_3_ENCODING_OFFSET = 64;

	private final String name;
	private final int offset;

	/**
	 * Guesses the encoding from the lowest quality character seen in a file.
	 *
	 * @param lowestChar the lowest quality character observed
	 * @return the matching encoding
	 * @throws IllegalArgumentException if the character is below 33 or above 126
	 */
	public static PhredEncoding getFastQEncodingOffset (char lowestChar) {
		if (lowestChar < SANGER_ENCODING_OFFSET) {
			throw new IllegalArgumentException("No known encodings with chars < 33 (Yours was "+lowestChar+")");
		}
		else if (lowestChar < ILLUMINA_1_3_ENCODING_OFFSET) {
			return new PhredEncoding("Sanger / Illumina 1.9", SANGER_ENCODING_OFFSET);
		}

		// There are potentially two encodings using an offset of 64.  Illumina
		// v1.3 allowed quality values of 1, whereas from v1.5 onwards the lowest
		// value allowed was 2.  If we guess wrong between these two then it's not
		// the end of the world since they use the same offset.
		else if (lowestChar == ILLUMINA_1_3_ENCODING_OFFSET+1) {
			return new PhredEncoding("Illumina 1.3", ILLUMINA_1_3_ENCODING_OFFSET);
		}
		else if (lowestChar <= 126) {
			return new PhredEncoding("Illumina 1.5", ILLUMINA_1_3_ENCODING_OFFSET);
		}
		throw new IllegalArgumentException("No known encodings with chars > 126 (Yours was "+lowestChar+")");
	}

	/**
	 * Converts a Sanger-scaled score to an error probability: p = 10^(-Q/10).
	 */
	public static double convertSangerPhredToProbability (int phred) {
		return Math.pow(10,phred/-10d);
	}

	/**
	 * Converts an odds-scaled (old Illumina) score to an error probability.
	 * This is the inverse of {@link #convertProbabilityToOldIlluminaPhred(double)}:
	 * the score encodes the odds p/(1-p) = 10^(-Q/10), so p = odds/(1+odds).
	 */
	public static double convertOldIlluminaPhredToProbability (int phred) {
		double odds = Math.pow(10, phred/-10d);
		return odds/(1+odds);
	}

	/**
	 * Converts an error probability to a Sanger-scaled score: Q = -10*log10(p),
	 * rounded to the nearest integer.
	 */
	public static int convertProbabilityToSangerPhred (double p) {
		return (int)Math.round(-10d*Math.log10(p));
	}

	/**
	 * Converts an error probability to an odds-scaled (old Illumina) score:
	 * Q = -10*log10(p/(1-p)), rounded to the nearest integer.
	 */
	public static int convertProbabilityToOldIlluminaPhred (double p) {
		// The odds need explicit brackets: "p/1-p" parses as (p/1)-p, which is always 0.
		return (int)Math.round(-10d*Math.log10(p/(1-p)));
	}

	private PhredEncoding (String name, int offset) {
		this.name = name;
		this.offset = offset;
	}

	/** @return the display name of this encoding */
	public String name () {
		return name;
	}

	@Override
	public String toString () {
		return name();
	}

	/** @return the ASCII offset used by this encoding */
	public int offset () {
		return offset;
	}


	/** Small manual demonstration of the Sanger conversions. */
	public static void main (String [] args) {
		double p = 0.4;

		System.out.println("Sanger phred for p="+p+" is "+convertProbabilityToSangerPhred(p));

		int phred=4;
		System.out.println("P value for Sanger phred="+phred+" is "+convertSangerPhredToProbability(phred));

	}
}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence.QualityEncoding;
20
/**
 * Describes a FastQ quality encoding (a display name plus the ASCII offset
 * subtracted from a quality character) and provides conversions between
 * quality scores and error probabilities.
 */
public class PhredEncoding {

	private static final int SANGER_ENCODING_OFFSET = 33;
	private static final int ILLUMINA_1_3_ENCODING_OFFSET = 64;

	private final String name;
	private final int offset;

	/**
	 * Guesses the encoding from the lowest quality character seen in a file.
	 *
	 * @param lowestChar the lowest quality character observed
	 * @return the matching encoding
	 * @throws IllegalArgumentException if the character is below 33 or above 126
	 */
	public static PhredEncoding getFastQEncodingOffset (char lowestChar) {
		if (lowestChar < SANGER_ENCODING_OFFSET) {
			throw new IllegalArgumentException("No known encodings with chars < 33 (Yours was "+lowestChar+")");
		}
		else if (lowestChar < ILLUMINA_1_3_ENCODING_OFFSET) {
			return new PhredEncoding("Sanger / Illumina 1.9", SANGER_ENCODING_OFFSET);
		}

		// There are potentially two encodings using an offset of 64.  Illumina
		// v1.3 allowed quality values of 1, whereas from v1.5 onwards the lowest
		// value allowed was 2.  If we guess wrong between these two then it's not
		// the end of the world since they use the same offset.
		else if (lowestChar == ILLUMINA_1_3_ENCODING_OFFSET+1) {
			return new PhredEncoding("Illumina 1.3", ILLUMINA_1_3_ENCODING_OFFSET);
		}
		else if (lowestChar <= 126) {
			return new PhredEncoding("Illumina 1.5", ILLUMINA_1_3_ENCODING_OFFSET);
		}
		throw new IllegalArgumentException("No known encodings with chars > 126 (Yours was "+lowestChar+")");
	}

	/**
	 * Converts a Sanger-scaled score to an error probability: p = 10^(-Q/10).
	 */
	public static double convertSangerPhredToProbability (int phred) {
		return Math.pow(10,phred/-10d);
	}

	/**
	 * Converts an odds-scaled (old Illumina) score to an error probability.
	 * This is the inverse of {@link #convertProbabilityToOldIlluminaPhred(double)}:
	 * the score encodes the odds p/(1-p) = 10^(-Q/10), so p = odds/(1+odds).
	 */
	public static double convertOldIlluminaPhredToProbability (int phred) {
		double odds = Math.pow(10, phred/-10d);
		return odds/(1+odds);
	}

	/**
	 * Converts an error probability to a Sanger-scaled score: Q = -10*log10(p),
	 * rounded to the nearest integer.
	 */
	public static int convertProbabilityToSangerPhred (double p) {
		return (int)Math.round(-10d*Math.log10(p));
	}

	/**
	 * Converts an error probability to an odds-scaled (old Illumina) score:
	 * Q = -10*log10(p/(1-p)), rounded to the nearest integer.
	 */
	public static int convertProbabilityToOldIlluminaPhred (double p) {
		// The odds need explicit brackets: "p/1-p" parses as (p/1)-p, which is always 0.
		return (int)Math.round(-10d*Math.log10(p/(1-p)));
	}

	private PhredEncoding (String name, int offset) {
		this.name = name;
		this.offset = offset;
	}

	/** @return the display name of this encoding */
	public String name () {
		return name;
	}

	@Override
	public String toString () {
		return name();
	}

	/** @return the ASCII offset used by this encoding */
	public int offset () {
		return offset;
	}


	/** Small manual demonstration of the Sanger conversions. */
	public static void main (String [] args) {
		double p = 0.4;

		System.out.println("Sanger phred for p="+p+" is "+convertProbabilityToSangerPhred(p));

		int phred=4;
		System.out.println("P value for Sanger phred="+phred+" is "+convertSangerPhredToProbability(phred));

	}
}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 public class Sequence {
22
23 private String sequence;
24 private String quality;
25 private String id;
26 private SequenceFile file;
27 private String colorspace;
28 private boolean isFiltered;
29
30 public Sequence (SequenceFile file,String sequence, String quality, String id) {
31 this.id = id;
32 this.file = file;
33 this.sequence = sequence.toUpperCase();
34 this.quality = quality;
35 this.colorspace = null;
36 this.isFiltered = false;
37 }
38
39 public Sequence (SequenceFile file,String sequence, String colorspace, String quality, String id) {
40 this.id = id;
41 this.file = file;
42 this.sequence = sequence;
43 this.quality = quality;
44 this.colorspace = colorspace;
45 }
46
47 public void setIsFiltered (boolean isFiltered) {
48 this.isFiltered = isFiltered;
49 }
50
51 public boolean isFiltered () {
52 return isFiltered;
53 }
54
55 public SequenceFile file () {
56 return file;
57 }
58
59 public String getSequence () {
60 return sequence;
61 }
62
63 public String getColorspace () {
64 return colorspace;
65 }
66
67 public String getQualityString () {
68 return quality;
69 }
70
71 public String getID () {
72 return id;
73 }
74
75 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 public class Sequence {
22
23 private String sequence;
24 private String quality;
25 private String id;
26 private SequenceFile file;
27 private String colorspace;
28 private boolean isFiltered;
29
30 public Sequence (SequenceFile file,String sequence, String quality, String id) {
31 this.id = id;
32 this.file = file;
33 this.sequence = sequence.toUpperCase();
34 this.quality = quality;
35 this.colorspace = null;
36 this.isFiltered = false;
37 }
38
39 public Sequence (SequenceFile file,String sequence, String colorspace, String quality, String id) {
40 this.id = id;
41 this.file = file;
42 this.sequence = sequence;
43 this.quality = quality;
44 this.colorspace = colorspace;
45 }
46
47 public void setIsFiltered (boolean isFiltered) {
48 this.isFiltered = isFiltered;
49 }
50
51 public boolean isFiltered () {
52 return isFiltered;
53 }
54
55 public SequenceFile file () {
56 return file;
57 }
58
59 public String getSequence () {
60 return sequence;
61 }
62
63 public String getColorspace () {
64 return colorspace;
65 }
66
67 public String getQualityString () {
68 return quality;
69 }
70
71 public String getID () {
72 return id;
73 }
74
75 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22 import java.io.IOException;
23
24 import uk.ac.babraham.FastQC.FastQCConfig;
25 import uk.ac.babraham.FastQC.Utilities.CasavaBasename;
26 import uk.ac.babraham.FastQC.Utilities.NameFormatException;
27
28 public class SequenceFactory {
29 /**
30 *
31 * This option is used when multiple files are to be treated as a group to produce
32 * a single output. This is currently used for groups of files generated by casava
33 *
34 * @param files
35 * @return
36 * @throws SequenceFormatException
37 * @throws IOException
38 */
39 public static SequenceFile getSequenceFile (File [] files) throws SequenceFormatException, IOException {
40
41 /*
42 * We used to build a set of SequenceFile objects to make a sequence group, but we found that
43 * doing this caused too many files to be open simultaneously and caused crashes on large
44 * runs. We now just pass the files to the group and let it open them as and when they're needed.
45 */
46
47 if (files.length == 1) {
48
49 if (FastQCConfig.getInstance().casava) {
50 try {
51 // We do this simply to find out if the casava basename is valid. If it is then the
52 // Sequencefilegroup is created at the end of this sub. If it's not then we do a
53 // simple creation without modifying the file name.
54 CasavaBasename.getCasavaBasename(files[0].getName());
55 }
56 catch (NameFormatException nfe) {
57 return getSequenceFile(files[0]);
58 }
59 }
60 else {
61 return getSequenceFile(files[0]);
62 }
63 }
64
65
66 return new SequenceFileGroup(files);
67
68 }
69
70 public static SequenceFile getSequenceFile(File file) throws SequenceFormatException, IOException {
71
72 FastQCConfig config = FastQCConfig.getInstance();
73
74 // System.err.println("Format is "+config.sequence_format);
75
76 if (config.sequence_format != null) {
77 // We're not autodetecting the format, but taking whatever they said
78
79 if (config.sequence_format.equals("bam") || config.sequence_format.equals("sam")) {
80 return new BAMFile(file,false);
81 }
82 else if (config.sequence_format.equals("bam_mapped") || config.sequence_format.equals("sam_mapped")) {
83 return new BAMFile(file,true);
84 }
85 else if (config.sequence_format.equals("fastq")) {
86 return new FastQFile(config,file);
87 }
88 else {
89 throw new SequenceFormatException("Didn't understand format name '"+config.sequence_format+"'");
90 }
91
92 }
93
94
95 // Otherwise we just use the extension on the end of the file name to try to determine
96 // the type
97 if (file.getName().toLowerCase().endsWith(".bam") || file.getName().toLowerCase().endsWith(".sam")) {
98 // We default to using all reads
99 return new BAMFile(file,false);
100 }
101 else if (file.getName().toLowerCase().endsWith(".fast5")) {
102 return new Fast5File(file);
103 }
104 else {
105 return new FastQFile(config,file);
106 }
107
108
109 }
110
111
112 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22 import java.io.IOException;
23
24 import uk.ac.babraham.FastQC.FastQCConfig;
25 import uk.ac.babraham.FastQC.Utilities.CasavaBasename;
26 import uk.ac.babraham.FastQC.Utilities.NameFormatException;
27
28 public class SequenceFactory {
29 /**
30 *
31 * This option is used when multiple files are to be treated as a group to produce
32 * a single output. This is currently used for groups of files generated by casava
33 *
34 * @param files
35 * @return
36 * @throws SequenceFormatException
37 * @throws IOException
38 */
39 public static SequenceFile getSequenceFile (File [] files) throws SequenceFormatException, IOException {
40
41 /*
42 * We used to build a set of SequenceFile objects to make a sequence group, but we found that
43 * doing this caused too many files to be open simultaneously and caused crashes on large
44 * runs. We now just pass the files to the group and let it open them as and when they're needed.
45 */
46
47 if (files.length == 1) {
48
49 if (FastQCConfig.getInstance().casava) {
50 try {
51 // We do this simply to find out if the casava basename is valid. If it is then the
52 // Sequencefilegroup is created at the end of this sub. If it's not then we do a
53 // simple creation without modifying the file name.
54 CasavaBasename.getCasavaBasename(files[0].getName());
55 }
56 catch (NameFormatException nfe) {
57 return getSequenceFile(files[0]);
58 }
59 }
60 else {
61 return getSequenceFile(files[0]);
62 }
63 }
64
65
66 return new SequenceFileGroup(files);
67
68 }
69
70 public static SequenceFile getSequenceFile(File file) throws SequenceFormatException, IOException {
71
72 FastQCConfig config = FastQCConfig.getInstance();
73
74 // System.err.println("Format is "+config.sequence_format);
75
76 if (config.sequence_format != null) {
77 // We're not autodetecting the format, but taking whatever they said
78
79 if (config.sequence_format.equals("bam") || config.sequence_format.equals("sam")) {
80 return new BAMFile(file,false);
81 }
82 else if (config.sequence_format.equals("bam_mapped") || config.sequence_format.equals("sam_mapped")) {
83 return new BAMFile(file,true);
84 }
85 else if (config.sequence_format.equals("fastq")) {
86 return new FastQFile(config,file);
87 }
88 else {
89 throw new SequenceFormatException("Didn't understand format name '"+config.sequence_format+"'");
90 }
91
92 }
93
94
95 // Otherwise we just use the extension on the end of the file name to try to determine
96 // the type
97 if (file.getName().toLowerCase().endsWith(".bam") || file.getName().toLowerCase().endsWith(".sam")) {
98 // We default to using all reads
99 return new BAMFile(file,false);
100 }
101 else if (file.getName().toLowerCase().endsWith(".fast5")) {
102 return new Fast5File(file);
103 }
104 else {
105 return new FastQFile(config,file);
106 }
107
108
109 }
110
111
112 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22
23 public interface SequenceFile {
24
25 public boolean hasNext();
26 public Sequence next() throws SequenceFormatException;
27 public boolean isColorspace();
28 public String name();
29 public int getPercentComplete();
30 public File getFile();
31
32 }
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22
23 public interface SequenceFile {
24
25 public boolean hasNext();
26 public Sequence next() throws SequenceFormatException;
27 public boolean isColorspace();
28 public String name();
29 public int getPercentComplete();
30 public File getFile();
31
32 }
0 /**
1 * Copyright Copyright 2013-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22 import java.io.IOException;
23
24 import uk.ac.babraham.FastQC.Utilities.CasavaBasename;
25 import uk.ac.babraham.FastQC.Utilities.NameFormatException;
26
27 public class SequenceFileGroup implements SequenceFile {
28
29 private File [] files;
30 private SequenceFile sequenceFile;
31 private File groupFile;
32 private int currentIndex = 0;
33
34 public SequenceFileGroup( File [] files) throws IOException, SequenceFormatException {
35 this.files = files;
36 sequenceFile = SequenceFactory.getSequenceFile(files[0]);
37
38 try {
39 String baseName = CasavaBasename.getCasavaBasename(sequenceFile.name());
40 if (sequenceFile.getFile().getParent() == null) {
41 groupFile = new File(baseName);
42 } else {
43 groupFile = new File(sequenceFile.getFile().getParent() + "/"
44 + baseName);
45 }
46 } catch (NameFormatException nfe) {
47 groupFile = sequenceFile.getFile();
48 }
49 }
50
51 public File getFile() {
52 return groupFile;
53 }
54
55 public int getPercentComplete() {
56 return ((100 * currentIndex) / files.length)
57 + (sequenceFile.getPercentComplete() / files.length);
58 }
59
60 public boolean hasNext() {
61 if (sequenceFile.hasNext()) {
62 return true;
63 }
64 else {
65 while (currentIndex < files.length - 1) {
66 ++currentIndex;
67 try {
68 sequenceFile = SequenceFactory.getSequenceFile(files[currentIndex]);
69 }
70 catch (Exception e) {
71 e.printStackTrace();
72 return false;
73 }
74 if (sequenceFile.hasNext()) break;
75 }
76 return sequenceFile.hasNext();
77 }
78 }
79
80 public boolean isColorspace() {
81 return sequenceFile.isColorspace();
82 }
83
84 public String name() {
85 return groupFile.getName();
86 }
87
88 public Sequence next() throws SequenceFormatException {
89 return sequenceFile.next();
90 }
91
92 }
0 /**
1 * Copyright Copyright 2013-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
21 import java.io.File;
22 import java.io.IOException;
23
24 import uk.ac.babraham.FastQC.Utilities.CasavaBasename;
25 import uk.ac.babraham.FastQC.Utilities.NameFormatException;
26
27 public class SequenceFileGroup implements SequenceFile {
28
29 private File [] files;
30 private SequenceFile sequenceFile;
31 private File groupFile;
32 private int currentIndex = 0;
33
34 public SequenceFileGroup( File [] files) throws IOException, SequenceFormatException {
35 this.files = files;
36 sequenceFile = SequenceFactory.getSequenceFile(files[0]);
37
38 try {
39 String baseName = CasavaBasename.getCasavaBasename(sequenceFile.name());
40 if (sequenceFile.getFile().getParent() == null) {
41 groupFile = new File(baseName);
42 } else {
43 groupFile = new File(sequenceFile.getFile().getParent() + "/"
44 + baseName);
45 }
46 } catch (NameFormatException nfe) {
47 groupFile = sequenceFile.getFile();
48 }
49 }
50
51 public File getFile() {
52 return groupFile;
53 }
54
55 public int getPercentComplete() {
56 return ((100 * currentIndex) / files.length)
57 + (sequenceFile.getPercentComplete() / files.length);
58 }
59
60 public boolean hasNext() {
61 if (sequenceFile.hasNext()) {
62 return true;
63 }
64 else {
65 while (currentIndex < files.length - 1) {
66 ++currentIndex;
67 try {
68 sequenceFile = SequenceFactory.getSequenceFile(files[currentIndex]);
69 }
70 catch (Exception e) {
71 e.printStackTrace();
72 return false;
73 }
74 if (sequenceFile.hasNext()) break;
75 }
76 return sequenceFile.hasNext();
77 }
78 }
79
80 public boolean isColorspace() {
81 return sequenceFile.isColorspace();
82 }
83
84 public String name() {
85 return groupFile.getName();
86 }
87
88 public Sequence next() throws SequenceFormatException {
89 return sequenceFile.next();
90 }
91
92 }
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
/**
 * Checked exception carrying a description of a problem found in sequence data.
 */
public class SequenceFormatException extends Exception {

	/**
	 * @param message a description of what was wrong with the data
	 */
	public SequenceFormatException (final String message) {
		super(message);
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Sequence;
20
/**
 * Checked exception declared by sequence readers (for example their
 * <code>next()</code> method) to report a problem with the format of the
 * data being read.
 */
public class SequenceFormatException extends Exception {

	// Exception is Serializable, so the serial form is pinned explicitly
	// rather than left to a compiler-generated value.
	private static final long serialVersionUID = 1L;

	/**
	 * Creates an exception carrying only a description of the problem.
	 *
	 * @param message a description of what was wrong with the data
	 */
	public SequenceFormatException (String message) {
		super(message);
	}

	/**
	 * Creates an exception which also keeps hold of the lower level
	 * failure which triggered it, so the original stack trace isn't lost.
	 *
	 * @param message a description of what was wrong with the data
	 * @param cause the underlying exception, may be null
	 */
	public SequenceFormatException (String message, Throwable cause) {
		super(message, cause);
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Statistics;
20
/**
 * A normal (Gaussian) distribution described by its mean and its
 * standard deviation.
 */
public class NormalDistribution {

	private final double mean;
	private final double stdev;

	/**
	 * @param mean the centre of the distribution
	 * @param stdev the standard deviation of the distribution
	 */
	public NormalDistribution (double mean, double stdev) {
		this.mean = mean;
		this.stdev = stdev;
	}

	/**
	 * Evaluates the probability density function of this distribution.
	 *
	 * Despite the method name this is NOT a z-score. The value returned is
	 *
	 *   1/sqrt(2*pi*stdev^2) * exp(-(value-mean)^2 / (2*stdev^2))
	 *
	 * The name is kept so that existing callers continue to work.
	 *
	 * @param value the position at which to evaluate the density
	 * @return the density at that position. A standard deviation of zero
	 * makes the calculation degenerate and the result is NaN.
	 */
	public double getZScoreForValue (double value) {
		double variance = stdev*stdev;
		double deviation = value-mean;

		double scale = 1d/Math.sqrt(2*Math.PI*variance);
		double exponent = -(deviation*deviation)/(2*variance);

		// Math.exp is the direct (and more accurate) way to raise e to a power
		return scale*Math.exp(exponent);
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Statistics;
20
/**
 * A normal (Gaussian) distribution described by its mean and its
 * standard deviation.
 */
public class NormalDistribution {

	private final double mean;
	private final double stdev;

	/**
	 * @param mean the centre of the distribution
	 * @param stdev the standard deviation of the distribution
	 */
	public NormalDistribution (double mean, double stdev) {
		this.mean = mean;
		this.stdev = stdev;
	}

	/**
	 * Evaluates the probability density function of this distribution.
	 *
	 * Despite the method name this is NOT a z-score. The value returned is
	 *
	 *   1/sqrt(2*pi*stdev^2) * exp(-(value-mean)^2 / (2*stdev^2))
	 *
	 * The name is kept so that existing callers continue to work.
	 *
	 * @param value the position at which to evaluate the density
	 * @return the density at that position. A standard deviation of zero
	 * makes the calculation degenerate and the result is NaN.
	 */
	public double getZScoreForValue (double value) {
		double variance = stdev*stdev;
		double deviation = value-mean;

		double scale = 1d/Math.sqrt(2*Math.PI*variance);
		double exponent = -(deviation*deviation)/(2*variance);

		// Math.exp is the direct (and more accurate) way to raise e to a power
		return scale*Math.exp(exponent);
	}

}
0 /**
1 * Copyright Copyright 2010-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Statistics;
20
21
/**
 * A Class to calculate the Pearson Correlation.
 *
 * All of the arithmetic is done in double precision on mean-centred
 * values. Accumulating raw sums of squares in single precision loses
 * the result entirely once the values get large (eg read counts in the
 * tens of millions), so only the final r-value is narrowed to a float.
 */
public class PearsonCorrelation {


	/**
	 * Calculate correlation.
	 *
	 * @param data1 the first dataset
	 * @param data2 the second dataset
	 * @return the Pearson r-value, or NaN if either dataset has no variation
	 * @throws IllegalArgumentException if the two datasets don't have the same number of points in them.
	 */
	public static float calculateCorrelation (long [] data1, long [] data2) {
		return calculateCorrelation(data1, data2, 0);
	}

	/**
	 * Calculate correlation between two datasets where the second one
	 * is shifted relative to the first. Position i in data1 is paired
	 * with position i+offset in data2, and the last offset positions of
	 * data1 are not used.
	 *
	 * @param data1 the first dataset
	 * @param data2 the second dataset
	 * @param offset the number of positions by which to shift data2
	 * @return the Pearson r-value, or NaN if either dataset has no variation
	 * @throws IllegalArgumentException if the offset is negative or longer than
	 * either dataset, or if the two datasets don't have the same number of points in them.
	 */
	public static float calculateCorrelation (long [] data1, long [] data2, int offset) {

		if (offset < 0 || offset > data1.length || offset > data2.length) {
			throw new IllegalArgumentException("Offset "+offset+" is outside the range of the data sets");
		}

		// A long converts to a double exactly for any realistic count,
		// which isn't true of a float once you get past 2^24.
		double [] d1 = new double[data1.length-offset];
		double [] d2 = new double[data2.length-offset];
		for (int i=0;i<d1.length;i++) d1[i] = data1[i];
		for (int i=0;i<d2.length;i++) d2[i] = data2[i+offset];

		return correlate(d1, d2);
	}


	/**
	 * Calculate correlation.
	 *
	 * @param data1 the first dataset
	 * @param data2 the second dataset
	 * @return the Pearson r-value, or NaN if either dataset has no variation
	 * @throws IllegalArgumentException if the two datasets don't have the same number of points in them.
	 */
	public static float calculateCorrelation (float [] data1, float [] data2) {

		double [] d1 = new double[data1.length];
		double [] d2 = new double[data2.length];
		for (int i=0;i<d1.length;i++) d1[i] = data1[i];
		for (int i=0;i<d2.length;i++) d2[i] = data2[i];

		return correlate(d1, d2);
	}

	/**
	 * Does the actual calculation using a two pass approach. The first
	 * pass finds the means, the second sums products of the deviations
	 * from those means, which avoids subtracting two huge nearly equal
	 * numbers from each other.
	 */
	private static float correlate (double [] x, double [] y) {

		if (x.length != y.length) {
			throw new IllegalArgumentException("Data sets must be the same length when calculating correlation");
		}

		int count = x.length;

		double sumX = 0;
		double sumY = 0;
		for (int i=0;i<count;i++) {
			sumX += x[i];
			sumY += y[i];
		}

		double meanX = sumX/count;
		double meanY = sumY/count;

		double coDeviation = 0;
		double squareDeviationX = 0;
		double squareDeviationY = 0;

		for (int i=0;i<count;i++) {
			double dx = x[i]-meanX;
			double dy = y[i]-meanY;
			coDeviation += dx*dy;
			squareDeviationX += dx*dx;
			squareDeviationY += dy*dy;
		}

		// With empty or constant data this is 0/0 and so comes out as NaN
		return (float)(coDeviation/Math.sqrt(squareDeviationX*squareDeviationY));
	}

}
0 /**
1 * Copyright Copyright 2010-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Statistics;
20
21
/**
 * A Class to calculate the Pearson Correlation.
 *
 * All of the arithmetic is done in double precision on mean-centred
 * values. Accumulating raw sums of squares in single precision loses
 * the result entirely once the values get large (eg read counts in the
 * tens of millions), so only the final r-value is narrowed to a float.
 */
public class PearsonCorrelation {


	/**
	 * Calculate correlation.
	 *
	 * @param data1 the first dataset
	 * @param data2 the second dataset
	 * @return the Pearson r-value, or NaN if either dataset has no variation
	 * @throws IllegalArgumentException if the two datasets don't have the same number of points in them.
	 */
	public static float calculateCorrelation (long [] data1, long [] data2) {
		return calculateCorrelation(data1, data2, 0);
	}

	/**
	 * Calculate correlation between two datasets where the second one
	 * is shifted relative to the first. Position i in data1 is paired
	 * with position i+offset in data2, and the last offset positions of
	 * data1 are not used.
	 *
	 * @param data1 the first dataset
	 * @param data2 the second dataset
	 * @param offset the number of positions by which to shift data2
	 * @return the Pearson r-value, or NaN if either dataset has no variation
	 * @throws IllegalArgumentException if the offset is negative or longer than
	 * either dataset, or if the two datasets don't have the same number of points in them.
	 */
	public static float calculateCorrelation (long [] data1, long [] data2, int offset) {

		if (offset < 0 || offset > data1.length || offset > data2.length) {
			throw new IllegalArgumentException("Offset "+offset+" is outside the range of the data sets");
		}

		// A long converts to a double exactly for any realistic count,
		// which isn't true of a float once you get past 2^24.
		double [] d1 = new double[data1.length-offset];
		double [] d2 = new double[data2.length-offset];
		for (int i=0;i<d1.length;i++) d1[i] = data1[i];
		for (int i=0;i<d2.length;i++) d2[i] = data2[i+offset];

		return correlate(d1, d2);
	}


	/**
	 * Calculate correlation.
	 *
	 * @param data1 the first dataset
	 * @param data2 the second dataset
	 * @return the Pearson r-value, or NaN if either dataset has no variation
	 * @throws IllegalArgumentException if the two datasets don't have the same number of points in them.
	 */
	public static float calculateCorrelation (float [] data1, float [] data2) {

		double [] d1 = new double[data1.length];
		double [] d2 = new double[data2.length];
		for (int i=0;i<d1.length;i++) d1[i] = data1[i];
		for (int i=0;i<d2.length;i++) d2[i] = data2[i];

		return correlate(d1, d2);
	}

	/**
	 * Does the actual calculation using a two pass approach. The first
	 * pass finds the means, the second sums products of the deviations
	 * from those means, which avoids subtracting two huge nearly equal
	 * numbers from each other.
	 */
	private static float correlate (double [] x, double [] y) {

		if (x.length != y.length) {
			throw new IllegalArgumentException("Data sets must be the same length when calculating correlation");
		}

		int count = x.length;

		double sumX = 0;
		double sumY = 0;
		for (int i=0;i<count;i++) {
			sumX += x[i];
			sumY += y[i];
		}

		double meanX = sumX/count;
		double meanY = sumY/count;

		double coDeviation = 0;
		double squareDeviationX = 0;
		double squareDeviationY = 0;

		for (int i=0;i<count;i++) {
			double dx = x[i]-meanX;
			double dy = y[i]-meanY;
			coDeviation += dx*dy;
			squareDeviationX += dx*dx;
			squareDeviationY += dy*dy;
		}

		// With empty or constant data this is 0/0 and so comes out as NaN
		return (float)(coDeviation/Math.sqrt(squareDeviationX*squareDeviationY));
	}

}
0 /**
1 * Copyright Copyright 2011-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Utilities;
21
22 import java.io.File;
23 import java.util.Hashtable;
24 import java.util.Vector;
25
26 public class CasavaBasename {
27
28 /**
29 * This method finds the core name from a CASAVA 1.8 fastq file. It strips off the
30 * part which indicates that this file is one of a set and returns the base name with
31 * this part removed.
32 *
33 * If the filename does not conform to standard CASAVA naming then a NameFormatException
34 * is thrown.
35 *
36 * @param originalName
37 * @return
38 * @throws NameFormatException
39 */
40
41 public static String getCasavaBasename (String originalName) throws NameFormatException {
42
43 // Find the base name. We need to remove the 123 numbers from
44 // files of the form:
45 //
46 // anyold_text_123.fastq.gz
47 //
48 // where the base name will be
49 //
50 // anyoldtext.fastq.gz
51
52 // The file must usually end with .fastq.gz, but you can tell cassava to not
53 // compress, in which case the .gz is missing.
54
55
56 if (originalName.endsWith(".fastq.gz")) {
57
58 // They must have an _ 13 chars before the end
59 if (originalName.substring(originalName.length()-13, originalName.length()-12).equals("_")) {
60
61 // They must have numbers for the 3 positions before .fastq
62 try {
63 Integer.parseInt(originalName.substring(originalName.length()-12, originalName.length()-9));
64
65 // If we get here then everything is OK to use the base name from this file
66 String baseName = originalName.substring(0,originalName.length()-13)+".fastq.gz";
67 return baseName;
68 }
69 catch (NumberFormatException nfe) {}
70 }
71 }
72
73
74 else if (originalName.endsWith(".fastq")) {
75
76 // They must have an _ 10 chars before the end
77 if (originalName.substring(originalName.length()-10, originalName.length()-9).equals("_")) {
78
79 // They must have numbers for the 3 positions before .fastq
80 try {
81 Integer.parseInt(originalName.substring(originalName.length()-9, originalName.length()-6));
82
83 // If we get here then everything is OK to use the base name from this file
84 String baseName = originalName.substring(0,originalName.length()-10)+".fastq";
85 return baseName;
86 }
87 catch (NumberFormatException nfe) {}
88 }
89 }
90
91
92 throw new NameFormatException();
93 }
94
95 public static File [][] getCasavaGroups (File [] files) {
96 Hashtable<String, Vector<File>> fileBases = new Hashtable<String, Vector<File>>();
97
98 for (int f=0;f<files.length;f++) {
99
100 // If a file forms part of a CASAVA group then put it into that
101 // group.
102 try {
103 String baseName = CasavaBasename.getCasavaBasename(files[f].getName());
104 if (! fileBases.containsKey(baseName)) {
105 fileBases.put(baseName,new Vector<File>());
106 }
107 fileBases.get(baseName).add(files[f]);
108
109 }
110
111 // If the file name doesn't appear to be part of a CASAVA group
112 // then add it as a singleton
113 catch (NameFormatException nfe) {
114
115 System.err.println("File '"+files[f].getName()+"' didn't look like part of a CASAVA group");
116 Vector<File> newVector = new Vector<File>();
117 newVector.add(files[f]);
118 fileBases.put(files[f].getName(), newVector);
119 }
120
121 }
122
123 String [] baseNames = fileBases.keySet().toArray(new String [0]);
124
125 File [][] fileGroups = new File[baseNames.length][];
126
127 for (int i=0;i<baseNames.length;i++) {
128 fileGroups[i] = fileBases.get(baseNames[i]).toArray(new File[0]);
129 }
130
131 return fileGroups;
132 }
133
134 }
0 /**
1 * Copyright Copyright 2011-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Utilities;
21
22 import java.io.File;
23 import java.util.Hashtable;
24 import java.util.Vector;
25
26 public class CasavaBasename {
27
28 /**
29 * This method finds the core name from a CASAVA 1.8 fastq file. It strips off the
30 * part which indicates that this file is one of a set and returns the base name with
31 * this part removed.
32 *
33 * If the filename does not conform to standard CASAVA naming then a NameFormatException
34 * is thrown.
35 *
36 * @param originalName
37 * @return
38 * @throws NameFormatException
39 */
40
41 public static String getCasavaBasename (String originalName) throws NameFormatException {
42
43 // Find the base name. We need to remove the 123 numbers from
44 // files of the form:
45 //
46 // anyold_text_123.fastq.gz
47 //
48 // where the base name will be
49 //
50 // anyoldtext.fastq.gz
51
52 // The file must usually end with .fastq.gz, but you can tell cassava to not
53 // compress, in which case the .gz is missing.
54
55
56 if (originalName.endsWith(".fastq.gz")) {
57
58 // They must have an _ 13 chars before the end
59 if (originalName.substring(originalName.length()-13, originalName.length()-12).equals("_")) {
60
61 // They must have numbers for the 3 positions before .fastq
62 try {
63 Integer.parseInt(originalName.substring(originalName.length()-12, originalName.length()-9));
64
65 // If we get here then everything is OK to use the base name from this file
66 String baseName = originalName.substring(0,originalName.length()-13)+".fastq.gz";
67 return baseName;
68 }
69 catch (NumberFormatException nfe) {}
70 }
71 }
72
73
74 else if (originalName.endsWith(".fastq")) {
75
76 // They must have an _ 10 chars before the end
77 if (originalName.substring(originalName.length()-10, originalName.length()-9).equals("_")) {
78
79 // They must have numbers for the 3 positions before .fastq
80 try {
81 Integer.parseInt(originalName.substring(originalName.length()-9, originalName.length()-6));
82
83 // If we get here then everything is OK to use the base name from this file
84 String baseName = originalName.substring(0,originalName.length()-10)+".fastq";
85 return baseName;
86 }
87 catch (NumberFormatException nfe) {}
88 }
89 }
90
91
92 throw new NameFormatException();
93 }
94
95 public static File [][] getCasavaGroups (File [] files) {
96 Hashtable<String, Vector<File>> fileBases = new Hashtable<String, Vector<File>>();
97
98 for (int f=0;f<files.length;f++) {
99
100 // If a file forms part of a CASAVA group then put it into that
101 // group.
102 try {
103 String baseName = CasavaBasename.getCasavaBasename(files[f].getName());
104 if (! fileBases.containsKey(baseName)) {
105 fileBases.put(baseName,new Vector<File>());
106 }
107 fileBases.get(baseName).add(files[f]);
108
109 }
110
111 // If the file name doesn't appear to be part of a CASAVA group
112 // then add it as a singleton
113 catch (NameFormatException nfe) {
114
115 System.err.println("File '"+files[f].getName()+"' didn't look like part of a CASAVA group");
116 Vector<File> newVector = new Vector<File>();
117 newVector.add(files[f]);
118 fileBases.put(files[f].getName(), newVector);
119 }
120
121 }
122
123 String [] baseNames = fileBases.keySet().toArray(new String [0]);
124
125 File [][] fileGroups = new File[baseNames.length][];
126
127 for (int i=0;i<baseNames.length;i++) {
128 fileGroups[i] = fileBases.get(baseNames[i]).toArray(new File[0]);
129 }
130
131 return fileGroups;
132 }
133
134 }
0 /**
1 * Copyright Copyright 2007-15 Simon Andrews
2 *
3 * This file is part of SeqMonk.
4 *
5 * SeqMonk is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * SeqMonk is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SeqMonk; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Utilities;
20
21 import java.awt.Color;
22
23 /**
24 * Provides a range of colours which form a smooth gradient
25 * from Blue through Green to Red.
26 */
27 public class HotColdColourGradient {
28
29
30 // To save generating ridiculous numbers of colours and wasting
31 // memory we're going to make up a pool of 100 colours and pick
32 // the closest one from that set to return.
33
34 private final Color [] colors = makeColors();
35
36 public String toString() {
37 return "Hot Cold Colour Gradient";
38 }
39
40 /**
41 * A method initially called to create a palette of 100 pre-cached
42 * colours from which the closest match will be selected to
43 * return for future queries. Setting the colours this way
44 * saves on the overhead of generating a lot of new objects
45 *
46 * @return An array of colours crossing the full palette.
47 */
48 protected Color [] makeColors() {
49 /*
50 * We pre-generate a list of 100 colours we're going to
51 * use for this display.
52 *
53 * Because a linear gradient ends up leaving too much
54 * green in the spectrum we put this on a log scale
55 * to emphasise low and high values so the display
56 * is clearer.
57 */
58
59 Color [] colors = new Color[100];
60
61 // We base colors on the square root of their raw value
62
63 double min = 0 - Math.pow(50, 0.5);
64 double max = Math.pow(99-50,0.5);
65 for (int c=0;c<100;c++) {
66 int actualC = c-50;
67 if (actualC < 0) actualC = 0-actualC;
68 double corrected = Math.pow(actualC, 0.5);
69 if (c<50 && corrected > 0) corrected = 0-corrected;
70 RGB r = getRGB(corrected, min, max);
71 colors[c] = new Color(r.r,r.g,r.b);
72 }
73
74 return colors;
75 }
76
77 /**
78 * Gets a colour from the gradient
79 *
80 * @param value The value for which you want a colour
81 * @param min The minimum value in the gradient
82 * @param max The maximum value in the gradient
83 * @return A colour from the appropriate part of the gradient
84 */
85 public Color getColor (double value,double min, double max) {
86 if (colors[0] == null) makeColors();
87
88 int percentage = (int)((100 * (value-min)) / (max-min));
89
90 if (percentage > 100) percentage = 100;
91 if (percentage < 1) percentage = 1;
92
93 return colors[percentage-1];
94 }
95
96 /**
97 * Gets an RGB object for separate colour values
98 *
99 * @param value The value stored
100 * @param min The minimum value in the gradient
101 * @param max The maximum value in the gradient
102 * @return An RGB object representing the colour
103 */
104 private RGB getRGB (double value, double min, double max) {
105
106 int red;
107 int green;
108 int blue;
109
110 double diff = max - min;
111
112 // Red
113 // Red is 0 for the first 50%, scales from 0-200 over 50-75%
114 // and stays at 200 from 75-100%
115
116 // Green
117 // Green scales from 0-200 over the first 25%, stays at
118 // 200 from 25-75% and then scales from 200-0 from 75-100%
119
120 // Blue
121 // Blue starts at 200 until 25%, then scales from 200-0
122 // from 25-50%, then stays at 0 until 100%
123
124 // Since all transitions happen in quarters of the spectrum
125 // range it's easiest to deal with colour values in those
126 // ranges
127
128
129 if (value < (min+(diff*0.25))) {
130 red = 0;
131 blue = 200;
132 green = (int)(200 * ((value-min) / (diff*0.25)));
133
134 }
135 else if (value < (min+(diff*0.5))) {
136 red = 0;
137 green = 200;
138 blue = (int)(200 - (200 * ((value-(min+(diff*0.25))) / (diff*0.25))));
139 }
140 else if (value < (min+(diff*0.75))) {
141 green = 200;
142 blue = 0;
143 red = (int)(200 * ((value-(min+(diff*0.5))) / (diff*0.25)));
144 }
145 else {
146 red = 200;
147 blue = 0;
148 green = (int)(200 - (200 * ((value-(min+(diff*0.75))) / (diff*0.25))));
149 }
150
151 return new RGB(red,green,blue);
152 }
153
154 }
0 /**
1 * Copyright Copyright 2007-15 Simon Andrews
2 *
3 * This file is part of SeqMonk.
4 *
5 * SeqMonk is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * SeqMonk is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SeqMonk; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Utilities;
20
21 import java.awt.Color;
22
23 /**
24 * Provides a range of colours which form a smooth gradient
25 * from Blue through Green to Red.
26 */
27 public class HotColdColourGradient {
28
29
30 // To save generating ridiculous numbers of colours and wasting
31 // memory we're going to make up a pool of 100 colours and pick
32 // the closest one from that set to return.
33
34 private final Color [] colors = makeColors();
35
36 public String toString() {
37 return "Hot Cold Colour Gradient";
38 }
39
40 /**
41 * A method initially called to create a palette of 100 pre-cached
42 * colours from which the closest match will be selected to
43 * return for future queries. Setting the colours this way
44 * saves on the overhead of generating a lot of new objects
45 *
46 * @return An array of colours crossing the full palette.
47 */
48 protected Color [] makeColors() {
49 /*
50 * We pre-generate a list of 100 colours we're going to
51 * use for this display.
52 *
53 * Because a linear gradient ends up leaving too much
54 * green in the spectrum we put this on a log scale
55 * to emphasise low and high values so the display
56 * is clearer.
57 */
58
59 Color [] colors = new Color[100];
60
61 // We base colors on the square root of their raw value
62
63 double min = 0 - Math.pow(50, 0.5);
64 double max = Math.pow(99-50,0.5);
65 for (int c=0;c<100;c++) {
66 int actualC = c-50;
67 if (actualC < 0) actualC = 0-actualC;
68 double corrected = Math.pow(actualC, 0.5);
69 if (c<50 && corrected > 0) corrected = 0-corrected;
70 RGB r = getRGB(corrected, min, max);
71 colors[c] = new Color(r.r,r.g,r.b);
72 }
73
74 return colors;
75 }
76
77 /**
78 * Gets a colour from the gradient
79 *
80 * @param value The value for which you want a colour
81 * @param min The minimum value in the gradient
82 * @param max The maximum value in the gradient
83 * @return A colour from the appropriate part of the gradient
84 */
85 public Color getColor (double value,double min, double max) {
86 if (colors[0] == null) makeColors();
87
88 int percentage = (int)((100 * (value-min)) / (max-min));
89
90 if (percentage > 100) percentage = 100;
91 if (percentage < 1) percentage = 1;
92
93 return colors[percentage-1];
94 }
95
96 /**
97 * Gets an RGB object for separate colour values
98 *
99 * @param value The value stored
100 * @param min The minimum value in the gradient
101 * @param max The maximum value in the gradient
102 * @return An RGB object representing the colour
103 */
104 private RGB getRGB (double value, double min, double max) {
105
106 int red;
107 int green;
108 int blue;
109
110 double diff = max - min;
111
112 // Red
113 // Red is 0 for the first 50%, scales from 0-200 over 50-75%
114 // and stays at 200 from 75-100%
115
116 // Green
117 // Green scales from 0-200 over the first 25%, stays at
118 // 200 from 25-75% and then scales from 200-0 from 75-100%
119
120 // Blue
121 // Blue starts at 200 until 25%, then scales from 200-0
122 // from 25-50%, then stays at 0 until 100%
123
124 // Since all transitions happen in quarters of the spectrum
125 // range it's easiest to deal with colour values in those
126 // ranges
127
128
129 if (value < (min+(diff*0.25))) {
130 red = 0;
131 blue = 200;
132 green = (int)(200 * ((value-min) / (diff*0.25)));
133
134 }
135 else if (value < (min+(diff*0.5))) {
136 red = 0;
137 green = 200;
138 blue = (int)(200 - (200 * ((value-(min+(diff*0.25))) / (diff*0.25))));
139 }
140 else if (value < (min+(diff*0.75))) {
141 green = 200;
142 blue = 0;
143 red = (int)(200 * ((value-(min+(diff*0.5))) / (diff*0.25)));
144 }
145 else {
146 red = 200;
147 blue = 0;
148 green = (int)(200 - (200 * ((value-(min+(diff*0.75))) / (diff*0.25))));
149 }
150
151 return new RGB(red,green,blue);
152 }
153
154 }
00 /**
1 * Copyright Copyright 2014-15 Simon Andrews
1 * Copyright Copyright 2014-17 Simon Andrews
22 *
33 * This file is part of SeqMonk.
44 *
0 package uk.ac.babraham.FastQC.Utilities;
1
2 /**
3 * This class is a work round for a bug in the core java gzip decompression
4 * code which can't handle multiple compressed blocks being concatenated within
5 * the same file. It incorrectly reports that it has hit EOF when it gets to
6 * the end of the first compressed block.
7 *
8 * This work round was posted in the bug report at:
9 *
10 * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4691425
11 *
12 * Unfortunately the Sun bug database doesn't show who posted it so I can't
13 * provide an attribution. They didn't specify under what licence this code
14 * was being provided, but I'm going to assume that since it was posted as
15 * a work round for a reported bug that it's open enough to use here.
16 */
17
18
19 import java.io.InputStream;
20 import java.io.PushbackInputStream;
21 import java.io.IOException;
22 import java.util.zip.GZIPInputStream;
23
24 public class MultiMemberGZIPInputStream extends GZIPInputStream {
25
26 public MultiMemberGZIPInputStream(InputStream in, int size) throws IOException
27 {
28 // Wrap the stream in a PushbackInputStream...
29 super(new PushbackInputStream(in, size), size);
30 this.size=size;
31 }
32
33 public MultiMemberGZIPInputStream(InputStream in) throws IOException
34 {
35 // Wrap the stream in a PushbackInputStream...
36 super(new PushbackInputStream(in, 1024));
37 this.size=-1;
38 }
39
40 private MultiMemberGZIPInputStream(MultiMemberGZIPInputStream parent) throws IOException
41 {
42 super(parent.in);
43 this.size=-1;
44 this.parent=parent.parent==null ? parent : parent.parent;
45 this.parent.child=this;
46 }
47
48 private MultiMemberGZIPInputStream(MultiMemberGZIPInputStream parent, int size) throws IOException
49 {
50 super(parent.in, size);
51 this.size=size;
52 this.parent=parent.parent==null ? parent : parent.parent;
53 this.parent.child=this;
54 }
55
56 private MultiMemberGZIPInputStream parent;
57 private MultiMemberGZIPInputStream child;
58 private int size;
59 private boolean eos;
60
61 public int read(byte[] inputBuffer, int inputBufferOffset, int inputBufferLen) throws IOException {
62
63 if (eos) { return -1;}
64 if (this.child!=null)
65 return this.child.read(inputBuffer, inputBufferOffset, inputBufferLen);
66
67 int charsRead=super.read(inputBuffer, inputBufferOffset, inputBufferLen);
68 if (charsRead==-1)
69 {
70 // Push any remaining buffered data back onto the stream
71 // If the stream is then not empty, use it to construct
72 // a new instance of this class and delegate this and any
73 // future calls to it...
74 int n = inf.getRemaining() - 8;
75 if (n > 0)
76 {
77 // More than 8 bytes remaining in deflater
78 // First 8 are gzip trailer. Add the rest to
79 // any un-read data...
80 ((PushbackInputStream)this.in).unread(buf, len-n, n);
81 }
82 else
83 {
84 // Nothing in the buffer. We need to know whether or not
85 // there is unread data available in the underlying stream
86 // since the base class will not handle an empty file.
87 // Read a byte to see if there is data and if so,
88 // push it back onto the stream...
89 byte[] b=new byte[1];
90 int ret=in.read(b,0,1);
91 if (ret==-1)
92 {
93 eos=true;
94 return -1;
95 }
96 else
97 ((PushbackInputStream)this.in).unread(b, 0, 1);
98 }
99
100 MultiMemberGZIPInputStream child;
101 if (this.size==-1)
102 child=new MultiMemberGZIPInputStream(this);
103 else
104 child=new MultiMemberGZIPInputStream(this, this.size);
105 return child.read(inputBuffer, inputBufferOffset, inputBufferLen);
106 }
107 else
108 return charsRead;
109 }
110
0 package uk.ac.babraham.FastQC.Utilities;
1
2 /**
3 * This class is a work round for a bug in the core java gzip decompression
4 * code which can't handle multiple compressed blocks being concatenated within
5 * the same file. It incorrectly reports that it has hit EOF when it gets to
6 * the end of the first compressed block.
7 *
8 * This work round was posted in the bug report at:
9 *
10 * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4691425
11 *
12 * Unfortunately the Sun bug database doesn't show who posted it so I can't
13 * provide an attribution. They didn't specify under what licence this code
14 * was being provided, but I'm going to assume that since it was posted as
15 * a workaround for a reported bug that it's open enough to use here.
16 */
17
18
19 import java.io.InputStream;
20 import java.io.PushbackInputStream;
21 import java.io.IOException;
22 import java.util.zip.GZIPInputStream;
23
/**
 * A GZIPInputStream which carries on reading when one gzip member ends and
 * another one follows it in the same underlying stream.
 *
 * The underlying stream is wrapped in a PushbackInputStream so that bytes which
 * were buffered past the end of a member can be handed back. Each further
 * member is decoded by a freshly created instance of this class to which the
 * original (root) stream delegates all subsequent reads.
 */
public class MultiMemberGZIPInputStream extends GZIPInputStream {

	// The root of the delegation chain (null when this instance is the root).
	private MultiMemberGZIPInputStream parent;

	// The instance currently decoding data on behalf of the root.
	private MultiMemberGZIPInputStream child;

	// The buffer size requested by the caller, or -1 when the default is used.
	private int size;

	// Set once the underlying stream has been found to be empty. This is a
	// separate flag from the end-of-stream flag kept by the superclass.
	private boolean exhausted;

	/**
	 * Creates a stream using a caller supplied buffer size.
	 *
	 * @param in the stream holding the gzip data
	 * @param size the size used for both the pushback and the inflate buffer
	 * @throws IOException if the gzip header can't be read
	 */
	public MultiMemberGZIPInputStream(InputStream in, int size) throws IOException
	{
		// Wrap the stream in a PushbackInputStream...
		super(new PushbackInputStream(in, size), size);
		this.size=size;
	}

	/**
	 * Creates a stream using the default buffer size.
	 *
	 * @param in the stream holding the gzip data
	 * @throws IOException if the gzip header can't be read
	 */
	public MultiMemberGZIPInputStream(InputStream in) throws IOException
	{
		// Wrap the stream in a PushbackInputStream...
		super(new PushbackInputStream(in, 1024));
		this.size=-1;
	}

	// Builds the decoder for the next member, sharing the predecessor's
	// (already wrapped) stream and using the default buffer size.
	private MultiMemberGZIPInputStream(MultiMemberGZIPInputStream parent) throws IOException
	{
		super(parent.in);
		this.size=-1;
		registerWithRoot(parent);
	}

	// Builds the decoder for the next member, sharing the predecessor's
	// (already wrapped) stream and using an explicit buffer size.
	private MultiMemberGZIPInputStream(MultiMemberGZIPInputStream parent, int size) throws IOException
	{
		super(parent.in, size);
		this.size=size;
		registerWithRoot(parent);
	}

	// Records the root of the chain and makes this instance the one to
	// which the root delegates its reads.
	private void registerWithRoot(MultiMemberGZIPInputStream predecessor)
	{
		MultiMemberGZIPInputStream root=predecessor.parent;
		if (root==null)
		{
			root=predecessor;
		}
		this.parent=root;
		root.child=this;
	}

	// Hands bytes back to the shared pushback stream so that the decoder
	// for the next member can read them again.
	private void pushBack(byte[] data, int start, int count) throws IOException
	{
		((PushbackInputStream)this.in).unread(data, start, count);
	}

	/**
	 * Reads decompressed data, moving on to the next gzip member when the
	 * current one is finished.
	 *
	 * @return the number of bytes read, or -1 once no further data is available
	 * @throws IOException if the underlying stream can't be read or decoded
	 */
	@Override
	public int read(byte[] inputBuffer, int inputBufferOffset, int inputBufferLen) throws IOException {

		if (exhausted)
		{
			return -1;
		}

		if (this.child!=null)
		{
			return this.child.read(inputBuffer, inputBufferOffset, inputBufferLen);
		}

		int charsRead=super.read(inputBuffer, inputBufferOffset, inputBufferLen);
		if (charsRead!=-1)
		{
			return charsRead;
		}

		// The current member is finished. Anything left in the inflater
		// beyond the 8 byte gzip trailer belongs to whatever follows, so
		// it goes back onto the stream.
		int surplus = inf.getRemaining() - 8;
		if (surplus > 0)
		{
			pushBack(buf, len-surplus, surplus);
		}
		else
		{
			// Nothing was buffered. The base class can't cope with an
			// empty stream so probe for a single byte, and only carry
			// on (after returning the byte) if there was one.
			byte[] probe=new byte[1];
			if (in.read(probe,0,1)==-1)
			{
				exhausted=true;
				return -1;
			}
			pushBack(probe, 0, 1);
		}

		// Let a new instance decode the next member. Its constructor
		// registers it as the delegate for future reads.
		MultiMemberGZIPInputStream next;
		if (this.size==-1)
		{
			next=new MultiMemberGZIPInputStream(this);
		}
		else
		{
			next=new MultiMemberGZIPInputStream(this, this.size);
		}
		return next.read(inputBuffer, inputBufferOffset, inputBufferLen);
	}

}
0 /**
1 * Copyright Copyright 2011-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Utilities;
21
/**
 * Signals that a file name does not follow the naming scheme which the
 * calling code expected it to have.
 */
@SuppressWarnings("serial")
public class NameFormatException extends Exception {

	/**
	 * Creates an exception with no detail message.
	 */
	public NameFormatException () {
		super();
	}

	/**
	 * Creates an exception which records why the name was rejected.
	 *
	 * @param message a description of the problem, typically including the offending name
	 */
	public NameFormatException (String message) {
		super(message);
	}

}
0 /**
1 * Copyright Copyright 2011-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Utilities;
21
/**
 * Signals that a file name does not follow the naming scheme which the
 * calling code expected it to have.
 */
@SuppressWarnings("serial")
public class NameFormatException extends Exception {

	/**
	 * Creates an exception with no detail message.
	 */
	public NameFormatException () {
		super();
	}

	/**
	 * Creates an exception which records why the name was rejected.
	 *
	 * @param message a description of the problem, typically including the offending name
	 */
	public NameFormatException (String message) {
		super(message);
	}

}
0 /**
1 * Copyright Copyright 2011-15 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Utilities;
21
22 import java.io.File;
23 import java.util.Hashtable;
24 import java.util.Vector;
25
26 public class NanoporeBasename {
27
28 /**
29 * This method finds the core name from a CASAVA 1.8 fastq file. It strips off the
30 * part which indicates that this file is one of a set and returns the base name with
31 * this part removed.
32 *
33 * If the filename does not conform to standard CASAVA naming then a NameFormatException
34 * is thrown.
35 *
36 * @param originalName
37 * @return
38 * @throws NameFormatException
39 */
40
41 public static String getNanoporeBasename (String originalName) throws NameFormatException {
42
43 // Files from nanopores look like: Computer_Samplename_number_chXXX_fileXXX_strand.fast5
44 // We need to reduce this to Computer_Samplename_number
45
46 String basename = originalName.replaceAll("_ch\\d+_file\\d+_strand.fast5$", "");
47
48 if (basename.equals(originalName)) {
49 throw new NameFormatException();
50 }
51
52 return basename;
53
54 }
55
56 public static File [][] getNanoporeGroups (File [] files) {
57 Hashtable<String, Vector<File>> fileBases = new Hashtable<String, Vector<File>>();
58
59 for (int f=0;f<files.length;f++) {
60
61
62 if (files[f].getName().contains("muxscan")) continue; // Control files not containing real data.
63
64 // If a file forms part of a nanopore group then put it into that
65 // group.
66 try {
67 String baseName = NanoporeBasename.getNanoporeBasename(files[f].getName());
68 if (! fileBases.containsKey(baseName)) {
69 fileBases.put(baseName,new Vector<File>());
70 }
71 fileBases.get(baseName).add(files[f]);
72
73 }
74
75 // If the file name doesn't appear to be part of a nanopore group
76 // then add it as a singleton
77 catch (NameFormatException nfe) {
78
79 System.err.println("File '"+files[f].getName()+"' didn't look like part of a CASAVA group");
80 Vector<File> newVector = new Vector<File>();
81 newVector.add(files[f]);
82 fileBases.put(files[f].getName(), newVector);
83 }
84
85 }
86
87 String [] baseNames = fileBases.keySet().toArray(new String [0]);
88
89 File [][] fileGroups = new File[baseNames.length][];
90
91 for (int i=0;i<baseNames.length;i++) {
92 fileGroups[i] = fileBases.get(baseNames[i]).toArray(new File[0]);
93 }
94
95 return fileGroups;
96 }
97
98 }
0 /**
1 * Copyright Copyright 2011-17 Simon Andrews
2 *
3 * This file is part of FastQC.
4 *
5 * FastQC is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * FastQC is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with FastQC; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Utilities;
21
22 import java.io.File;
23 import java.util.Hashtable;
24 import java.util.Vector;
25
26 public class NanoporeBasename {
27
28 /**
29 * This method finds the core name from an ONT fast5 file. It strips off the
30 * part which indicates that this file is one of a set and returns the base name with
31 * this part removed.
32 *
33 * If the filename does not conform to standard CASAVA naming then a NameFormatException
34 * is thrown.
35 *
36 * @param originalName
37 * @return
38 * @throws NameFormatException
39 */
40
41 public static String getNanoporeBasename (String originalName) throws NameFormatException {
42
43 // Files from nanopores look like: Computer_Samplename_number_chXXX_fileXXX_strand.fast5
44 // We need to reduce this to Computer_Samplename_number
45
46 String [] subNames = originalName.split("_");
47
48 if (subNames.length < 5) {
49 throw new NameFormatException();
50 }
51
52 String basename = subNames[0]+"_"+subNames[1]+"_"+subNames[2];
53
54 System.err.println("Basename is "+basename);
55
56 return basename;
57
58 }
59
60 public static File [][] getNanoporeGroups (File [] files) {
61 Hashtable<String, Vector<File>> fileBases = new Hashtable<String, Vector<File>>();
62
63 for (int f=0;f<files.length;f++) {
64
65
66 if (files[f].getName().contains("muxscan")) continue; // Control files not containing real data.
67
68 // If a file forms part of a nanopore group then put it into that
69 // group.
70 try {
71 String baseName = NanoporeBasename.getNanoporeBasename(files[f].getName());
72 if (! fileBases.containsKey(baseName)) {
73 fileBases.put(baseName,new Vector<File>());
74 }
75 fileBases.get(baseName).add(files[f]);
76
77 }
78
79 // If the file name doesn't appear to be part of a nanopore group
80 // then add it as a singleton
81 catch (NameFormatException nfe) {
82
83 System.err.println("File '"+files[f].getName()+"' didn't look like part of a CASAVA group");
84 Vector<File> newVector = new Vector<File>();
85 newVector.add(files[f]);
86 fileBases.put(files[f].getName(), newVector);
87 }
88
89 }
90
91 String [] baseNames = fileBases.keySet().toArray(new String [0]);
92
93 File [][] fileGroups = new File[baseNames.length][];
94
95 for (int i=0;i<baseNames.length;i++) {
96 fileGroups[i] = fileBases.get(baseNames[i]).toArray(new File[0]);
97 }
98
99 return fileGroups;
100 }
101
102 }
0 /**
1 * Copyright Copyright 2013-15 Simon Andrews
2 *
3 * This file is part of SeqMonk.
4 *
5 * SeqMonk is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * SeqMonk is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SeqMonk; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Utilities;
21
/**
 * Keeps a tally of how often each quality character has been seen and
 * derives simple summary statistics from that tally.
 */
public class QualityCount {

	/*
	 * This is the third attempt at this class. It is an all primitive
	 * version so that no hash lookups (which would need a conversion
	 * from char to Character) are required. 150 slots should be safe
	 * since that gives us up to Phred 86 with a 64 offset, which
	 * should be plenty.
	 */

	// The number of distinct character codes which can be counted.
	private static final int SLOT_COUNT = 150;

	// The value returned for the min/max character when nothing has been counted.
	private static final char NOTHING_SEEN = (char)1000;

	// One counter per character code.
	private long [] actualCounts = new long[SLOT_COUNT];

	// The number of values which have been added.
	private long totalCounts = 0;

	/**
	 * Records one observation of a quality character.
	 *
	 * @param c the character which was seen
	 */
	public void addValue(char c) {
		totalCounts += 1;
		actualCounts[c] += 1;
	}

	/**
	 * @return the number of values added so far
	 */
	public long getTotalCount () {
		return totalCounts;
	}

	/**
	 * @return the lowest character seen, or (char)1000 if nothing has been seen
	 */
	public char getMinChar () {

		int slot = 0;
		while (slot < actualCounts.length) {
			if (actualCounts[slot] > 0) {
				return (char)slot;
			}
			slot++;
		}

		return NOTHING_SEEN;
	}

	/**
	 * @return the highest character seen, or (char)1000 if nothing has been seen
	 */
	public char getMaxChar () {

		int slot = actualCounts.length-1;
		while (slot >= 0) {
			if (actualCounts[slot] > 0) {
				return (char)slot;
			}
			slot--;
		}

		return NOTHING_SEEN;
	}

	/**
	 * Calculates the mean quality of the characters at or above the offset.
	 *
	 * @param offset the character code which represents a quality of zero
	 * @return the mean of (character code - offset) over the counted characters
	 */
	public double getMean (int offset) {
		long weightedSum = 0;
		long observations = 0;

		int slot = offset;
		while (slot < actualCounts.length) {
			long seen = actualCounts[slot];
			weightedSum += seen*(slot-offset);
			observations += seen;
			slot++;
		}

		return ((double)weightedSum)/observations;
	}

	/**
	 * Finds the quality at which the running count of characters (starting at
	 * the offset) first reaches the requested percentage of all added values.
	 *
	 * @param offset the character code which represents a quality of zero
	 * @param percentile the percentage of the total count to reach
	 * @return the quality at which the threshold is reached, or -1 if it never is
	 */
	public double getPercentile (int offset, int percentile) {

		// The number of observations which must be passed, using integer arithmetic.
		long threshold = (totalCounts*percentile)/100;

		long running = 0;
		int slot = offset;
		while (slot < actualCounts.length) {
			running += actualCounts[slot];
			if (running >= threshold) {
				return (char)(slot-offset);
			}
			slot++;
		}

		return -1;
	}

}
0 /**
1 * Copyright Copyright 2013-17 Simon Andrews
2 *
3 * This file is part of SeqMonk.
4 *
5 * SeqMonk is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * SeqMonk is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SeqMonk; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 package uk.ac.babraham.FastQC.Utilities;
21
/**
 * Keeps a tally of how often each quality character has been seen and
 * derives simple summary statistics from that tally.
 */
public class QualityCount {

	/*
	 * This is the third attempt at this class. It is an all primitive
	 * version so that no hash lookups (which would need a conversion
	 * from char to Character) are required. 150 slots should be safe
	 * since that gives us up to Phred 86 with a 64 offset, which
	 * should be plenty.
	 */

	// The number of distinct character codes which can be counted.
	private static final int SLOT_COUNT = 150;

	// The value returned for the min/max character when nothing has been counted.
	private static final char NOTHING_SEEN = (char)1000;

	// One counter per character code.
	private long [] actualCounts = new long[SLOT_COUNT];

	// The number of values which have been added.
	private long totalCounts = 0;

	/**
	 * Records one observation of a quality character.
	 *
	 * @param c the character which was seen
	 */
	public void addValue(char c) {
		totalCounts += 1;
		actualCounts[c] += 1;
	}

	/**
	 * @return the number of values added so far
	 */
	public long getTotalCount () {
		return totalCounts;
	}

	/**
	 * @return the lowest character seen, or (char)1000 if nothing has been seen
	 */
	public char getMinChar () {

		int slot = 0;
		while (slot < actualCounts.length) {
			if (actualCounts[slot] > 0) {
				return (char)slot;
			}
			slot++;
		}

		return NOTHING_SEEN;
	}

	/**
	 * @return the highest character seen, or (char)1000 if nothing has been seen
	 */
	public char getMaxChar () {

		int slot = actualCounts.length-1;
		while (slot >= 0) {
			if (actualCounts[slot] > 0) {
				return (char)slot;
			}
			slot--;
		}

		return NOTHING_SEEN;
	}

	/**
	 * Calculates the mean quality of the characters at or above the offset.
	 *
	 * @param offset the character code which represents a quality of zero
	 * @return the mean of (character code - offset) over the counted characters
	 */
	public double getMean (int offset) {
		long weightedSum = 0;
		long observations = 0;

		int slot = offset;
		while (slot < actualCounts.length) {
			long seen = actualCounts[slot];
			weightedSum += seen*(slot-offset);
			observations += seen;
			slot++;
		}

		return ((double)weightedSum)/observations;
	}

	/**
	 * Finds the quality at which the running count of characters (starting at
	 * the offset) first reaches the requested percentage of all added values.
	 *
	 * @param offset the character code which represents a quality of zero
	 * @param percentile the percentage of the total count to reach
	 * @return the quality at which the threshold is reached, or -1 if it never is
	 */
	public double getPercentile (int offset, int percentile) {

		// The number of observations which must be passed, using integer arithmetic.
		long threshold = (totalCounts*percentile)/100;

		long running = 0;
		int slot = offset;
		while (slot < actualCounts.length) {
			running += actualCounts[slot];
			if (running >= threshold) {
				return (char)(slot-offset);
			}
			slot++;
		}

		return -1;
	}

}
0 /**
1 * Copyright Copyright 2007-15 Simon Andrews
2 *
3 * This file is part of SeqMonk.
4 *
5 * SeqMonk is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * SeqMonk is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SeqMonk; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Utilities;
20
21 /**
22 * A simple class to represent separate values for Red Green and Blue
23 * components of a colour.
24 */
public class RGB {

	/** The red component. */
	public int r;

	/** The green component. */
	public int g;

	/** The blue component. */
	public int b;

	/**
	 * Builds a colour from its three components. The values are stored
	 * exactly as they are supplied.
	 *
	 * @param r the red component
	 * @param g the green component
	 * @param b the blue component
	 */
	public RGB (int r, int g, int b) {
		this.b = b;
		this.g = g;
		this.r = r;
	}
}
0 /**
1 * Copyright Copyright 2007-15 Simon Andrews
2 *
3 * This file is part of SeqMonk.
4 *
5 * SeqMonk is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * SeqMonk is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with SeqMonk; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 package uk.ac.babraham.FastQC.Utilities;
20
21 /**
22 * A simple class to represent separate values for Red Green and Blue
23 * components of a colour.
24 */
public class RGB {

	/** The red component. */
	public int r;

	/** The green component. */
	public int g;

	/** The blue component. */
	public int b;

	/**
	 * Builds a colour from its three components. The values are stored
	 * exactly as they are supplied.
	 *
	 * @param r the red component
	 * @param g the green component
	 * @param b the blue component
	 */
	public RGB (int r, int g, int b) {
		this.b = b;
		this.g = g;
		this.r = r;
	}
}