Codebase list mash / 20e4a69
dist: option to show seq comment field Brian Ondov 5 years ago
3 changed file(s) with 30 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
3636 //addOption("log", Option(Option::Boolean, "L", "Output", "Log scale distances and divide by k-mer size to provide a better analog to phylogenetic distance. The special case of zero shared min-hashes will result in a distance of 1.", ""));
3737 addOption("pvalue", Option(Option::Number, "v", "Output", "Maximum p-value to report.", "1.0", 0., 1.));
3838 addOption("distance", Option(Option::Number, "d", "Output", "Maximum distance to report.", "1.0", 0., 1.));
39 addOption("comment", Option(Option::Boolean, "C", "Output", "Show comment fields with reference/query names (denoted with ':').", "1.0", 0., 1.));
3940 useSketchOptions();
4041 }
4142
5051 int threads = options.at("threads").getArgumentAsNumber();
5152 bool list = options.at("list").active;
5253 bool table = options.at("table").active;
54 bool comment = options.at("comment").active;
5355 //bool log = options.at("log").active;
5456 double pValueMax = options.at("pvalue").getArgumentAsNumber();
5557 double distanceMax = options.at("distance").getArgumentAsNumber();
224226
225227 while ( threadPool.outputAvailable() )
226228 {
227 writeOutput(threadPool.popOutputWhenAvailable(), table);
229 writeOutput(threadPool.popOutputWhenAvailable(), table, comment);
228230 }
229231 }
230232
231233 while ( threadPool.running() )
232234 {
233 writeOutput(threadPool.popOutputWhenAvailable(), table);
235 writeOutput(threadPool.popOutputWhenAvailable(), table, comment);
234236 }
235237
236238 if ( warningCount > 0 && ! parameters.reads )
241243 return 0;
242244 }
243245
244 void CommandDistance::writeOutput(CompareOutput * output, bool table) const
246 void CommandDistance::writeOutput(CompareOutput * output, bool table, bool comment) const
245247 {
246248 uint64_t i = output->indexQuery;
247249 uint64_t j = output->indexRef;
266268 }
267269 else if ( pair->pass )
268270 {
269 cout << output->sketchRef.getReference(j).name << '\t' << output->sketchQuery.getReference(i).name << '\t' << pair->distance << '\t' << pair->pValue << '\t' << pair->numer << '/' << pair->denom << endl;
271 cout << output->sketchRef.getReference(j).name;
272
273 if ( comment )
274 {
275 cout << ':' << output->sketchRef.getReference(j).comment;
276 }
277
278 cout << '\t' << output->sketchQuery.getReference(i).name;
279
280 if ( comment )
281 {
282 cout << ':' << output->sketchQuery.getReference(i).comment;
283 }
284
285 cout << '\t' << pair->distance << '\t' << pair->pValue << '\t' << pair->numer << '/' << pair->denom << endl;
270286 }
271287
272288 j++;
8484
8585 private:
8686
87 void writeOutput(CompareOutput * output, bool table) const;
87 void writeOutput(CompareOutput * output, bool table, bool comment) const;
8888 };
8989
9090 CommandDistance::CompareOutput * compare(CommandDistance::CompareInput * input);
1818
1919 namespace mash {
2020
21 struct HashTableEntry // Pairs a 32-bit counter with a set of 64-bit indices.
22 {
23 HashTableEntry() : count(0) {} // Start with a zero count; `indices` is default-constructed (empty).
24
25 uint32_t count; // Counter, initialised to 0 by the constructor. NOTE(review): what is being counted is not shown here — confirm at the use site.
26 std::unordered_set<uint64_t> indices; // Unique 64-bit indices. NOTE(review): presumably reference/sequence indices — confirm against the code that fills it.
27 }; // In the visible code, only the commented-out HashTable typedef refers to this struct.
28
29 //typedef std::unordered_map< uint64_t, HashTableEntry > HashTable;
2130 typedef std::unordered_map< uint64_t, std::unordered_set<uint64_t> > HashTable;
2231
2332 static const std::unordered_map< std::string, char > codons =