Codebase list seer / debian/latest scripts / reformat_output.pl
debian/latest

Tree @debian/latest (Download .tar.gz)

reformat_output.pl @debian/latestraw · history · blame

#!/usr/bin/perl -w

use strict;
use warnings;

use Getopt::Long;

my $usage_message = <<USAGE;
Usage: ./hits_to_matrix.pl -k <filtered_kmers.txt> -p <pheno_file>

Reformats output for easier reading as tsv. Prints to stdout

   Options

   Required
   -k, --kmers      Filtered significant kmers, as output by seer + filter_seer
   -p, --pheno       Phenotype file used by seer

   Optional
   -h, --help       Displays this message

USAGE

#****************************************************************************************#
#* Main                                                                                 *#
#****************************************************************************************#

#* gets input parameters
my ($kmer_file, $pheno, $help);
GetOptions ("kmers|k=s"  => \$kmer_file,
            "pheno|p=s" => \$pheno,
            "help|h"     => \$help
		   ) or die($usage_message);

if (defined($help) || !defined($kmer_file) || !defined($pheno))
{
   print $usage_message;
}
else
{
   # Read possible samples
   open(PHENO, $pheno) || die("Could not open pheno file $pheno\n");

   my @samples;
   while (my $pheno_line = <PHENO>)
   {
      chomp $pheno_line;

      my ($sample, $junk, $pheno) = split("\t", $pheno_line);

      push(@samples, $sample);
   }

   # Keep everything sorted (as depends on kmer counting program used)
   @samples = sort @samples;
   close PHENO;

   # Print header row to stdout
   open(KMERS, $kmer_file) || die("Could not open kmer file $kmer_file\n");
   print join("\t", "sequence", "maf", "unadj_p_val", "p_val", "beta", "se", "comments", @samples) . "\n";

   while (my $kmer_line = <KMERS>)
   {
      chomp $kmer_line;

      my ($sequence, $maf, $unadj, $adj, $beta, $se, $comment, @kmer_samples) = split("\t", $kmer_line);
      @kmer_samples = sort @kmer_samples;

      print join("\t", $sequence, $maf, $unadj, $adj, $beta, $se, $comment) . "\t";

      my $next_sample = shift(@kmer_samples);
      foreach my $sample (@samples)
      {
         print "\t";
         if ($sample eq $next_sample)
         {
            print $sample;
            if (scalar(@kmer_samples) != 0)
            {
               $next_sample = shift(@kmer_samples);
            }
         }
      }
      print "\n";

   }
   close KMERS;
}