Codebase list tigr-glimmer / debian/3.02-4 sample-run / g3-from-training.csh
debian/3.02-4

Tree @debian/3.02-4 (Download .tar.gz)

g3-from-training.csh @debian/3.02-4raw · history · blame

#!/bin/csh

# Require the three mandatory arguments; otherwise print the usage text
# and exit with a failure status.
if ($#argv < 3) then
  echo "Usage:  g3-from-training.csh  <genome> <coords> <tag>"
  echo "           [step<i>  [only]]"
  echo ""
  echo "Run Glimmer3 on the sequence in file <genome> using the genes"
  echo "in file <coords> to extract a training set.  Use <tag> to prefix"
  echo "output files, which are:"
  echo "<tag>.train is the multifasta file of training sequences"
  echo "<tag>.icm is the model"
  echo "<tag>.upstream are the regions before the starts in <coords>"
  echo "<tag>.motif is a PWM of the upstream regions"
  echo "<tag>.detail is Glimmer3 output detail"
  echo "<tag>.predict is Glimmer3 predictions"
  echo ""
  # The optional step label and 'only' flag are read from $4 and $5 below,
  # so they are the 4th and 5th arguments.
  echo "If the 4th argument is filled in, then jump to that step"
  echo "If the 5th argument is also set to 'only', then do only that step"

  exit -1;
endif


# Positional arguments.  <step> and <only> are optional and are empty
# when not supplied on the command line.
set genome = $1
set coords = $2
set tag = $3
set step = $4
set onestep = $5

# Locations of the Glimmer3 awk scripts, the Glimmer3 binaries and the
# ELPH motif finder.  Adjust these for the local installation.
set awkpath = /fs/szgenefinding/Glimmer3/scripts
set glimmerpath = /fs/szgenefinding/Glimmer3/bin
set elphbin = /nfshomes/adelcher/bin/elph

# add/change glimmer options here
set glimmeropts = "-o50 -g110 -t30"

# Number of step labels (step1 .. step5) defined below; used in the
# progress messages.
set numsteps = 5

# If a starting step was requested, make sure it names one of the labels
# defined below before jumping to it, so that a typo produces a clear
# message instead of csh's generic "label not found" error.  $step is
# quoted so that an empty or unusual value cannot break the expression.
if  ("$step" != "")  then
  switch ("$step")
    case step[1-5]:
      breaksw
    default:
      echo "Unknown step '$step'; expected one of step1 .. step${numsteps}"
      exit 1
  endsw
  goto $step
endif
    

step1:
# Extract the training sequences from the genome file
# Writes the sequences named in $coords to $tag.train.
echo "Step 1 of ${numsteps}:  Extracting training sequences"
$glimmerpath/extract -t $genome $coords > $tag.train
if  ($status != 0)  then
  echo "Failed to extract training sequences"
  # A bare `exit` would return the status of the echo above (0) and so
  # report success to the caller; exit with an explicit failure code.
  exit 1
endif
# Stop here when the caller asked for this step only.
if  ($onestep == "only")  exit


step2:
# Build the icm from the training sequences
# Reads $tag.train on standard input and writes the model to $tag.icm.
echo "Step 2 of ${numsteps}:  Building ICM"
$glimmerpath/build-icm -r $tag.icm < $tag.train
if  ($status != 0)  then
  echo "Failed to build ICM"
  # A bare `exit` would return the status of the echo above (0) and so
  # report success to the caller; exit with an explicit failure code.
  exit 1
endif
# Stop here when the caller asked for this step only.
if  ($onestep == "only")  exit


step3:
# Create a position weight matrix (PWM) from the regions
# upstream of the start locations in $coords
echo "Step 3 of ${numsteps}:  Making PWM from upstream regions"
$awkpath/upstream-coords.awk 25 0 $coords \
   | $glimmerpath/extract $genome - > $tag.upstream
# Check the extraction on its own: $status only reflects the most recent
# pipeline, so without this test a failed extraction would go unnoticed
# and an empty or partial $tag.upstream would be handed to ELPH.
if  ($status != 0 || -z $tag.upstream)  then
  echo "Failed to extract upstream regions"
  exit 1
endif
$elphbin $tag.upstream LEN=6 | $awkpath/get-motif-counts.awk > $tag.motif
if  ($status != 0)  then
  echo "Failed to create PWM"
  # A bare `exit` would return the status of the echo above (0) and so
  # report success to the caller; exit with an explicit failure code.
  exit 1
endif
# Stop here when the caller asked for this step only.
if  ($onestep == "only")  exit


step4:
# Determine the distribution of start-codon usage in $coords
# The output is kept in $startuse and passed to glimmer3 -P in step 5.
echo "Step 4 of ${numsteps}:  Getting start-codon usage"
set startuse = `$glimmerpath/start-codon-distrib -3 $genome $coords`
# Treat empty output as a failure, as the other steps do for theirs:
# with an empty $startuse the -P option in step 5 would swallow the
# genome file name as its argument.
if  ("$startuse" == "")  then
  echo "Failed to get start-codon usage"
  exit 1
endif
# Stop here when the caller asked for this step only.
if  ($onestep == "only")  exit


step5:
# Run Glimmer
# Uses the ICM ($tag.icm), the upstream PWM ($tag.motif) and the
# start-codon usage to make predictions; output files are prefixed
# with $tag.
echo "Step 5 of ${numsteps}:  Running Glimmer3"
# $startuse is normally set by step 4, but it is undefined when the
# script was asked to jump straight to this step.  Compute it on demand
# so that "step5" works as a starting point.
if  (! $?startuse)  then
  set startuse = `$glimmerpath/start-codon-distrib -3 $genome $coords`
endif
$glimmerpath/glimmer3 $glimmeropts -b $tag.motif -P $startuse $genome $tag.icm $tag
if  ($status != 0)  then
  echo "Failed to run Glimmer3"
  # A bare `exit` would return the status of the echo above (0) and so
  # report success to the caller; exit with an explicit failure code.
  exit 1
endif
if  ($onestep == "only")  exit